1use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole, Snippet};
4use crate::sources::provenance::{LOCAL_SOURCE_ID, Source, SourceKind};
5use anyhow::{Context, Result, anyhow, bail};
6use frankensqlite::{
7 Connection as FrankenConnection, Row as FrankenRow, SqliteValue,
8 compat::{
9 ConnectionExt as FrankenConnectionExt, OpenFlags as FrankenOpenFlags,
10 OptionalExtension as FrankenOptionalExtension, ParamValue, RowExt as FrankenRowExt,
11 Transaction as FrankenTransaction, TransactionExt as FrankenTransactionExt,
12 open_with_flags as open_franken_with_flags, param_slice_to_values, params_from_iter,
13 },
14 migrate::MigrationRunner,
15};
16use serde::{Deserialize, Serialize};
17use smallvec::SmallVec;
18use std::borrow::Cow;
19use std::collections::{HashMap, HashSet};
20use std::fs;
21use std::io::{BufRead, BufReader, Write};
22use std::process::{Command, Stdio};
23use std::sync::{
24 Arc,
25 atomic::{AtomicBool, AtomicI8, AtomicI64, AtomicU64, AtomicUsize, Ordering},
26};
27
/// Builds a `&[ParamValue]` slice from a comma-separated list of expressions.
///
/// Every expression is converted with `ParamValue::from`. Invoking the macro with no
/// arguments yields an empty slice; a trailing comma is accepted.
macro_rules! fparams {
    // No arguments: empty parameter slice.
    () => {
        &[] as &[ParamValue]
    };
    // One or more expressions, each converted to a `ParamValue`.
    ($($val:expr),+ $(,)?) => {
        &[$(ParamValue::from($val)),+] as &[ParamValue]
    };
}
37use std::path::{Path, PathBuf};
38use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
39use thiserror::Error;
40use tracing::info;
41
/// Timeout that `LazyFrankenDb::get` passes to `acquire_doctor_mutation_db_open_guard`.
const DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
/// Upper bound (64 KiB) on the bytes read from the doctor lock file when looking for its
/// `pid=` entry.
const DOCTOR_MUTATION_LOCK_MAX_METADATA_READ: u64 = 64 * 1024;
44
/// Errors returned when lazily opening the database through `LazyFrankenDb`.
#[derive(Debug, Error)]
pub enum LazyDbError {
    /// The database path does not exist on disk.
    #[error("Database not found at {0}")]
    NotFound(PathBuf),
    /// Opening the connection failed. Also used when the doctor mutation lock cannot be
    /// acquired or when the open times out.
    #[error("Failed to open FrankenSQLite database at {path}: {source}")]
    FrankenOpenFailed {
        /// Path that was being opened.
        path: PathBuf,
        /// Underlying FrankenSQLite error.
        source: frankensqlite::FrankenError,
    },
}
63
/// Newtype around `FrankenConnection` that is declared `Send`.
///
/// Field 0 is the connection. Fields 1 and 2 hold the index-writer checkpoint-pages and
/// busy-timeout (ms) values given to `new_with_index_writer_state`, or the
/// `UNSET_INDEX_WRITER_*` sentinels when constructed through `new`.
pub struct SendFrankenConnection(FrankenConnection, i64, u64);

// SAFETY: NOTE(review) — the invariant that makes it sound to move the connection to another
// thread is not visible in this part of the file; confirm against FrankenConnection's
// threading guarantees.
unsafe impl Send for SendFrankenConnection {}

impl SendFrankenConnection {
    /// Wraps `conn` with both index-writer fields set to their `UNSET_*` sentinel values.
    pub(crate) fn new(conn: FrankenConnection) -> Self {
        Self(
            conn,
            UNSET_INDEX_WRITER_CHECKPOINT_PAGES,
            UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS,
        )
    }

    /// Wraps `conn` together with explicit index-writer state.
    pub(crate) fn new_with_index_writer_state(
        conn: FrankenConnection,
        checkpoint_pages: i64,
        busy_timeout_ms: u64,
    ) -> Self {
        Self(conn, checkpoint_pages, busy_timeout_ms)
    }

    /// Consumes the wrapper and returns `(connection, checkpoint_pages, busy_timeout_ms)`.
    pub(crate) fn into_parts(self) -> (FrankenConnection, i64, u64) {
        (self.0, self.1, self.2)
    }
}

impl std::ops::Deref for SendFrankenConnection {
    type Target = FrankenConnection;

    /// Gives shared access to the wrapped connection.
    fn deref(&self) -> &FrankenConnection {
        &self.0
    }
}
108
/// Database handle that defers opening its connection until it is first requested.
pub struct LazyFrankenDb {
    /// Filesystem path of the database.
    path: PathBuf,
    /// `None` until the first successful open; afterwards holds the cached connection.
    conn: parking_lot::Mutex<Option<SendFrankenConnection>>,
}
118
119pub struct LazyFrankenDbGuard<'a>(parking_lot::MutexGuard<'a, Option<SendFrankenConnection>>);
121
122impl std::fmt::Debug for LazyFrankenDbGuard<'_> {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 f.debug_tuple("LazyFrankenDbGuard")
125 .field(&self.0.is_some())
126 .finish()
127 }
128}
129
130impl std::ops::Deref for LazyFrankenDbGuard<'_> {
131 type Target = FrankenConnection;
132 fn deref(&self) -> &FrankenConnection {
133 self.0
134 .as_ref()
135 .expect("LazyFrankenDb connection must be initialized before access")
136 }
137}
138
139impl LazyFrankenDb {
140 pub fn new(path: PathBuf) -> Self {
142 Self {
143 path,
144 conn: parking_lot::Mutex::new(None),
145 }
146 }
147
148 pub fn from_overrides(data_dir: &Option<PathBuf>, db_override: Option<PathBuf>) -> Self {
152 let data_dir = data_dir.clone().unwrap_or_else(crate::default_data_dir);
153 let path = db_override.unwrap_or_else(|| data_dir.join("agent_search.db"));
154 Self::new(path)
155 }
156
157 pub fn get(&self, reason: &str) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
162 let mut guard = self.conn.lock();
163 if guard.is_none() {
164 if !self.path.exists() {
165 return Err(LazyDbError::NotFound(self.path.clone()));
166 }
167 let start = Instant::now();
168 let _doctor_guard = acquire_doctor_mutation_db_open_guard(
169 &self.path,
170 DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT,
171 )
172 .map_err(|err| LazyDbError::FrankenOpenFailed {
173 path: self.path.clone(),
174 source: frankensqlite::FrankenError::Internal(err.to_string()),
175 })?;
176 let conn =
177 FrankenConnection::open(self.path.to_string_lossy().into_owned()).map_err(|e| {
178 LazyDbError::FrankenOpenFailed {
179 path: self.path.clone(),
180 source: e,
181 }
182 })?;
183 let elapsed_ms = start.elapsed().as_millis();
184 info!(
185 path = %self.path.display(),
186 elapsed_ms = elapsed_ms,
187 reason = reason,
188 "lazily opened FrankenSQLite database"
189 );
190 *guard = Some(SendFrankenConnection::new(conn));
191 }
192 Ok(LazyFrankenDbGuard(guard))
193 }
194
195 pub fn get_with_timeout(
201 &self,
202 reason: &str,
203 timeout: Duration,
204 ) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
205 let mut guard = self.conn.lock();
206 if guard.is_none() {
207 if !self.path.exists() {
208 return Err(LazyDbError::NotFound(self.path.clone()));
209 }
210 let start = Instant::now();
211 let path_owned = self.path.to_string_lossy().into_owned();
212 let path_for_guard = self.path.clone();
213 let (tx, rx) = std::sync::mpsc::channel();
214 std::thread::spawn(move || {
215 let _doctor_guard =
216 match acquire_doctor_mutation_db_open_guard(&path_for_guard, timeout) {
217 Ok(guard) => guard,
218 Err(err) => {
219 let _ = tx
220 .send(Err(frankensqlite::FrankenError::Internal(err.to_string())));
221 return;
222 }
223 };
224 let _ =
225 tx.send(FrankenConnection::open(path_owned).map(SendFrankenConnection::new));
226 });
227 let conn = rx
228 .recv_timeout(timeout)
229 .map_err(|_| LazyDbError::FrankenOpenFailed {
230 path: self.path.clone(),
231 source: frankensqlite::FrankenError::Internal(format!(
232 "database open timed out after {}s (possible corruption or lock contention)",
233 timeout.as_secs()
234 )),
235 })?
236 .map_err(|e| LazyDbError::FrankenOpenFailed {
237 path: self.path.clone(),
238 source: e,
239 })?;
240 let elapsed_ms = start.elapsed().as_millis();
241 info!(
242 path = %self.path.display(),
243 elapsed_ms = elapsed_ms,
244 reason = reason,
245 "lazily opened FrankenSQLite database (with timeout)"
246 );
247 *guard = Some(conn);
248 }
249 Ok(LazyFrankenDbGuard(guard))
250 }
251
252 pub fn path(&self) -> &Path {
254 &self.path
255 }
256
257 pub fn is_open(&self) -> bool {
259 self.conn.lock().is_some()
260 }
261}
262
/// State for `next_franken_retry_jitter_ms`; advanced by a fixed increment on every draw.
static FRANKEN_RETRY_JITTER_STATE: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
/// Incremented by `enter_doctor_mutation_db_open_bypass` and decremented when its guard drops;
/// while non-zero, `acquire_doctor_mutation_db_open_guard` skips the lock.
static DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH: AtomicUsize = AtomicUsize::new(0);
/// Gate for the message-lookup counters below; they only advance while this is `true`.
static MESSAGE_LOOKUP_TRACE_ENABLED: AtomicBool = AtomicBool::new(false);
/// Incremented once per `record_message_lookup_exact_idx_probe` call.
static MESSAGE_LOOKUP_EXACT_IDX_PROBES: AtomicU64 = AtomicU64::new(0);
/// Accumulates the `query_count` passed to `record_message_lookup_bounded_queries`.
static MESSAGE_LOOKUP_BOUNDED_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Incremented once per `record_message_lookup_full_scan_query` call.
static MESSAGE_LOOKUP_FULL_SCAN_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Accumulates the row counts reported by the bounded and full-scan recorders.
static MESSAGE_LOOKUP_ROWS_MATERIALIZED: AtomicU64 = AtomicU64::new(0);
270
/// Plain-value copy of the message-lookup trace counters (a snapshot, or a difference between
/// two snapshots).
#[derive(Debug, Clone, Copy, Default, Serialize)]
pub(crate) struct MessageLookupTraceCounters {
    /// Exact-index probes recorded.
    pub exact_idx_probes: u64,
    /// Bounded lookup queries recorded.
    pub bounded_lookup_queries: u64,
    /// Full-scan queries recorded.
    pub full_scan_queries: u64,
    /// Rows reported as materialized by bounded and full-scan lookups.
    pub rows_materialized: u64,
}
278
279impl MessageLookupTraceCounters {
280 pub(crate) fn saturating_sub(self, before: Self) -> Self {
281 Self {
282 exact_idx_probes: self
283 .exact_idx_probes
284 .saturating_sub(before.exact_idx_probes),
285 bounded_lookup_queries: self
286 .bounded_lookup_queries
287 .saturating_sub(before.bounded_lookup_queries),
288 full_scan_queries: self
289 .full_scan_queries
290 .saturating_sub(before.full_scan_queries),
291 rows_materialized: self
292 .rows_materialized
293 .saturating_sub(before.rows_materialized),
294 }
295 }
296
297 pub(crate) fn lookups_against_global(self) -> u64 {
298 self.exact_idx_probes.saturating_add(self.rows_materialized)
299 }
300}
301
302pub(crate) fn set_message_lookup_trace_enabled(enabled: bool) -> bool {
303 MESSAGE_LOOKUP_TRACE_ENABLED.swap(enabled, Ordering::Relaxed)
304}
305
306pub(crate) fn message_lookup_trace_snapshot() -> MessageLookupTraceCounters {
307 MessageLookupTraceCounters {
308 exact_idx_probes: MESSAGE_LOOKUP_EXACT_IDX_PROBES.load(Ordering::Relaxed),
309 bounded_lookup_queries: MESSAGE_LOOKUP_BOUNDED_QUERIES.load(Ordering::Relaxed),
310 full_scan_queries: MESSAGE_LOOKUP_FULL_SCAN_QUERIES.load(Ordering::Relaxed),
311 rows_materialized: MESSAGE_LOOKUP_ROWS_MATERIALIZED.load(Ordering::Relaxed),
312 }
313}
314
315fn record_message_lookup_exact_idx_probe() {
316 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
317 MESSAGE_LOOKUP_EXACT_IDX_PROBES.fetch_add(1, Ordering::Relaxed);
318 }
319}
320
321fn record_message_lookup_bounded_queries(query_count: u64, rows: usize) {
322 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
323 MESSAGE_LOOKUP_BOUNDED_QUERIES.fetch_add(query_count, Ordering::Relaxed);
324 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
325 }
326}
327
328fn record_message_lookup_full_scan_query(rows: usize) {
329 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
330 MESSAGE_LOOKUP_FULL_SCAN_QUERIES.fetch_add(1, Ordering::Relaxed);
331 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
332 }
333}
334
/// Guard returned by `enter_doctor_mutation_db_open_bypass`; dropping it decrements the
/// bypass depth counter.
pub(crate) struct DoctorMutationDbOpenBypassGuard;

impl Drop for DoctorMutationDbOpenBypassGuard {
    fn drop(&mut self) {
        // Undo the increment performed when the bypass was entered.
        DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_sub(1, Ordering::SeqCst);
    }
}
342
343pub(crate) fn enter_doctor_mutation_db_open_bypass() -> DoctorMutationDbOpenBypassGuard {
344 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_add(1, Ordering::SeqCst);
345 DoctorMutationDbOpenBypassGuard
346}
347
348fn doctor_mutation_db_open_bypass_active() -> bool {
349 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.load(Ordering::SeqCst) > 0
350}
351
352fn next_franken_retry_jitter_ms(max_inclusive: u64) -> u64 {
353 let mut value = FRANKEN_RETRY_JITTER_STATE.fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed);
354 value ^= value >> 30;
355 value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9);
356 value ^= value >> 27;
357 value = value.wrapping_mul(0x94d0_49bb_1331_11eb);
358 value ^= value >> 31;
359 value % max_inclusive.saturating_add(1)
360}
361
362pub(crate) fn sleep_with_franken_retry_backoff(
365 backoff: &mut Duration,
366 remaining: Duration,
367 max_backoff: Duration,
368) {
369 let capped = (*backoff).min(remaining);
370 let extra_budget = remaining.saturating_sub(capped).min(capped);
371 let extra_ms = extra_budget.as_millis().min(u128::from(u64::MAX)) as u64;
372 let sleep_for = if extra_ms == 0 {
373 capped
374 } else {
375 capped
376 .saturating_add(Duration::from_millis(next_franken_retry_jitter_ms(
377 extra_ms,
378 )))
379 .min(remaining)
380 };
381 std::thread::sleep(sleep_for);
382 *backoff = backoff.saturating_mul(2).min(max_backoff);
383}
384
385struct DoctorMutationDbOpenGuard(Option<fs::File>);
386
387impl Drop for DoctorMutationDbOpenGuard {
388 fn drop(&mut self) {
389 if let Some(file) = self.0.as_ref() {
390 let _ = fs2::FileExt::unlock(file);
391 }
392 }
393}
394
/// Maps a database path to its doctor repair lock file.
///
/// Only paths whose file name is exactly `agent_search.db` are guarded; the lock lives at
/// `<parent>/doctor/locks/doctor-repair.lock`. Returns `None` for any other file name or when
/// the path has no parent.
fn doctor_mutation_lock_path_for_db_open(db_path: &Path) -> Option<PathBuf> {
    let file_name = db_path.file_name()?.to_str()?;
    if file_name != "agent_search.db" {
        return None;
    }

    let mut lock_path = db_path.parent()?.to_path_buf();
    lock_path.extend(["doctor", "locks", "doctor-repair.lock"]);
    Some(lock_path)
}
408
/// Returns true when any `key=value` line of `raw` has key `pid` (after trimming) and a value
/// that parses as a `u32` equal to this process's id.
fn doctor_lock_metadata_pid_is_current_process(raw: &str) -> bool {
    let own_pid = std::process::id();
    raw.lines()
        .filter_map(|line| line.split_once('='))
        .filter(|(key, _)| key.trim() == "pid")
        .any(|(_, value)| value.trim().parse::<u32>() == Ok(own_pid))
}
421
422fn doctor_lock_file_pid_is_current_process(file: &fs::File) -> bool {
423 use std::io::Read as _;
424
425 let Ok(mut file) = file.try_clone() else {
426 return false;
427 };
428 let mut raw = String::new();
429 let _ = std::io::Read::take(&mut file, DOCTOR_MUTATION_LOCK_MAX_METADATA_READ)
430 .read_to_string(&mut raw);
431 doctor_lock_metadata_pid_is_current_process(&raw)
432}
433
434fn acquire_doctor_mutation_db_open_guard(
435 db_path: &Path,
436 timeout: Duration,
437) -> Result<DoctorMutationDbOpenGuard> {
438 let Some(lock_path) = doctor_mutation_lock_path_for_db_open(db_path) else {
439 return Ok(DoctorMutationDbOpenGuard(None));
440 };
441 if doctor_mutation_db_open_bypass_active() {
442 return Ok(DoctorMutationDbOpenGuard(None));
443 }
444
445 if let Some(parent) = lock_path.parent() {
446 fs::create_dir_all(parent).with_context(|| {
447 format!(
448 "creating doctor mutation lock directory {} before opening {}",
449 parent.display(),
450 db_path.display()
451 )
452 })?;
453 }
454
455 let deadline = Instant::now() + timeout;
456 let mut backoff = Duration::from_millis(4);
457 loop {
458 let file = fs::OpenOptions::new()
459 .create(true)
460 .truncate(false)
461 .read(true)
462 .write(true)
463 .open(&lock_path)
464 .with_context(|| {
465 format!(
466 "opening doctor mutation lock {} before opening {}",
467 lock_path.display(),
468 db_path.display()
469 )
470 })?;
471
472 if doctor_lock_file_pid_is_current_process(&file) {
473 return Ok(DoctorMutationDbOpenGuard(None));
474 }
475
476 match fs2::FileExt::try_lock_shared(&file) {
477 Ok(()) => return Ok(DoctorMutationDbOpenGuard(Some(file))),
478 Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
479 let now = Instant::now();
480 if now >= deadline {
481 return Err(anyhow!(
482 "doctor mutation lock {} is active while opening {}; refusing to open during repair after waiting {}ms",
483 lock_path.display(),
484 db_path.display(),
485 timeout.as_millis()
486 ));
487 }
488 let remaining = deadline.saturating_duration_since(now);
489 sleep_with_franken_retry_backoff(
490 &mut backoff,
491 remaining,
492 Duration::from_millis(128),
493 );
494 }
495 Err(err) => {
496 return Err(anyhow!(
497 "failed to acquire shared doctor mutation lock {} before opening {}: {}",
498 lock_path.display(),
499 db_path.display(),
500 err
501 ));
502 }
503 }
504 }
505}
506
507pub(crate) fn open_franken_storage_with_timeout(
508 path: &Path,
509 timeout: Duration,
510) -> Result<FrankenStorage> {
511 if !path.exists() {
512 return Err(anyhow!("Database not found at {}", path.display()));
513 }
514
515 let deadline = Instant::now() + timeout;
516 let mut backoff = Duration::from_millis(4);
517 loop {
518 match FrankenStorage::open(path) {
519 Ok(storage) => return Ok(storage),
520 Err(err) if retryable_franken_anyhow(&err) => {
521 let now = Instant::now();
522 if now >= deadline {
523 return Err(err);
524 }
525 let remaining = deadline.saturating_duration_since(now);
526 sleep_with_franken_retry_backoff(
527 &mut backoff,
528 remaining,
529 Duration::from_millis(128),
530 );
531 }
532 Err(err) => return Err(err),
533 }
534 }
535}
536
/// Opens the database only if its recorded schema version equals `CURRENT_SCHEMA_VERSION`.
///
/// Returns `Ok(None)` when the path does not exist or when the `schema_version` row in `meta`
/// is missing, unreadable, unparsable, or different from the current version. On a match the
/// storage is transitioned, repaired and configured before being returned.
///
/// # Errors
/// Propagates failures from opening the raw connection and from the transition, repair and
/// configuration steps.
pub(crate) fn open_current_schema_storage_with_timeout(
    path: &Path,
    timeout: Duration,
) -> Result<Option<FrankenStorage>> {
    if !path.exists() {
        return Ok(None);
    }

    let mut storage = FrankenStorage::new(
        open_franken_raw_connection_with_timeout(path, timeout)?,
        path.to_path_buf(),
    );
    storage.apply_open_stage_busy_timeout();

    // Any failure along this chain (query error, no row, wrong type, non-integer text)
    // collapses to `None`, which is treated as "not the current schema" below.
    let version = storage
        .raw()
        .query("SELECT value FROM meta WHERE key = 'schema_version';")
        .ok()
        .and_then(|rows| rows.first().cloned())
        .and_then(|row| row.get_typed::<String>(0).ok())
        .and_then(|raw| raw.parse::<i64>().ok());

    if version != Some(CURRENT_SCHEMA_VERSION) {
        // Close without checkpointing first; only fall back to the best-effort close when
        // that fails.
        if let Err(close_err) = storage.close_without_checkpoint_in_place() {
            tracing::debug!(
                error = %close_err,
                db_path = %path.display(),
                "open_current_schema_storage_with_timeout: close_without_checkpoint_in_place failed; falling back to best-effort close"
            );
            storage.close_best_effort_in_place();
        }
        return Ok(None);
    }

    transition_from_meta_version(&storage.conn)?;
    storage.repair_missing_current_schema_objects()?;
    storage.apply_config()?;
    Ok(Some(storage))
}
576
577pub(crate) fn open_franken_readonly_storage_with_timeout(
578 path: &Path,
579 timeout: Duration,
580) -> Result<FrankenStorage> {
581 if !path.exists() {
582 return Err(anyhow!("Database not found at {}", path.display()));
583 }
584
585 let deadline = Instant::now() + timeout;
586 let mut backoff = Duration::from_millis(4);
587 loop {
588 match FrankenStorage::open_readonly(path) {
589 Ok(storage) => return Ok(storage),
590 Err(err) if retryable_franken_anyhow(&err) => {
591 let now = Instant::now();
592 if now >= deadline {
593 return Err(err);
594 }
595 let remaining = deadline.saturating_duration_since(now);
596 sleep_with_franken_retry_backoff(
597 &mut backoff,
598 remaining,
599 Duration::from_millis(128),
600 );
601 }
602 Err(err) => return Err(err),
603 }
604 }
605}
606
607pub(crate) fn open_franken_raw_connection_with_timeout(
608 path: &Path,
609 timeout: Duration,
610) -> Result<FrankenConnection> {
611 if !path.exists() {
612 return Err(anyhow!("Database not found at {}", path.display()));
613 }
614
615 let path_str = path.to_string_lossy().to_string();
616 let deadline = Instant::now() + timeout;
617 let mut backoff = Duration::from_millis(4);
618 loop {
619 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
620 match FrankenConnection::open(&path_str)
621 .with_context(|| format!("opening raw frankensqlite db at {}", path.display()))
622 {
623 Ok(conn) => return Ok(conn),
624 Err(err) if retryable_franken_anyhow(&err) => {
625 let now = Instant::now();
626 if now >= deadline {
627 return Err(err);
628 }
629 let remaining = deadline.saturating_duration_since(now);
630 sleep_with_franken_retry_backoff(
631 &mut backoff,
632 remaining,
633 Duration::from_millis(128),
634 );
635 }
636 Err(err) => return Err(err),
637 }
638 }
639}
640
641pub(crate) fn open_franken_raw_readonly_connection_with_timeout(
642 path: &Path,
643 timeout: Duration,
644) -> Result<FrankenConnection> {
645 if !path.exists() {
646 return Err(anyhow!("Database not found at {}", path.display()));
647 }
648
649 let path_str = path.to_string_lossy().to_string();
650 let deadline = Instant::now() + timeout;
651 let mut backoff = Duration::from_millis(4);
652 loop {
653 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
654 match open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
655 .with_context(|| {
656 format!(
657 "opening raw frankensqlite db readonly at {}",
658 path.display()
659 )
660 }) {
661 Ok(conn) => return Ok(conn),
662 Err(err) if retryable_franken_anyhow(&err) => {
663 let now = Instant::now();
664 if now >= deadline {
665 return Err(err);
666 }
667 let remaining = deadline.saturating_duration_since(now);
668 sleep_with_franken_retry_backoff(
669 &mut backoff,
670 remaining,
671 Duration::from_millis(128),
672 );
673 }
674 Err(err) => return Err(err),
675 }
676 }
677}
678
679pub(crate) fn retryable_franken_error(err: &frankensqlite::FrankenError) -> bool {
680 matches!(
681 err,
682 frankensqlite::FrankenError::Busy
683 | frankensqlite::FrankenError::BusyRecovery
684 | frankensqlite::FrankenError::BusySnapshot { .. }
685 | frankensqlite::FrankenError::DatabaseLocked { .. }
686 | frankensqlite::FrankenError::LockFailed { .. }
687 | frankensqlite::FrankenError::WriteConflict { .. }
688 | frankensqlite::FrankenError::SerializationFailure { .. }
689 ) || retryable_storage_error_message(&err.to_string())
690}
691
/// Returns true when `message` contains, ignoring ASCII case, any phrase that marks a
/// transient contention error.
pub(crate) fn retryable_storage_error_message(message: &str) -> bool {
    const TRANSIENT_MARKERS: [&str; 6] = [
        "busy",
        "locked",
        "locking",
        "contention",
        "temporarily unavailable",
        "would block",
    ];

    let folded = message.to_ascii_lowercase();
    TRANSIENT_MARKERS
        .iter()
        .any(|marker| folded.contains(*marker))
}
701
702pub(crate) fn retryable_franken_anyhow(err: &anyhow::Error) -> bool {
703 err.chain().any(|cause| {
704 cause
705 .downcast_ref::<frankensqlite::FrankenError>()
706 .is_some_and(retryable_franken_error)
707 || retryable_storage_error_message(&cause.to_string())
708 })
709}
710
711impl Drop for LazyFrankenDb {
712 fn drop(&mut self) {
713 let Some(mut conn) = self.conn.get_mut().take() else {
714 return;
715 };
716 conn.0.close_best_effort_in_place();
717 }
718}
719
/// Sizing options for `FrankenConnectionManager`.
#[derive(Debug, Clone)]
pub struct ConnectionManagerConfig {
    /// Number of reader connections to open.
    pub reader_count: usize,
    /// Maximum number of writers that may be open at the same time.
    pub max_writers: usize,
}

impl Default for ConnectionManagerConfig {
    /// Four readers; one writer slot per unit of available parallelism (4 when that cannot be
    /// determined).
    fn default() -> Self {
        let max_writers = match std::thread::available_parallelism() {
            Ok(parallelism) => parallelism.get(),
            Err(_) => 4,
        };
        Self {
            reader_count: 4,
            max_writers,
        }
    }
}
747
/// Owns a fixed set of reader connections and limits how many writers are open at once.
pub struct FrankenConnectionManager {
    /// Path every reader and writer connection is opened against.
    db_path: PathBuf,
    /// Reader connections opened in `new`, each behind its own mutex.
    readers: Vec<parking_lot::Mutex<SendFrankenConnection>>,
    /// Monotonic counter used to pick the next reader round-robin.
    reader_idx: std::sync::atomic::AtomicUsize,
    /// Bounded channel pre-filled with one `()` token per allowed writer. Creating a writer
    /// takes a token; dropping its `WriterGuard` sends it back.
    writer_tokens: (
        crossbeam_channel::Sender<()>,
        crossbeam_channel::Receiver<()>,
    ),
    /// Effective configuration after both counts were clamped to at least 1.
    config: ConnectionManagerConfig,
}

// SAFETY: NOTE(review) — the justification for sharing the manager across threads is not
// visible in this part of the file. Readers sit behind mutexes and the token channel is a
// crossbeam channel, but confirm the invariants before relying on these impls.
unsafe impl Send for FrankenConnectionManager {}
unsafe impl Sync for FrankenConnectionManager {}
776
777impl FrankenConnectionManager {
778 pub fn new(db_path: impl Into<PathBuf>, config: ConnectionManagerConfig) -> Result<Self> {
783 let db_path = db_path.into();
784 let path_str = db_path.to_string_lossy().to_string();
785
786 let reader_count = config.reader_count.max(1);
787 let mut readers = Vec::with_capacity(reader_count);
788 for _ in 0..reader_count {
789 let conn = FrankenConnection::open(&path_str)
790 .with_context(|| format!("opening reader connection at {}", db_path.display()))?;
791 let _ = conn.execute("PRAGMA busy_timeout = 5000;"); let _ = conn.execute("PRAGMA cache_size = -16384;"); readers.push(parking_lot::Mutex::new(SendFrankenConnection::new(conn)));
795 }
796
797 let max_writers = config.max_writers.max(1);
798
799 let (tx, rx) = crossbeam_channel::bounded(max_writers);
803 for _ in 0..max_writers {
804 tx.send(())
805 .map_err(|_| anyhow!("writer token channel closed during initialization"))?;
806 }
807
808 Ok(Self {
809 db_path,
810 readers,
811 reader_idx: std::sync::atomic::AtomicUsize::new(0),
812 writer_tokens: (tx, rx),
813 config: ConnectionManagerConfig {
814 reader_count,
815 max_writers,
816 },
817 })
818 }
819
820 pub fn reader(&self) -> parking_lot::MutexGuard<'_, SendFrankenConnection> {
825 let idx = self
826 .reader_idx
827 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
828 self.readers[idx % self.readers.len()].lock()
829 }
830
831 pub fn writer(&self) -> Result<WriterGuard<'_>> {
837 self.writer_tokens
838 .1
839 .recv()
840 .map_err(|_| anyhow!("writer token channel closed"))?;
841 let path_str = self.db_path.to_string_lossy().to_string();
842 let conn = match FrankenConnection::open(&path_str) {
843 Ok(c) => c,
844 Err(e) => {
845 let _ = self.writer_tokens.0.send(());
846 return Err(anyhow::Error::from(e).context(format!(
847 "opening writer connection at {}",
848 self.db_path.display()
849 )));
850 }
851 };
852 let storage = FrankenStorage::new(conn, self.db_path.clone());
853 if let Err(e) = storage.apply_config() {
854 let _ = self.writer_tokens.0.send(());
855 return Err(e);
856 }
857 Ok(WriterGuard {
858 storage,
859 mgr: self,
860 committed: false,
861 })
862 }
863
864 pub fn concurrent_writer(&self) -> Result<WriterGuard<'_>> {
869 self.writer_tokens
870 .1
871 .recv()
872 .map_err(|_| anyhow!("writer token channel closed"))?;
873 let path_str = self.db_path.to_string_lossy().to_string();
874 let conn = match FrankenConnection::open(&path_str) {
875 Ok(c) => c,
876 Err(e) => {
877 let _ = self.writer_tokens.0.send(());
878 return Err(anyhow::Error::from(e).context(format!(
879 "opening concurrent writer at {}",
880 self.db_path.display()
881 )));
882 }
883 };
884 let storage = FrankenStorage::new(conn, self.db_path.clone());
885 if let Err(e) = storage.apply_config() {
886 let _ = self.writer_tokens.0.send(());
887 return Err(e);
888 }
889 let _ = storage.raw().execute("PRAGMA cache_size = -4096;");
891 Ok(WriterGuard {
892 storage,
893 mgr: self,
894 committed: false,
895 })
896 }
897
898 pub fn db_path(&self) -> &Path {
900 &self.db_path
901 }
902
903 pub fn reader_count(&self) -> usize {
905 self.readers.len()
906 }
907
908 pub fn max_writers(&self) -> usize {
910 self.config.max_writers
911 }
912}
913
914impl Drop for FrankenConnectionManager {
915 fn drop(&mut self) {
916 for reader in &mut self.readers {
917 reader.get_mut().0.close_best_effort_in_place();
918 }
919 }
920}
921
/// A writer connection checked out from a `FrankenConnectionManager`.
///
/// Dropping the guard rolls back unless `mark_committed` was called, closes the storage, and
/// returns the writer token to the manager.
pub struct WriterGuard<'a> {
    /// Storage wrapping the writer's own connection.
    storage: FrankenStorage,
    /// Manager the writer token is returned to on drop.
    mgr: &'a FrankenConnectionManager,
    /// Set by `mark_committed`; suppresses the rollback on drop.
    committed: bool,
}

impl<'a> WriterGuard<'a> {
    /// Shared access to the writer's storage.
    pub fn storage(&self) -> &FrankenStorage {
        &self.storage
    }

    /// Records that the caller has committed, so drop will not issue `ROLLBACK`.
    pub fn mark_committed(&mut self) {
        self.committed = true;
    }
}

impl Drop for WriterGuard<'_> {
    fn drop(&mut self) {
        if !self.committed {
            // Best effort: the result of the rollback is ignored.
            let _ = self.storage.raw().execute("ROLLBACK;");
        }
        self.storage.close_best_effort_in_place();
        // Return the token only after the connection is closed so the next writer can start.
        let _ = self.mgr.writer_tokens.0.send(());
    }
}
958
959fn serialize_json_to_msgpack(value: &serde_json::Value) -> Option<Vec<u8>> {
968 if value.is_null() || value.as_object().is_some_and(|o| o.is_empty()) {
969 return None;
970 }
971 rmp_serde::to_vec(value).ok()
972}
973
974fn deserialize_msgpack_to_json(bytes: &[u8]) -> serde_json::Value {
977 if bytes.is_empty() {
978 return serde_json::Value::Object(serde_json::Map::new());
979 }
980 rmp_serde::from_slice(bytes).unwrap_or_else(|e| {
981 tracing::debug!(
982 error = %e,
983 bytes_len = bytes.len(),
984 "Failed to deserialize metadata - returning empty object"
985 );
986 serde_json::Value::Object(serde_json::Map::new())
987 })
988}
989
990fn franken_read_metadata_compat(
992 row: &FrankenRow,
993 json_idx: usize,
994 bin_idx: usize,
995) -> serde_json::Value {
996 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
998 && !bytes.is_empty()
999 {
1000 return deserialize_msgpack_to_json(&bytes);
1001 }
1002
1003 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1005 return serde_json::from_str(&json_str)
1006 .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
1007 }
1008
1009 serde_json::Value::Object(serde_json::Map::new())
1010}
1011
1012fn franken_read_message_extra_compat(
1013 row: &FrankenRow,
1014 json_idx: usize,
1015 bin_idx: usize,
1016) -> serde_json::Value {
1017 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1018 && !bytes.is_empty()
1019 {
1020 return deserialize_msgpack_to_json(&bytes);
1021 }
1022
1023 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1024 return serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null);
1025 }
1026
1027 serde_json::Value::Null
1028}
1029
/// Errors produced by migration and backup routines.
#[derive(Debug, Error)]
pub enum MigrationError {
    /// The database must be rebuilt rather than migrated.
    #[error("Rebuild required: {reason}")]
    RebuildRequired {
        /// Human-readable explanation included in the error message.
        reason: String,
        /// Location of a backup, when one is available.
        backup_path: Option<std::path::PathBuf>,
    },

    /// Error reported by FrankenSQLite.
    #[error("Database error: {0}")]
    Database(#[from] frankensqlite::FrankenError),

    /// Filesystem or other I/O failure.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Any other failure, carried as its rendered message.
    #[error("{0}")]
    Other(String),
}
1056
1057impl From<anyhow::Error> for MigrationError {
1058 fn from(e: anyhow::Error) -> Self {
1059 MigrationError::Other(e.to_string())
1060 }
1061}
1062
/// NOTE(review): presumably the number of backups kept on disk; its use is not visible in this
/// part of the file — confirm.
const MAX_BACKUPS: usize = 3;
/// Pragma executed on the read-only connection before `VACUUM INTO` when staging a backup;
/// sets a 30000 ms busy timeout.
const BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA: &str = "PRAGMA busy_timeout = 30000;";
1066
/// File names that `is_user_data_file` recognizes as user data.
const USER_DATA_FILES: &[&str] = &["bookmarks.db", "tui_state.json", "sources.toml", ".env"];

/// Returns true when the final component of `path` is one of `USER_DATA_FILES`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, return false.
pub fn is_user_data_file(path: &Path) -> bool {
    let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    USER_DATA_FILES.iter().any(|known| *known == file_name)
}
1077
/// SQL that creates the `fts_messages` FTS5 virtual table if it does not exist.
///
/// The table is declared with `content=''` and the `porter` tokenizer; `created_at` is stored
/// `UNINDEXED`.
pub const FTS5_REGISTER_SQL: &str = "\
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(\
    content, title, agent, workspace, source_path, \
    created_at UNINDEXED, \
    content='', tokenize='porter'\
    )";

/// `meta` key naming the FrankenSQLite FTS rebuild generation.
const FTS_FRANKEN_REBUILD_META_KEY: &str = "fts_frankensqlite_rebuild_generation";
/// `meta` key naming the archive fingerprint associated with the FTS rebuild.
const FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY: &str = "fts_frankensqlite_archive_fingerprint";
/// Current FTS rebuild generation number.
const FTS_FRANKEN_REBUILD_GENERATION: i64 = 1;
/// `meta` key naming the archive fingerprint associated with daily-stats health.
const DAILY_STATS_HEALTH_META_KEY: &str = "daily_stats_archive_fingerprint";
/// `meta` key naming the daily-stats health generation.
const DAILY_STATS_HEALTH_GENERATION_META_KEY: &str = "daily_stats_health_generation";
/// Current daily-stats health generation number.
const DAILY_STATS_HEALTH_GENERATION: i64 = 1;

/// SQL that issues the FTS5 `delete-all` command against `fts_messages`.
pub const FTS5_DELETE_ALL_SQL: &str =
    "INSERT INTO fts_messages(fts_messages) VALUES('delete-all');";
1103
/// Test helper: opens the database at `db_path` and rebuilds its FTS data, discarding the
/// rebuild's return value.
///
/// Despite the name, the visible implementation goes through `FrankenStorage`.
#[cfg(test)]
pub(crate) fn materialize_fresh_fts_schema_via_rusqlite(db_path: &Path) -> Result<()> {
    let describe_open = || {
        format!(
            "opening frankensqlite db at {} for FTS materialization",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(describe_open)?;
    storage.rebuild_fts_via_frankensqlite()?;
    Ok(())
}
1119
/// Test helper: rebuilds the FTS data for the database at `db_path`, records the rebuild
/// generation, and returns the value reported by the rebuild.
///
/// Despite the name, the visible implementation goes through `FrankenStorage`.
#[cfg(test)]
pub(crate) fn rebuild_fts_via_rusqlite(db_path: &Path) -> Result<usize> {
    let describe_open = || {
        format!(
            "opening frankensqlite db at {} for FTS rebuild",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(describe_open)?;
    let rebuilt = storage.rebuild_fts_via_frankensqlite()?;
    storage.record_fts_franken_rebuild_generation()?;
    Ok(rebuilt)
}
1132
1133pub(crate) fn ensure_fts_consistency_via_rusqlite(db_path: &Path) -> Result<FtsConsistencyRepair> {
1134 let storage = FrankenStorage::open(db_path).with_context(|| {
1138 format!(
1139 "opening frankensqlite db at {} for FTS consistency check",
1140 db_path.display()
1141 )
1142 })?;
1143 storage.ensure_search_fallback_fts_consistency()
1144}
1145
1146pub fn create_backup(db_path: &Path) -> Result<Option<std::path::PathBuf>, MigrationError> {
1150 if !bundle_path_exists(db_path)? {
1151 return Ok(None);
1152 }
1153
1154 if !copyable_bundle_file_exists(db_path)? {
1155 return Ok(None);
1156 }
1157 let _ = copyable_bundle_sidecar_sources(db_path)?;
1158
1159 let backup_path = unique_backup_path(db_path);
1160 let vacuum_stage_path = vacuum_stage_backup_path(&backup_path);
1161
1162 match vacuum_into_backup_stage(db_path, &vacuum_stage_path) {
1165 Ok(()) => {
1166 fs::rename(&vacuum_stage_path, &backup_path)?;
1167 }
1168 Err(err) if backup_vacuum_error_requires_consistent_retry(&err) => {
1169 tracing::warn!(
1170 db_path = %db_path.display(),
1171 error = %err,
1172 "create_backup: VACUUM INTO hit transient contention; refusing raw WAL bundle copy"
1173 );
1174 return Err(MigrationError::Database(err));
1175 }
1176 Err(err) => {
1177 tracing::warn!(
1178 db_path = %db_path.display(),
1179 error = %err,
1180 "create_backup: VACUUM INTO failed; falling back to raw evidence copy"
1181 );
1182 }
1183 }
1184
1185 if backup_path.exists() {
1186 sync_file_if_exists(&backup_path)?;
1187 if let Some(parent) = backup_path.parent() {
1188 sync_parent_directory(parent)?;
1189 }
1190 return Ok(Some(backup_path));
1191 }
1192
1193 copy_database_bundle(db_path, &backup_path)?;
1198
1199 Ok(Some(backup_path))
1200}
1201
/// Writes a vacuumed copy of the database at `db_path` to `stage_path` using `VACUUM INTO`.
///
/// The source is opened read-only and given the backup busy-timeout pragma first. The
/// connection is always closed before returning, whether or not the vacuum succeeded.
///
/// # Errors
/// Returns the error from opening the connection, from the pragma, or from `VACUUM INTO`.
/// Failures while closing are logged, not returned.
fn vacuum_into_backup_stage(
    db_path: &Path,
    stage_path: &Path,
) -> std::result::Result<(), frankensqlite::FrankenError> {
    let mut conn = open_franken_with_flags(
        &db_path.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )?;
    // Run the fallible steps in a closure so the close below happens on every path.
    let result = (|| {
        conn.execute(BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA)?;
        let path_str = stage_path.to_string_lossy();
        conn.execute_compat("VACUUM INTO ?", fparams![path_str.as_ref()])?;
        Ok(())
    })();
    if let Err(close_err) = conn.close_in_place() {
        tracing::warn!(
            error = %close_err,
            db_path = %db_path.display(),
            "create_backup: close_in_place failed after VACUUM INTO; falling back to best-effort close"
        );
        conn.close_best_effort_in_place();
    }
    result
}
1226
/// Returns true when a `VACUUM INTO` failure is a retryable contention error, as decided by
/// `retryable_franken_error`. `create_backup` returns such errors to its caller instead of
/// falling back to a raw copy.
fn backup_vacuum_error_requires_consistent_retry(err: &frankensqlite::FrankenError) -> bool {
    retryable_franken_error(err)
}
1230
/// Records which members of a database bundle were moved by
/// [`move_database_bundle`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DatabaseBundleMoveResult {
    /// The main database file was renamed.
    pub database: bool,
    /// The `-wal` sidecar was renamed.
    pub wal: bool,
    /// The `-shm` sidecar was renamed.
    pub shm: bool,
}

impl DatabaseBundleMoveResult {
    /// Returns `true` when at least one bundle member was moved.
    pub fn moved_any(&self) -> bool {
        [self.database, self.wal, self.shm].contains(&true)
    }
}
1243
/// Builds the path of a database sidecar by appending `suffix` (for example
/// `"-wal"` or `"-shm"`) to the full text of `path`.
///
/// The suffix is appended to the raw OS string rather than a lossy UTF-8
/// rendering, so paths containing non-UTF-8 bytes keep those bytes intact and
/// the result still names the real sidecar file. For valid UTF-8 paths the
/// result is the same as simple string concatenation.
fn database_sidecar_path(path: &Path, suffix: &str) -> PathBuf {
    let mut sidecar = path.as_os_str().to_os_string();
    sidecar.push(suffix);
    PathBuf::from(sidecar)
}
1247
1248pub(crate) fn move_database_bundle(
1255 source_root: &Path,
1256 destination_root: &Path,
1257) -> std::io::Result<DatabaseBundleMoveResult> {
1258 let mut moved = DatabaseBundleMoveResult::default();
1259 if let Some(parent) = destination_root.parent() {
1260 fs::create_dir_all(parent)?;
1261 sync_parent_directory(parent)?;
1262 }
1263
1264 if bundle_path_exists(source_root)? {
1265 fs::rename(source_root, destination_root)?;
1266 moved.database = true;
1267 }
1268
1269 let wal_source = database_sidecar_path(source_root, "-wal");
1270 if bundle_path_exists(&wal_source)? {
1271 fs::rename(&wal_source, database_sidecar_path(destination_root, "-wal"))?;
1272 moved.wal = true;
1273 }
1274
1275 let shm_source = database_sidecar_path(source_root, "-shm");
1276 if bundle_path_exists(&shm_source)? {
1277 fs::rename(&shm_source, database_sidecar_path(destination_root, "-shm"))?;
1278 moved.shm = true;
1279 }
1280
1281 if moved.moved_any() {
1282 if let Some(parent) = source_root.parent() {
1283 sync_parent_directory(parent)?;
1284 }
1285 if let Some(parent) = destination_root.parent() {
1286 sync_parent_directory(parent)?;
1287 }
1288 }
1289
1290 Ok(moved)
1291}
1292
/// Reports whether anything exists at `path`, without following symlinks.
///
/// A missing path yields `Ok(false)`; any other metadata error is returned
/// to the caller.
fn bundle_path_exists(path: &Path) -> std::io::Result<bool> {
    let Err(lookup_error) = fs::symlink_metadata(path) else {
        return Ok(true);
    };
    if lookup_error.kind() == std::io::ErrorKind::NotFound {
        Ok(false)
    } else {
        Err(lookup_error)
    }
}
1300
1301fn copy_database_bundle(source_root: &Path, destination_root: &Path) -> Result<()> {
1302 if let Some(parent) = destination_root.parent() {
1303 fs::create_dir_all(parent).with_context(|| {
1304 format!(
1305 "creating destination directory for database bundle copy: {}",
1306 parent.display()
1307 )
1308 })?;
1309 sync_parent_directory(parent)
1310 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1311 }
1312
1313 if !copyable_bundle_file_exists(source_root)? {
1314 bail!(
1315 "database bundle root is missing before copy: {}",
1316 source_root.display()
1317 );
1318 }
1319
1320 let sidecars = copyable_bundle_sidecar_sources(source_root)?;
1321
1322 fs::copy(source_root, destination_root).with_context(|| {
1323 format!(
1324 "copying database bundle {} -> {}",
1325 source_root.display(),
1326 destination_root.display()
1327 )
1328 })?;
1329 sync_file_if_exists(destination_root).with_context(|| {
1330 format!(
1331 "syncing copied database bundle {}",
1332 destination_root.display()
1333 )
1334 })?;
1335
1336 for (source_sidecar, suffix) in sidecars {
1337 let destination_sidecar = database_sidecar_path(destination_root, suffix);
1338 fs::copy(&source_sidecar, &destination_sidecar).with_context(|| {
1339 format!(
1340 "copying database bundle sidecar {} -> {}",
1341 source_sidecar.display(),
1342 destination_sidecar.display()
1343 )
1344 })?;
1345 sync_file_if_exists(&destination_sidecar).with_context(|| {
1346 format!(
1347 "syncing copied database bundle sidecar {}",
1348 destination_sidecar.display()
1349 )
1350 })?;
1351 }
1352
1353 if let Some(parent) = destination_root.parent() {
1354 sync_parent_directory(parent)
1355 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1356 }
1357
1358 Ok(())
1359}
1360
1361fn copyable_bundle_sidecar_sources(source_root: &Path) -> Result<Vec<(PathBuf, &'static str)>> {
1362 let mut sidecars = Vec::new();
1363 for suffix in ["-wal", "-shm"] {
1364 let source_sidecar = database_sidecar_path(source_root, suffix);
1365 if copyable_bundle_file_exists(&source_sidecar)? {
1366 sidecars.push((source_sidecar, suffix));
1367 }
1368 }
1369 Ok(sidecars)
1370}
1371
1372fn copyable_bundle_file_exists(path: &Path) -> Result<bool> {
1373 match fs::symlink_metadata(path) {
1374 Ok(metadata) => {
1375 let file_type = metadata.file_type();
1376 if file_type.is_symlink() {
1377 bail!(
1378 "refusing to copy database bundle symlink: {}",
1379 path.display()
1380 );
1381 }
1382 if !file_type.is_file() {
1383 bail!(
1384 "refusing to copy non-file database bundle path: {}",
1385 path.display()
1386 );
1387 }
1388 Ok(true)
1389 }
1390 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1391 Err(err) => Err(err).with_context(|| {
1392 format!(
1393 "checking database bundle path before copy: {}",
1394 path.display()
1395 )
1396 }),
1397 }
1398}
1399
1400pub(crate) fn remove_database_files(path: &Path) -> std::io::Result<()> {
1402 let mut removed_any = false;
1403
1404 match fs::remove_file(path) {
1405 Ok(()) => removed_any = true,
1406 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1407 Err(err) => return Err(err),
1408 }
1409
1410 for suffix in ["-wal", "-shm"] {
1412 match fs::remove_file(database_sidecar_path(path, suffix)) {
1413 Ok(()) => removed_any = true,
1414 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1415 Err(err) => return Err(err),
1416 }
1417 }
1418
1419 if removed_any && let Some(parent) = path.parent() {
1420 sync_parent_directory(parent)?;
1421 }
1422
1423 Ok(())
1424}
1425
/// Opens the directory at `path` and calls `sync_all` on it.
///
/// # Errors
/// Returns the error from opening or syncing the directory.
#[cfg(not(windows))]
fn sync_parent_directory(path: &Path) -> std::io::Result<()> {
    let directory = fs::File::open(path)?;
    directory.sync_all()
}

/// Windows variant: performs no work and always succeeds.
#[cfg(windows)]
fn sync_parent_directory(_path: &Path) -> std::io::Result<()> {
    Ok(())
}
1435
/// Calls `sync_all` on the file at `path`, doing nothing when the file does
/// not exist.
///
/// The file is opened directly instead of being probed with `exists()` first:
/// a separate existence check leaves a window in which the file can vanish
/// before the open, turning "absent" into a spurious `NotFound` error.
///
/// # Errors
/// Returns any error from opening the file other than `NotFound`, or from
/// syncing it.
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    match fs::File::open(path) {
        Ok(file) => file.sync_all(),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(err) => Err(err),
    }
}
1442
1443pub fn cleanup_old_backups(db_path: &Path, keep_count: usize) -> Result<(), std::io::Error> {
1445 let parent = match db_path.parent() {
1446 Some(p) => p,
1447 None => return Ok(()),
1448 };
1449
1450 let db_name = db_path.file_name().and_then(|n| n.to_str()).unwrap_or("db");
1451
1452 let prefix = format!("{}.backup.", db_name);
1453
1454 let mut backups: Vec<(std::path::PathBuf, SystemTime)> = Vec::new();
1456
1457 if let Ok(entries) = fs::read_dir(parent) {
1458 for entry in entries.flatten() {
1459 let path = entry.path();
1460 if let Some(name) = path.file_name().and_then(|n| n.to_str())
1461 && is_backup_root_name(name, &prefix)
1462 && let Ok(meta) = fs::metadata(&path)
1463 && meta.is_file()
1464 && let Ok(mtime) = meta.modified()
1465 {
1466 backups.push((path, mtime));
1467 }
1468 }
1469 }
1470
1471 backups.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1473
1474 for (path, _) in backups.into_iter().skip(keep_count) {
1476 let _ = fs::remove_file(&path);
1477
1478 let _ = fs::remove_file(database_sidecar_path(&path, "-wal"));
1480 let _ = fs::remove_file(database_sidecar_path(&path, "-shm"));
1481 }
1482
1483 Ok(())
1484}
1485
1486#[derive(Debug, Clone)]
1487pub(crate) struct HistoricalDatabaseBundle {
1488 root_path: PathBuf,
1489 total_bytes: u64,
1490 modified_at_ms: i64,
1491 supports_direct_readonly: bool,
1492 probe: HistoricalBundleProbe,
1493}
1494
/// Facts read from a historical bundle by `probe_historical_bundle`.
///
/// The `Default` value (all `None` / `false` / `0`) is what a bundle gets
/// when it cannot be opened read-only.
#[derive(Debug, Clone, Copy, Default)]
struct HistoricalBundleProbe {
    /// Parsed `meta.schema_version`, if present and numeric.
    schema_version: Option<i64>,
    /// Number of `sqlite_master` rows named `fts_messages`; `None` when the
    /// count query failed.
    fts_schema_rows: Option<i64>,
    /// Whether a `SELECT ... FROM fts_messages LIMIT 1` succeeded.
    fts_queryable: bool,
    /// Largest `messages.id` (`0` when empty or unreadable).
    max_message_id: i64,
}
1502
/// Test-only health summary produced by
/// `probe_database_health_via_frankensqlite`.
#[cfg(test)]
// Only some test configurations read every field.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SqliteDatabaseHealthProbe {
    /// Parsed `meta.schema_version`, if present and numeric.
    pub schema_version: Option<i64>,
    /// Whether `PRAGMA quick_check(1)` reported `ok`.
    pub quick_check_ok: bool,
    /// Number of `sqlite_master` rows named `fts_messages`.
    pub fts_schema_rows: i64,
    /// Whether exactly one FTS schema row exists and a `LIMIT 1` query on it
    /// succeeded.
    pub fts_queryable: bool,
    /// Row count of `messages` (`0` when the quick check failed).
    pub message_count: i64,
    /// Largest `messages.id` (`0` when the quick check failed).
    pub max_message_id: i64,
}
1514
/// Outcome of an FTS consistency check/repair.
///
/// NOTE(review): the variant descriptions below are inferred from the names;
/// the code that produces them is outside this section — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FtsConsistencyRepair {
    /// Nothing needed repairing.
    AlreadyHealthy {
        /// Row count reported alongside the healthy verdict.
        rows: usize,
    },
    /// Missing rows were appended without a full rebuild.
    IncrementalCatchUp {
        /// Rows added during the catch-up.
        inserted_rows: usize,
        /// Row total reported after the catch-up.
        total_rows: usize,
    },
    /// The index was rebuilt.
    Rebuilt {
        /// Rows inserted by the rebuild.
        inserted_rows: usize,
    },
}
1528
/// Running totals for a historical salvage pass.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct HistoricalSalvageOutcome {
    /// Bundles that were looked at.
    pub bundles_considered: usize,
    /// Bundles that contributed imported data.
    pub bundles_imported: usize,
    /// Conversations imported across all bundles.
    pub conversations_imported: usize,
    /// Messages imported across all bundles.
    pub messages_imported: usize,
}

impl HistoricalSalvageOutcome {
    /// Adds every counter of `other` onto the matching counter of `self`.
    pub(crate) fn accumulate(&mut self, other: Self) {
        // Destructure so a newly added field cannot be silently left out.
        let Self {
            bundles_considered,
            bundles_imported,
            conversations_imported,
            messages_imported,
        } = other;
        self.bundles_considered += bundles_considered;
        self.bundles_imported += bundles_imported;
        self.conversations_imported += conversations_imported;
        self.messages_imported += messages_imported;
    }
}
1545
1546#[derive(Debug)]
1547struct HistoricalReadConnection {
1548 conn: FrankenConnection,
1549 method: &'static str,
1550 _tempdir: Option<tempfile::TempDir>,
1551}
1552
/// Constraint-free schema used to initialise the scratch database that
/// receives rows salvaged by `sqlite3 .recover`.
///
/// It declares only the tables whose `INSERT` statements are replayed during
/// recovery: `sources`, `agents`, `workspaces`, `conversations`, `messages`
/// and `snippets`.
const HISTORICAL_RECOVERY_CORE_SCHEMA: &str = r"
CREATE TABLE sources (
    id TEXT PRIMARY KEY,
    kind TEXT,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE agents (
    id INTEGER PRIMARY KEY,
    slug TEXT,
    name TEXT,
    version TEXT,
    kind TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT,
    display_name TEXT
);
CREATE TABLE conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER,
    workspace_id INTEGER,
    source_id TEXT,
    external_id TEXT,
    title TEXT,
    source_path TEXT,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
CREATE TABLE messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER,
    idx INTEGER,
    role TEXT,
    author TEXT,
    created_at INTEGER,
    content TEXT,
    extra_json TEXT,
    extra_bin BLOB
);
CREATE TABLE snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);
";
/// Version stamped as `salvage_version` into the `meta` ledger entry written
/// by `record_historical_bundle_import`.
const HISTORICAL_SALVAGE_LEDGER_VERSION: u32 = 2;
/// Format version for salvage progress records.
/// NOTE(review): presumably stored in `HistoricalBundleProgress::progress_version` — confirm at the write site.
const HISTORICAL_SALVAGE_PROGRESS_VERSION: u32 = 1;
/// Five minutes, expressed in milliseconds.
/// NOTE(review): by its name, a start-time tolerance for merging by source path; the use site is outside this section.
const SOURCE_PATH_MERGE_START_TOLERANCE_MS: i64 = 5 * 60 * 1000;
1630
1631#[derive(Debug, Clone, Serialize, Deserialize)]
1632struct HistoricalBundleProgress {
1633 progress_version: u32,
1634 path: String,
1635 bytes: u64,
1636 modified_at_ms: i64,
1637 method: String,
1638 last_completed_source_row_id: i64,
1639 conversations_imported: usize,
1640 messages_imported: usize,
1641 updated_at_ms: i64,
1642}
1643
1644#[derive(Debug, Clone)]
1645struct HistoricalBatchEntry {
1646 source_row_id: i64,
1647 agent_id: i64,
1648 workspace_id: Option<i64>,
1649 conversation: Conversation,
1650}
1651
/// Counters describing what one historical import batch inserted.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct HistoricalBatchImportTotals {
    /// Source rows inserted by the batch.
    inserted_source_rows: usize,
    /// Messages inserted by the batch.
    inserted_messages: usize,
}
1657
1658fn historical_bundle_root_paths(db_path: &Path) -> Vec<PathBuf> {
1659 let mut roots = Vec::new();
1660 let Some(parent) = db_path.parent() else {
1661 return roots;
1662 };
1663 let db_name = db_path
1664 .file_name()
1665 .and_then(|n| n.to_str())
1666 .unwrap_or("agent_search.db");
1667 let db_stem = db_path
1668 .file_stem()
1669 .and_then(|n| n.to_str())
1670 .unwrap_or("agent_search");
1671
1672 let mut push_root = |path: PathBuf| {
1673 if path == db_path {
1674 return;
1675 }
1676 if !roots.iter().any(|existing| existing == &path) {
1677 roots.push(path);
1678 }
1679 };
1680
1681 if let Ok(entries) = fs::read_dir(parent) {
1682 for entry in entries.flatten() {
1683 let path = entry.path();
1684 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1685 continue;
1686 };
1687 if has_db_sidecar_suffix(name) {
1688 continue;
1689 }
1690 if name.starts_with(&format!("{db_name}.backup."))
1691 || name.starts_with(&format!("{db_stem}.corrupt."))
1692 {
1693 push_root(path);
1694 }
1695 }
1696 }
1697
1698 let backups_dir = parent.join("backups");
1699 if let Ok(entries) = fs::read_dir(backups_dir) {
1700 for entry in entries.flatten() {
1701 let path = entry.path();
1702 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1703 continue;
1704 };
1705 if has_db_sidecar_suffix(name) {
1706 continue;
1707 }
1708 if name.starts_with(&format!("{db_name}.")) && name.ends_with(".bak") {
1709 push_root(path);
1710 }
1711 }
1712 }
1713
1714 push_named_database_children(&mut roots, db_path, &parent.join("repair-lab"), db_name);
1715 push_named_database_children(&mut roots, db_path, &parent.join("snapshots"), db_name);
1716
1717 roots
1718}
1719
/// For every entry of `dir`, appends `<entry>/<db_name>` to `roots` when that
/// file exists, is not the canonical database and is not already listed.
///
/// An unreadable `dir` is silently ignored.
fn push_named_database_children(
    roots: &mut Vec<PathBuf>,
    canonical_db_path: &Path,
    dir: &Path,
    db_name: &str,
) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };

    for candidate in entries.flatten().map(|entry| entry.path().join(db_name)) {
        if candidate == canonical_db_path {
            continue;
        }
        if candidate.exists() && !roots.contains(&candidate) {
            roots.push(candidate);
        }
    }
}
1738
/// Returns the modification time of `path` in milliseconds since the Unix
/// epoch.
///
/// Yields `0` when the metadata or timestamp cannot be read, or when the
/// timestamp lies before the epoch. A millisecond count too large for `i64`
/// saturates at `i64::MAX` rather than wrapping through a truncating cast.
fn file_mtime_ms(path: &Path) -> i64 {
    fs::metadata(path)
        .and_then(|meta| meta.modified())
        .ok()
        .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
        .map(|since_epoch| i64::try_from(since_epoch.as_millis()).unwrap_or(i64::MAX))
        .unwrap_or(0)
}
1747
1748fn bundle_total_bytes(root_path: &Path) -> u64 {
1749 let mut total = fs::metadata(root_path).map(|meta| meta.len()).unwrap_or(0);
1750 for suffix in ["-wal", "-shm"] {
1751 let sidecar = database_sidecar_path(root_path, suffix);
1752 total = total.saturating_add(fs::metadata(sidecar).map(|meta| meta.len()).unwrap_or(0));
1753 }
1754 total
1755}
1756
1757pub(crate) fn discover_historical_database_bundles(
1758 db_path: &Path,
1759) -> Vec<HistoricalDatabaseBundle> {
1760 let mut bundles: Vec<_> = historical_bundle_root_paths(db_path)
1761 .into_iter()
1762 .filter(|root| root.exists())
1763 .map(|root_path| {
1764 let modified_at_ms = file_mtime_ms(&root_path);
1765 let total_bytes = bundle_total_bytes(&root_path);
1766 let supports_direct_readonly = historical_bundle_supports_direct_readonly(&root_path);
1767 let probe = probe_historical_bundle(&root_path, supports_direct_readonly);
1768 HistoricalDatabaseBundle {
1769 modified_at_ms,
1770 total_bytes,
1771 supports_direct_readonly,
1772 root_path,
1773 probe,
1774 }
1775 })
1776 .filter(|bundle| bundle.total_bytes > 0)
1777 .collect();
1778
1779 fn bundle_priority(path: &Path) -> i32 {
1780 let path_str = path.to_string_lossy();
1781 if path_str.contains("/repair-lab/replay-") {
1782 return 5;
1783 }
1784 if path_str.contains("/repair-lab/") {
1785 return 4;
1786 }
1787 if path_str.contains("/snapshots/") {
1788 return 3;
1789 }
1790 if path_str.contains(".corrupt.") || path_str.contains("failed-baseline-seed") {
1791 return 0;
1792 }
1793 1
1794 }
1795
1796 fn bundle_health_rank(bundle: &HistoricalDatabaseBundle) -> i32 {
1797 let fts_clean = match bundle.probe.fts_schema_rows {
1820 Some(1) => bundle.probe.fts_queryable,
1821 Some(0) => bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION),
1822 _ => false,
1823 };
1824
1825 let clean_schema14_fts =
1826 bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION) && fts_clean;
1827 if clean_schema14_fts {
1828 return 5;
1829 }
1830
1831 if fts_clean {
1832 return 4;
1833 }
1834
1835 if bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION)
1836 && bundle.supports_direct_readonly
1837 {
1838 return 3;
1839 }
1840
1841 if bundle.supports_direct_readonly {
1842 return 2;
1843 }
1844
1845 1
1846 }
1847
1848 bundles.sort_by(|left, right| {
1849 bundle_health_rank(right)
1850 .cmp(&bundle_health_rank(left))
1851 .then_with(|| right.probe.max_message_id.cmp(&left.probe.max_message_id))
1852 .then_with(|| bundle_priority(&right.root_path).cmp(&bundle_priority(&left.root_path)))
1853 .then_with(|| {
1854 right
1855 .supports_direct_readonly
1856 .cmp(&left.supports_direct_readonly)
1857 })
1858 .then_with(|| right.total_bytes.cmp(&left.total_bytes))
1859 .then_with(|| right.modified_at_ms.cmp(&left.modified_at_ms))
1860 .then_with(|| right.root_path.cmp(&left.root_path))
1861 });
1862 bundles
1863}
1864
1865fn probe_historical_bundle(
1866 root_path: &Path,
1867 supports_direct_readonly: bool,
1868) -> HistoricalBundleProbe {
1869 if !supports_direct_readonly {
1870 return HistoricalBundleProbe::default();
1871 }
1872
1873 let Ok(conn) = open_historical_bundle_readonly(root_path) else {
1874 return HistoricalBundleProbe::default();
1875 };
1876
1877 let schema_version = read_meta_schema_version(&conn).ok().flatten();
1878 let fts_schema_rows: Option<i64> = conn
1879 .query_row_map(
1880 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
1881 fparams![],
1882 |row| row.get_typed(0),
1883 )
1884 .ok();
1885 let fts_queryable =
1886 historical_bundle_fts_queryable_via_frankensqlite(root_path, fts_schema_rows);
1887 let max_message_id: i64 = conn
1888 .query_row_map(
1889 "SELECT COALESCE(MAX(id), 0) FROM messages",
1890 fparams![],
1891 |row| row.get_typed(0),
1892 )
1893 .unwrap_or(0);
1894
1895 HistoricalBundleProbe {
1896 schema_version,
1897 fts_schema_rows,
1898 fts_queryable,
1899 max_message_id,
1900 }
1901}
1902
1903fn historical_bundle_fts_queryable_via_frankensqlite(
1904 root_path: &Path,
1905 fts_schema_rows: Option<i64>,
1906) -> bool {
1907 matches!(fts_schema_rows, Some(1))
1908 && FrankenStorage::open_readonly(root_path)
1909 .map(|storage| {
1910 storage
1911 .raw()
1912 .query("SELECT rowid FROM fts_messages LIMIT 1")
1913 .is_ok()
1914 })
1915 .unwrap_or(false)
1916}
1917
1918fn historical_bundle_supports_direct_readonly(root_path: &Path) -> bool {
1919 open_historical_bundle_readonly(root_path)
1920 .and_then(|conn| historical_bundle_has_queryable_core_tables(&conn))
1921 .is_ok()
1922}
1923
1924fn historical_table_exists(conn: &FrankenConnection, table: &str) -> Result<bool> {
1925 let found: Option<i64> = conn
1926 .query_row_map(
1927 "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1 LIMIT 1",
1928 fparams![table],
1929 |row| row.get_typed(0),
1930 )
1931 .optional()
1932 .with_context(|| format!("checking for historical table {table}"))?;
1933 Ok(found.is_some())
1934}
1935
1936fn probe_historical_table_reads(conn: &FrankenConnection, table: &str) -> Result<()> {
1937 if !historical_table_exists(conn, table)? {
1938 return Err(anyhow!(
1939 "historical database missing required table {table}"
1940 ));
1941 }
1942
1943 let sql = format!("SELECT rowid FROM {table} LIMIT 1");
1944 let _: Option<i64> = conn
1945 .query_row_map(&sql, fparams![], |row| row.get_typed(0))
1946 .optional()
1947 .with_context(|| format!("probing rows from historical table {table}"))?;
1948 Ok(())
1949}
1950
1951fn historical_bundle_has_queryable_core_tables(conn: &FrankenConnection) -> Result<()> {
1952 probe_historical_table_reads(conn, "conversations")?;
1953 probe_historical_table_reads(conn, "messages")?;
1954 Ok(())
1955}
1956
1957fn open_historical_bundle_readonly(root_path: &Path) -> Result<FrankenConnection> {
1958 let path_str = root_path.to_string_lossy();
1959 let flags = FrankenOpenFlags::SQLITE_OPEN_READ_ONLY;
1960 let conn = open_franken_with_flags(&path_str, flags)
1961 .with_context(|| format!("opening historical database {}", root_path.display()))?;
1962 Ok(conn)
1963}
1964
/// Reports whether `line` is an `INSERT` statement targeting one of the core
/// tables that recovery replays.
///
/// A line matches when it starts with `INSERT INTO ` or
/// `INSERT OR IGNORE INTO `, immediately followed by the table name wrapped
/// in a matching pair of single or double quotes. Leading whitespace, bare
/// (unquoted) table names and mismatched quotes do not match.
///
/// This runs once per line of a recovery dump, so it slices the input with
/// `strip_prefix` instead of building candidate prefixes with `format!`.
fn is_recoverable_insert_line(line: &str) -> bool {
    const RECOVERABLE_TABLES: [&str; 6] = [
        "sources",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
    ];
    const INSERT_PREFIXES: [&str; 2] = ["INSERT INTO ", "INSERT OR IGNORE INTO "];

    let Some(rest) = INSERT_PREFIXES
        .iter()
        .find_map(|prefix| line.strip_prefix(*prefix))
    else {
        return false;
    };

    // The table name must be quoted, and closed with the same quote character.
    let quote = match rest.chars().next() {
        Some(q @ ('\'' | '"')) => q,
        _ => return false,
    };
    let after_quote = &rest[quote.len_utf8()..];

    RECOVERABLE_TABLES.iter().any(|table| {
        after_quote
            .strip_prefix(*table)
            .is_some_and(|tail| tail.starts_with(quote))
    })
}
1982
1983fn recover_historical_bundle_via_sqlite3(
1984 bundle: &HistoricalDatabaseBundle,
1985) -> Result<HistoricalReadConnection> {
1986 let tempdir = tempfile::TempDir::new().context("creating temporary salvage directory")?;
1987 let recovered_db = tempdir.path().join("historical-recovered.db");
1988 let temp_conn = FrankenConnection::open(recovered_db.to_string_lossy().as_ref())
1989 .with_context(|| format!("creating recovered database {}", recovered_db.display()))?;
1990 temp_conn
1991 .execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA)
1992 .with_context(|| format!("initializing recovered schema {}", recovered_db.display()))?;
1993 drop(temp_conn);
1994
1995 let bundle_uri = format!("file:{}?immutable=1", bundle.root_path.to_string_lossy());
1996 let mut recover = Command::new("sqlite3")
1997 .arg(&bundle_uri)
1998 .arg(".recover")
1999 .stdout(Stdio::piped())
2000 .spawn()
2001 .with_context(|| {
2002 format!(
2003 "launching sqlite3 .recover for historical bundle {}",
2004 bundle.root_path.display()
2005 )
2006 })?;
2007 let recover_stdout = recover
2008 .stdout
2009 .take()
2010 .context("capturing sqlite3 .recover stdout")?;
2011
2012 let mut importer = Command::new("sqlite3")
2013 .arg(&recovered_db)
2014 .stdin(Stdio::piped())
2015 .spawn()
2016 .with_context(|| {
2017 format!(
2018 "launching sqlite3 importer for recovered bundle {}",
2019 recovered_db.display()
2020 )
2021 })?;
2022
2023 {
2024 let importer_stdin = importer
2025 .stdin
2026 .as_mut()
2027 .context("opening sqlite3 importer stdin")?;
2028 importer_stdin
2029 .write_all(b"BEGIN;\n")
2030 .context("starting recovery import transaction")?;
2031
2032 let reader = BufReader::new(recover_stdout);
2033 for line in reader.lines() {
2034 let line = line.context("reading sqlite3 .recover output")?;
2035 if is_recoverable_insert_line(&line) {
2036 importer_stdin
2037 .write_all(line.as_bytes())
2038 .context("writing recovered INSERT")?;
2039 importer_stdin
2040 .write_all(b"\n")
2041 .context("writing recovered INSERT newline")?;
2042 }
2043 }
2044
2045 importer_stdin
2046 .write_all(b"COMMIT;\n")
2047 .context("committing recovery import transaction")?;
2048 }
2049
2050 let recover_status = recover
2051 .wait()
2052 .context("waiting for sqlite3 .recover process")?;
2053 if !recover_status.success() {
2054 anyhow::bail!(
2055 "sqlite3 .recover exited with status {} for {}",
2056 recover_status,
2057 bundle.root_path.display()
2058 );
2059 }
2060
2061 let importer_status = importer
2062 .wait()
2063 .context("waiting for sqlite3 recovery importer")?;
2064 if !importer_status.success() {
2065 anyhow::bail!(
2066 "sqlite3 recovery importer exited with status {} for {}",
2067 importer_status,
2068 recovered_db.display()
2069 );
2070 }
2071
2072 let conn = open_historical_bundle_readonly(&recovered_db)?;
2073 historical_bundle_has_queryable_core_tables(&conn)?;
2074 Ok(HistoricalReadConnection {
2075 conn,
2076 method: "sqlite3-recover",
2077 _tempdir: Some(tempdir),
2078 })
2079}
2080
2081fn open_historical_bundle_for_salvage(
2082 bundle: &HistoricalDatabaseBundle,
2083) -> Result<HistoricalReadConnection> {
2084 match open_historical_bundle_readonly(&bundle.root_path) {
2085 Ok(conn) => {
2086 if historical_bundle_has_queryable_core_tables(&conn).is_ok() {
2087 return Ok(HistoricalReadConnection {
2088 conn,
2089 method: "direct-readonly",
2090 _tempdir: None,
2091 });
2092 }
2093 }
2094 Err(err) => {
2095 tracing::warn!(
2096 path = %bundle.root_path.display(),
2097 error = %err,
2098 "historical bundle direct open failed; falling back to sqlite3 .recover"
2099 );
2100 }
2101 }
2102
2103 recover_historical_bundle_via_sqlite3(bundle)
2104}
2105
2106fn historical_bundle_counts(conn: &FrankenConnection) -> Result<(usize, usize)> {
2107 let conversations: i64 =
2108 conn.query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
2109 row.get_typed(0)
2110 })?;
2111 let messages: i64 = conn.query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
2112 row.get_typed(0)
2113 })?;
2114 Ok((
2115 usize::try_from(conversations.max(0)).unwrap_or(usize::MAX),
2116 usize::try_from(messages.max(0)).unwrap_or(usize::MAX),
2117 ))
2118}
2119
2120fn clear_seeded_runtime_meta(conn: &FrankenConnection) -> Result<()> {
2121 conn.execute(
2122 "DELETE FROM meta
2123 WHERE key LIKE 'historical_bundle_salvaged:%'
2124 OR key IN ('last_scan_ts', 'last_indexed_at', 'last_embedded_message_id')",
2125 )?;
2126 Ok(())
2127}
2128
2129fn record_historical_bundle_import(
2130 conn: &FrankenConnection,
2131 bundle: &HistoricalDatabaseBundle,
2132 method: &str,
2133 conversations_imported: usize,
2134 messages_imported: usize,
2135) -> Result<()> {
2136 let key = FrankenStorage::historical_bundle_meta_key(bundle);
2137 let value = serde_json::json!({
2138 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
2139 "path": bundle.root_path.display().to_string(),
2140 "bytes": bundle.total_bytes,
2141 "modified_at_ms": bundle.modified_at_ms,
2142 "method": method,
2143 "conversations_imported": conversations_imported,
2144 "messages_imported": messages_imported,
2145 "recorded_at_ms": FrankenStorage::now_millis(),
2146 });
2147 let value_str = serde_json::to_string(&value)?;
2148 conn.execute_compat(
2149 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
2150 fparams![key, value_str],
2151 )?;
2152 Ok(())
2153}
2154
2155fn finalize_seeded_canonical_bundle_via_rusqlite(
2156 canonical_db_path: &Path,
2157 bundle: &HistoricalDatabaseBundle,
2158 conversations_imported: usize,
2159 messages_imported: usize,
2160) -> Result<()> {
2161 let _fts_repair =
2162 ensure_fts_consistency_via_rusqlite(canonical_db_path).with_context(|| {
2163 format!(
2164 "repairing staged canonical FTS consistency before finalization: {}",
2165 canonical_db_path.display()
2166 )
2167 })?;
2168
2169 let path_str = canonical_db_path.to_string_lossy();
2170 let conn = FrankenConnection::open(path_str.as_ref()).with_context(|| {
2171 format!(
2172 "opening seeded canonical database for post-seed finalization: {}",
2173 canonical_db_path.display()
2174 )
2175 })?;
2176 conn.execute("PRAGMA busy_timeout = 30000;")
2177 .with_context(|| {
2178 format!(
2179 "configuring busy timeout for seeded canonical database {}",
2180 canonical_db_path.display()
2181 )
2182 })?;
2183 let schema_version = read_meta_schema_version(&conn)?;
2184
2185 if let Some(version) = schema_version
2186 && version < CURRENT_SCHEMA_VERSION
2187 && version != 13
2188 {
2189 anyhow::bail!(
2190 "seeded canonical bundle schema_version {version} is too old for baseline import and cannot be finalized automatically"
2191 );
2192 }
2193
2194 clear_seeded_runtime_meta(&conn)?;
2195
2196 conn.execute_compat(
2197 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1)",
2198 fparams![CURRENT_SCHEMA_VERSION.to_string()],
2199 )?;
2200
2201 conn.execute_compat(
2202 "INSERT OR IGNORE INTO _schema_migrations(version, name) VALUES(?1, 'fts_contentless')",
2203 fparams![CURRENT_SCHEMA_VERSION],
2204 )?;
2205 record_historical_bundle_import(
2206 &conn,
2207 bundle,
2208 "baseline-bulk-sql-copy",
2209 conversations_imported,
2210 messages_imported,
2211 )?;
2212 Ok(())
2213}
2214
2215fn read_meta_schema_version(conn: &FrankenConnection) -> Result<Option<i64>> {
2216 let version: Option<String> = conn
2217 .query_row_map(
2218 "SELECT value FROM meta WHERE key = 'schema_version'",
2219 fparams![],
2220 |row| row.get_typed(0),
2221 )
2222 .optional()?;
2223 Ok(version.and_then(|raw| raw.parse::<i64>().ok()))
2224}
2225
/// Test helper: counts the `sqlite_master` rows named `fts_messages`.
#[cfg(test)]
fn franken_fts_schema_rows(conn: &FrankenConnection) -> Result<i64> {
    let counted = conn.query_row_map(
        "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
        fparams![],
        |row| row.get_typed(0),
    );
    counted.context("counting sqlite_master rows for fts_messages via frankensqlite")
}
2235
/// Test helper: reports whether a `LIMIT 1` select on `fts_messages`
/// succeeds.
#[cfg(test)]
fn franken_fts_limit_probe(conn: &FrankenConnection) -> bool {
    let probe = conn.query("SELECT rowid FROM fts_messages LIMIT 1");
    probe.is_ok()
}
2240
/// Test helper: opens `db_path` and gathers a health summary.
///
/// Collects the schema version, the `PRAGMA quick_check(1)` verdict and the
/// FTS state. Message statistics are queried only when the quick check
/// reports `ok`; otherwise both are reported as `0`.
///
/// # Errors
/// Fails when the database cannot be opened or any executed query fails.
#[cfg(test)]
// Not every test configuration calls this helper.
#[allow(dead_code)]
pub(crate) fn probe_database_health_via_frankensqlite(
    db_path: &Path,
) -> Result<SqliteDatabaseHealthProbe> {
    let db_path_text = db_path.to_string_lossy();
    let conn = FrankenConnection::open(db_path_text.as_ref()).with_context(|| {
        format!(
            "opening frankensqlite db at {} for database health probe",
            db_path.display()
        )
    })?;
    conn.execute_batch("PRAGMA busy_timeout = 30000;")
        .with_context(|| {
            format!(
                "configuring busy timeout for database health probe at {}",
                db_path.display()
            )
        })?;

    let schema_version = read_meta_schema_version(&conn)?;

    let quick_check_status: String = conn
        .query_row_map("PRAGMA quick_check(1)", fparams![], |row| row.get_typed(0))
        .with_context(|| format!("running PRAGMA quick_check(1) for {}", db_path.display()))?;
    let quick_check_ok = quick_check_status.trim().eq_ignore_ascii_case("ok");

    let fts_schema_rows = franken_fts_schema_rows(&conn)?;
    let fts_queryable = fts_schema_rows == 1 && franken_fts_limit_probe(&conn);

    // Skip the message statistics entirely when the quick check failed.
    let (message_count, max_message_id) = if quick_check_ok {
        let counted: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .context("counting messages during frankensqlite database health probe")?;
        let highest_id: i64 = conn
            .query_row_map(
                "SELECT COALESCE(MAX(id), 0) FROM messages",
                fparams![],
                |row| row.get_typed(0),
            )
            .context("reading max message id during frankensqlite database health probe")?;
        (counted, highest_id)
    } else {
        (0, 0)
    };

    Ok(SqliteDatabaseHealthProbe {
        schema_version,
        quick_check_ok,
        fts_schema_rows,
        fts_queryable,
        message_count,
        max_message_id,
    })
}
2302
2303struct StagedHistoricalSeed {
2304 tempdir: tempfile::TempDir,
2305 db_path: PathBuf,
2306}
2307
2308fn stage_historical_bundle_for_seed(
2309 canonical_db_path: &Path,
2310 bundle: &HistoricalDatabaseBundle,
2311) -> Result<StagedHistoricalSeed> {
2312 let canonical_parent = canonical_db_path.parent().unwrap_or_else(|| Path::new("."));
2313 fs::create_dir_all(canonical_parent).with_context(|| {
2314 format!(
2315 "creating canonical database directory before bulk historical seed import: {}",
2316 canonical_parent.display()
2317 )
2318 })?;
2319 let tempdir = tempfile::TempDir::new_in(canonical_parent)
2320 .context("creating temporary baseline seed directory")?;
2321 let staged_seed_db = tempdir.path().join("baseline-seed-output.db");
2322 copy_database_bundle(&bundle.root_path, &staged_seed_db)?;
2323
2324 Ok(StagedHistoricalSeed {
2325 tempdir,
2326 db_path: staged_seed_db,
2327 })
2328}
2329
2330fn promote_staged_historical_seed(
2331 canonical_db_path: &Path,
2332 staged_seed: &StagedHistoricalSeed,
2333) -> Result<()> {
2334 let canonical_backup = staged_seed
2335 .tempdir
2336 .path()
2337 .join("pre-seed-canonical-backup.db");
2338 let had_canonical = canonical_db_path.exists()
2339 || database_sidecar_path(canonical_db_path, "-wal").exists()
2340 || database_sidecar_path(canonical_db_path, "-shm").exists();
2341
2342 if had_canonical {
2343 move_database_bundle(canonical_db_path, &canonical_backup).with_context(|| {
2344 format!(
2345 "backing up canonical database before promoting staged historical seed import: {}",
2346 canonical_db_path.display()
2347 )
2348 })?;
2349 }
2350
2351 if let Err(err) =
2352 move_database_bundle(&staged_seed.db_path, canonical_db_path).with_context(|| {
2353 format!(
2354 "promoting staged historical seed database bundle {} into canonical path {}",
2355 staged_seed.db_path.display(),
2356 canonical_db_path.display()
2357 )
2358 })
2359 {
2360 if had_canonical {
2361 let _ = move_database_bundle(&canonical_backup, canonical_db_path);
2362 }
2363 return Err(err);
2364 }
2365
2366 Ok(())
2367}
2368
2369pub(crate) fn seed_canonical_from_best_historical_bundle(
2370 canonical_db_path: &Path,
2371) -> Result<Option<HistoricalSalvageOutcome>> {
2372 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
2373 let mut last_seed_error: Option<anyhow::Error> = None;
2374 for bundle in ordered_bundles
2375 .into_iter()
2376 .filter(|bundle| bundle.supports_direct_readonly)
2377 {
2378 if let Some(version) = bundle.probe.schema_version
2379 && version < 13
2380 {
2381 let err = anyhow!(
2382 "historical bundle {} schema_version {version} is too old for baseline import",
2383 bundle.root_path.display()
2384 );
2385 tracing::warn!(
2386 path = %bundle.root_path.display(),
2387 schema_version = version,
2388 "historical bundle is too old for baseline seed import"
2389 );
2390 last_seed_error = Some(err);
2391 continue;
2392 }
2393
2394 let source = open_historical_bundle_for_salvage(&bundle).with_context(|| {
2395 format!(
2396 "opening historical seed bundle {} for baseline import",
2397 bundle.root_path.display()
2398 )
2399 })?;
2400 let (conversations_imported, messages_imported) = historical_bundle_counts(&source.conn)?;
2401
2402 let staged_seed = match stage_historical_bundle_for_seed(canonical_db_path, &bundle) {
2403 Ok(staged_seed) => staged_seed,
2404 Err(err) => {
2405 tracing::warn!(
2406 path = %bundle.root_path.display(),
2407 error = %err,
2408 "bulk baseline seed staging from historical bundle failed; trying next candidate"
2409 );
2410 last_seed_error = Some(err);
2411 continue;
2412 }
2413 };
2414
2415 if let Err(err) = finalize_seeded_canonical_bundle_via_rusqlite(
2416 &staged_seed.db_path,
2417 &bundle,
2418 conversations_imported,
2419 messages_imported,
2420 ) {
2421 tracing::warn!(
2422 path = %bundle.root_path.display(),
2423 error = %err,
2424 "finalizing staged historical seed import failed; trying next candidate"
2425 );
2426 last_seed_error = Some(err);
2427 continue;
2428 }
2429
2430 if let Err(err) = promote_staged_historical_seed(canonical_db_path, &staged_seed) {
2431 tracing::warn!(
2432 path = %bundle.root_path.display(),
2433 error = %err,
2434 "promoting staged historical seed import failed; trying next candidate"
2435 );
2436 last_seed_error = Some(err);
2437 continue;
2438 }
2439
2440 tracing::info!(
2441 path = %bundle.root_path.display(),
2442 conversations_imported,
2443 messages_imported,
2444 "seeded empty canonical database from largest healthy historical bundle"
2445 );
2446
2447 return Ok(Some(HistoricalSalvageOutcome {
2448 bundles_considered: 0,
2449 bundles_imported: 1,
2450 conversations_imported,
2451 messages_imported,
2452 }));
2453 }
2454 if let Some(err) = last_seed_error {
2455 return Err(err);
2456 }
2457 Ok(None)
2458}
2459
2460fn parse_json_column(value: Option<String>) -> serde_json::Value {
2461 value
2462 .and_then(|raw| serde_json::from_str(&raw).ok())
2463 .unwrap_or(serde_json::Value::Null)
2464}
2465
/// Object key that marks a JSON value as a wrapper around an unparsed
/// historical JSON string (see `wrap_historical_raw_json`).
const HISTORICAL_RAW_JSON_SENTINEL_KEY: &str = "__cass_historical_raw_json__";
2467
2468fn wrap_historical_raw_json(raw: String) -> serde_json::Value {
2469 serde_json::json!({ HISTORICAL_RAW_JSON_SENTINEL_KEY: raw })
2470}
2471
2472fn historical_raw_json(value: &serde_json::Value) -> Option<&str> {
2473 match value {
2474 serde_json::Value::Object(map) if map.len() == 1 => map
2475 .get(HISTORICAL_RAW_JSON_SENTINEL_KEY)
2476 .and_then(serde_json::Value::as_str),
2477 _ => None,
2478 }
2479}
2480
2481fn parse_historical_json_column(value: Option<String>) -> serde_json::Value {
2482 match value {
2483 Some(raw) if raw.trim().is_empty() => serde_json::Value::Null,
2484 Some(raw) => wrap_historical_raw_json(raw),
2485 None => serde_json::Value::Null,
2486 }
2487}
2488
/// Reports whether the `CASS_DEBUG_HISTORICAL_SALVAGE` environment variable is
/// set; its value is not inspected.
fn historical_salvage_debug_enabled() -> bool {
    let debug_flag = std::env::var_os("CASS_DEBUG_HISTORICAL_SALVAGE");
    debug_flag.is_some()
}
2492
/// Per-batch limits for historical imports, produced by
/// `historical_import_batch_limits`.
#[derive(Debug, Clone, Copy)]
struct HistoricalImportBatchLimits {
    /// Limit on conversations; overridable via
    /// `CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS`.
    conversations: usize,
    /// Limit on messages; overridable via `CASS_HISTORICAL_IMPORT_BATCH_MESSAGES`.
    messages: usize,
    /// Limit on payload size; overridable via `CASS_HISTORICAL_IMPORT_BATCH_CHARS`.
    payload_chars: usize,
}
2499
2500fn env_positive_usize(key: &str) -> Option<usize> {
2501 dotenvy::var(key)
2502 .ok()
2503 .and_then(|value| value.parse::<usize>().ok())
2504 .filter(|value| *value > 0)
2505}
2506
2507fn historical_import_batch_limits() -> HistoricalImportBatchLimits {
2508 let cpu_count = std::thread::available_parallelism()
2509 .map(std::num::NonZeroUsize::get)
2510 .unwrap_or(1);
2511
2512 let default_limits = if cpu_count >= 32 {
2513 HistoricalImportBatchLimits {
2514 conversations: 128,
2515 messages: 16_384,
2516 payload_chars: 12_000_000,
2517 }
2518 } else {
2519 HistoricalImportBatchLimits {
2520 conversations: 32,
2521 messages: 4_096,
2522 payload_chars: 3_000_000,
2523 }
2524 };
2525
2526 HistoricalImportBatchLimits {
2527 conversations: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS")
2528 .unwrap_or(default_limits.conversations),
2529 messages: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_MESSAGES")
2530 .unwrap_or(default_limits.messages),
2531 payload_chars: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CHARS")
2532 .unwrap_or(default_limits.payload_chars),
2533 }
2534}
2535
2536fn json_value_size_hint(value: &serde_json::Value) -> usize {
2537 if let Some(raw) = historical_raw_json(value) {
2538 return raw.len();
2539 }
2540 match value {
2541 serde_json::Value::Null => 0,
2542 other => serde_json::to_string(other)
2543 .map(|raw| raw.len())
2544 .unwrap_or(0),
2545 }
2546}
2547
2548fn message_payload_size_hint(message: &Message) -> usize {
2549 message
2550 .content
2551 .len()
2552 .saturating_add(json_value_size_hint(&message.extra_json))
2553}
2554
/// Returns `true` when `name` is the main file of a backup bundle.
///
/// A root name starts with `prefix` and is not a database sidecar. Sidecar
/// detection is delegated to `has_db_sidecar_suffix`, so lock sidecars
/// (`-lock-shared`, `-lock-reserved`, `-lock-pending`) are excluded in the same
/// way as `-wal` and `-shm` files instead of being mistaken for backup roots.
fn is_backup_root_name(name: &str, prefix: &str) -> bool {
    name.starts_with(prefix) && !has_db_sidecar_suffix(name)
}

/// Returns `true` when `name` ends with one of the known database sidecar
/// suffixes (`-wal`, `-shm`, `-lock-shared`, `-lock-reserved`, `-lock-pending`).
fn has_db_sidecar_suffix(name: &str) -> bool {
    const SIDECAR_SUFFIXES: &[&str] = &[
        "-wal",
        "-shm",
        "-lock-shared",
        "-lock-reserved",
        "-lock-pending",
    ];
    SIDECAR_SUFFIXES.iter().any(|suffix| name.ends_with(suffix))
}
2575
/// Schema version that `check_schema_compatibility` treats as fully compatible
/// (through the `SCHEMA_VERSION` alias).
pub const CURRENT_SCHEMA_VERSION: i64 = 20;
/// Oldest schema version that `check_schema_compatibility` reports as
/// `NeedsMigration`; older positive versions are reported as `NeedsRebuild`.
const MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION: i64 = 13;
2579
/// Outcome of inspecting an on-disk database's schema version
/// (see `check_schema_compatibility`).
#[derive(Debug, Clone)]
pub enum SchemaCheck {
    /// The stored schema version equals `SCHEMA_VERSION`.
    Compatible,
    /// The database contains no tables at all, or its stored version lies in
    /// `MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION`.
    NeedsMigration,
    /// The database cannot be used as-is; the string carries the reason.
    NeedsRebuild(String),
}
2590
2591fn schema_check_error_requires_rebuild(err: &frankensqlite::FrankenError) -> bool {
2592 matches!(
2596 err,
2597 frankensqlite::FrankenError::DatabaseCorrupt { .. }
2598 | frankensqlite::FrankenError::WalCorrupt { .. }
2599 | frankensqlite::FrankenError::NotADatabase { .. }
2600 | frankensqlite::FrankenError::ShortRead { .. }
2601 )
2602}
2603
/// Builds a sibling path of `path` for a backup file.
///
/// The file name is `<name>.backup.<pid>.<nanos since epoch>.<nonce>`, where
/// `<name>` falls back to `db` when `path` has no UTF-8 file name, the
/// timestamp falls back to 0 when the clock is before the Unix epoch, and the
/// nonce is a process-wide counter incremented on every call.
fn unique_backup_path(path: &Path) -> PathBuf {
    use std::sync::atomic::{AtomicU64, Ordering};

    static NEXT_NONCE: AtomicU64 = AtomicU64::new(0);

    let nanos_since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let sequence = NEXT_NONCE.fetch_add(1, Ordering::Relaxed);
    let base_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db",
    };
    let pid = std::process::id();

    path.with_file_name(format!(
        "{base_name}.backup.{pid}.{nanos_since_epoch}.{sequence}"
    ))
}
2621
/// Derives the hidden staging path for `backup_path`.
///
/// The result is a sibling named `.<name>.vacuum-in-progress`, where `<name>`
/// falls back to `db.backup` when `backup_path` has no UTF-8 file name.
fn vacuum_stage_backup_path(backup_path: &Path) -> PathBuf {
    let backup_name = match backup_path.file_name() {
        Some(os_name) => os_name.to_str().unwrap_or("db.backup"),
        None => "db.backup",
    };

    let mut staged_name = String::with_capacity(backup_name.len() + 20);
    staged_name.push('.');
    staged_name.push_str(backup_name);
    staged_name.push_str(".vacuum-in-progress");

    backup_path.with_file_name(staged_name)
}
2629
2630fn check_schema_compatibility(
2634 path: &Path,
2635) -> std::result::Result<SchemaCheck, frankensqlite::FrankenError> {
2636 let mut conn = open_franken_with_flags(
2637 &path.to_string_lossy(),
2638 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
2639 )?;
2640
2641 let result = (|| {
2642 let meta_exists: i32 = conn.query_row_map(
2644 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='meta'",
2645 fparams![],
2646 |row| row.get_typed(0),
2647 )?;
2648
2649 if meta_exists == 0 {
2650 let table_count: i32 = conn.query_row_map(
2653 "SELECT COUNT(*) FROM sqlite_master WHERE type='table'",
2654 fparams![],
2655 |row| row.get_typed(0),
2656 )?;
2657
2658 if table_count == 0 {
2659 return Ok(SchemaCheck::NeedsMigration);
2661 }
2662
2663 return Ok(SchemaCheck::NeedsRebuild(
2665 "Database missing schema version metadata".to_string(),
2666 ));
2667 }
2668
2669 let version: Option<i64> = conn
2671 .query_row_map(
2672 "SELECT value FROM meta WHERE key = 'schema_version'",
2673 fparams![],
2674 |row| Ok(row.get_typed::<String>(0)?.parse().ok()),
2675 )
2676 .ok()
2677 .flatten();
2678
2679 match version {
2680 Some(v) if v == SCHEMA_VERSION => Ok(SchemaCheck::Compatible),
2681 Some(v) if (MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION).contains(&v) => {
2682 Ok(SchemaCheck::NeedsMigration)
2683 }
2684 Some(v) if v > 0 && v < MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION => {
2685 Ok(SchemaCheck::NeedsRebuild(format!(
2686 "Schema version {} is too old for in-place migration; supported upgrade path starts at version {}",
2687 v, MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION
2688 )))
2689 }
2690 Some(v) => {
2691 Ok(SchemaCheck::NeedsRebuild(format!(
2693 "Schema version {} is newer than supported version {}",
2694 v, SCHEMA_VERSION
2695 )))
2696 }
2697 None => Ok(SchemaCheck::NeedsRebuild(
2698 "Schema version not found or invalid".to_string(),
2699 )),
2700 }
2701 })();
2702
2703 if let Err(close_err) = conn.close_in_place() {
2704 tracing::warn!(
2705 error = %close_err,
2706 db_path = %path.display(),
2707 "check_schema_compatibility: close_in_place failed; falling back to best-effort close"
2708 );
2709 conn.close_best_effort_in_place();
2710 }
2711
2712 result
2713}
2714
/// Module-private alias for `CURRENT_SCHEMA_VERSION`.
const SCHEMA_VERSION: i64 = CURRENT_SCHEMA_VERSION;
2716
/// Test-only SQL for schema version 1: creates the `meta`, `agents`,
/// `workspaces`, `conversations`, `messages`, `snippets`, `tags` and
/// `conversation_tags` tables plus two indexes.
#[cfg(test)]
const MIGRATION_V1: &str = r"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS meta (
 key TEXT PRIMARY KEY,
 value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
 id INTEGER PRIMARY KEY,
 slug TEXT NOT NULL UNIQUE,
 name TEXT NOT NULL,
 version TEXT,
 kind TEXT NOT NULL,
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
 id INTEGER PRIMARY KEY,
 path TEXT NOT NULL UNIQUE,
 display_name TEXT
);

CREATE TABLE IF NOT EXISTS conversations (
 id INTEGER PRIMARY KEY,
 agent_id INTEGER NOT NULL REFERENCES agents(id),
 workspace_id INTEGER REFERENCES workspaces(id),
 external_id TEXT,
 title TEXT,
 source_path TEXT NOT NULL,
 started_at INTEGER,
 ended_at INTEGER,
 approx_tokens INTEGER,
 metadata_json TEXT,
 UNIQUE(agent_id, external_id)
);

CREATE TABLE IF NOT EXISTS messages (
 id INTEGER PRIMARY KEY,
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 idx INTEGER NOT NULL,
 role TEXT NOT NULL,
 author TEXT,
 created_at INTEGER,
 content TEXT NOT NULL,
 extra_json TEXT,
 UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
 id INTEGER PRIMARY KEY,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 file_path TEXT,
 start_line INTEGER,
 end_line INTEGER,
 language TEXT,
 snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
 id INTEGER PRIMARY KEY,
 name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
 PRIMARY KEY (conversation_id, tag_id)
);

CREATE INDEX IF NOT EXISTS idx_conversations_agent_started
 ON conversations(agent_id, started_at DESC);

CREATE INDEX IF NOT EXISTS idx_messages_conv_idx
 ON messages(conversation_id, idx);

";
2796
/// Test-only SQL for schema version 2: creates the `fts_messages` FTS5 table
/// (porter tokenizer) if missing and fills it from the existing messages.
#[cfg(test)]
const MIGRATION_V2: &str = r"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(
 content,
 title,
 agent,
 workspace,
 source_path,
 created_at UNINDEXED,
 message_id UNINDEXED,
 tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
 m.content,
 c.title,
 a.slug,
 w.path,
 c.source_path,
 m.created_at,
 m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
2823
/// Test-only SQL for schema version 3: drops `fts_messages`, recreates it and
/// refills it from the existing messages.
#[cfg(test)]
#[allow(dead_code)]
const MIGRATION_V3: &str = r"
DROP TABLE IF EXISTS fts_messages;
CREATE VIRTUAL TABLE fts_messages USING fts5(
 content,
 title,
 agent,
 workspace,
 source_path,
 created_at UNINDEXED,
 message_id UNINDEXED,
 tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
 m.content,
 c.title,
 a.slug,
 w.path,
 c.source_path,
 m.created_at,
 m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
2852
/// Test-only SQL for schema version 4: creates the `sources` table and inserts
/// the default `local` source row.
#[cfg(test)]
const MIGRATION_V4: &str = r"
-- Sources table for tracking where conversations come from
CREATE TABLE IF NOT EXISTS sources (
 id TEXT PRIMARY KEY, -- source_id (e.g., 'local', 'work-laptop')
 kind TEXT NOT NULL, -- 'local', 'ssh', etc.
 host_label TEXT, -- display label
 machine_id TEXT, -- optional stable machine id
 platform TEXT, -- 'macos', 'linux', 'windows'
 config_json TEXT, -- JSON blob for extra config (SSH params, path rewrites)
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

-- Bootstrap: Insert the default 'local' source
INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
2871
/// Test-only SQL for schema version 5: rebuilds `conversations` with the
/// `source_id` and `origin_host` columns and a
/// `UNIQUE(source_id, agent_id, external_id)` constraint, copying existing rows
/// with `source_id = 'local'`.
#[cfg(test)]
const MIGRATION_V5: &str = r"
-- Add provenance columns to conversations table
-- SQLite cannot alter unique constraints, so we need to recreate the table

-- Create new table with provenance columns and updated unique constraint
CREATE TABLE conversations_new (
 id INTEGER PRIMARY KEY,
 agent_id INTEGER NOT NULL REFERENCES agents(id),
 workspace_id INTEGER REFERENCES workspaces(id),
 source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
 external_id TEXT,
 title TEXT,
 source_path TEXT NOT NULL,
 started_at INTEGER,
 ended_at INTEGER,
 approx_tokens INTEGER,
 metadata_json TEXT,
 origin_host TEXT,
 UNIQUE(source_id, agent_id, external_id)
);

-- Copy data from old table (all existing conversations get source_id='local')
INSERT INTO conversations_new (id, agent_id, workspace_id, source_id, external_id, title,
 source_path, started_at, ended_at, approx_tokens, metadata_json, origin_host)
SELECT id, agent_id, workspace_id, 'local', external_id, title,
 source_path, started_at, ended_at, approx_tokens, metadata_json, NULL
FROM conversations;

-- Drop old table and rename new
DROP TABLE conversations;
ALTER TABLE conversations_new RENAME TO conversations;

-- Recreate indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
";
2909
/// Test-only SQL for schema version 6: adds an index on
/// `conversations(source_path)`.
#[cfg(test)]
const MIGRATION_V6: &str = r"
-- Optimize lookup by source_path (used by TUI detail view)
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
";
2915
/// Test-only SQL for schema version 7: adds the `metadata_bin` and `extra_bin`
/// BLOB columns to `conversations` and `messages`.
#[cfg(test)]
const MIGRATION_V7: &str = r"
-- Add binary columns for MessagePack serialization (Opt 3.1)
-- Binary format is 50-70% smaller than JSON and faster to parse
ALTER TABLE conversations ADD COLUMN metadata_bin BLOB;
ALTER TABLE messages ADD COLUMN extra_bin BLOB;
";
2923
/// Test-only SQL for schema version 8: creates the `daily_stats` table and its
/// two lookup indexes.
#[cfg(test)]
const MIGRATION_V8: &str = r"
-- Opt 3.2: Daily stats materialized table for O(1) time-range histograms
-- Provides fast aggregated queries for stats/dashboard without full table scans

CREATE TABLE IF NOT EXISTS daily_stats (
 day_id INTEGER NOT NULL, -- Days since 2020-01-01 (Unix epoch + offset)
 agent_slug TEXT NOT NULL, -- 'all' for totals, or specific agent slug
 source_id TEXT NOT NULL DEFAULT 'all', -- 'all' for totals, or specific source
 session_count INTEGER NOT NULL DEFAULT 0,
 message_count INTEGER NOT NULL DEFAULT 0,
 total_chars INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
2943
/// Test-only SQL for schema version 9: creates the `embedding_jobs` table and a
/// partial unique index over `(db_path, model_id)` for pending/running jobs.
#[cfg(test)]
const MIGRATION_V9: &str = r"
-- Background embedding jobs tracking table
CREATE TABLE IF NOT EXISTS embedding_jobs (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 db_path TEXT NOT NULL,
 model_id TEXT NOT NULL,
 status TEXT NOT NULL DEFAULT 'pending',
 total_docs INTEGER NOT NULL DEFAULT 0,
 completed_docs INTEGER NOT NULL DEFAULT 0,
 error_message TEXT,
 created_at TEXT NOT NULL DEFAULT (datetime('now')),
 started_at TEXT,
 completed_at TEXT
);

-- Only one pending or running job per (db_path, model_id) at a time.
-- Multiple completed/failed/cancelled jobs are allowed for history.
CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
2966
/// Test-only SQL for schema version 10: creates the `token_usage`,
/// `token_daily_stats` and `model_pricing` tables (the latter seeded with
/// pricing rows) and adds token summary columns to `conversations`.
#[cfg(test)]
const MIGRATION_V10: &str = r"
-- Token analytics: per-message token usage ledger
CREATE TABLE IF NOT EXISTS token_usage (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 conversation_id INTEGER NOT NULL,
 agent_id INTEGER NOT NULL,
 workspace_id INTEGER,
 source_id TEXT NOT NULL DEFAULT 'local',

 -- Timing
 timestamp_ms INTEGER NOT NULL,
 day_id INTEGER NOT NULL,

 -- Model identification
 model_name TEXT,
 model_family TEXT,
 model_tier TEXT,
 service_tier TEXT,
 provider TEXT,

 -- Token counts (nullable — not all agents provide all fields)
 input_tokens INTEGER,
 output_tokens INTEGER,
 cache_read_tokens INTEGER,
 cache_creation_tokens INTEGER,
 thinking_tokens INTEGER,
 total_tokens INTEGER,

 -- Cost estimation
 estimated_cost_usd REAL,

 -- Message context
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,

 -- Data quality
 data_source TEXT NOT NULL DEFAULT 'api',

 UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

-- Token analytics: pre-aggregated daily rollups
CREATE TABLE IF NOT EXISTS token_daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 model_family TEXT NOT NULL DEFAULT 'all',

 api_call_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_message_count INTEGER NOT NULL DEFAULT 0,

 total_input_tokens INTEGER NOT NULL DEFAULT 0,
 total_output_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
 grand_total_tokens INTEGER NOT NULL DEFAULT 0,

 total_content_chars INTEGER NOT NULL DEFAULT 0,
 total_tool_calls INTEGER NOT NULL DEFAULT 0,

 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,

 session_count INTEGER NOT NULL DEFAULT 0,

 last_updated INTEGER NOT NULL,

 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

-- Model pricing lookup table
CREATE TABLE IF NOT EXISTS model_pricing (
 model_pattern TEXT NOT NULL,
 provider TEXT NOT NULL,
 input_cost_per_mtok REAL NOT NULL,
 output_cost_per_mtok REAL NOT NULL,
 cache_read_cost_per_mtok REAL,
 cache_creation_cost_per_mtok REAL,
 effective_date TEXT NOT NULL,
 PRIMARY KEY (model_pattern, effective_date)
);

-- Seed with current pricing (as of 2026-02)
INSERT OR IGNORE INTO model_pricing VALUES
 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Extend conversations table with token summary columns
ALTER TABLE conversations ADD COLUMN total_input_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_output_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_read_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_creation_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN grand_total_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN estimated_cost_usd REAL;
ALTER TABLE conversations ADD COLUMN primary_model TEXT;
ALTER TABLE conversations ADD COLUMN api_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN tool_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN user_message_count INTEGER;
ALTER TABLE conversations ADD COLUMN assistant_message_count INTEGER;
";
3090
/// SQL for schema version 14: drops the `fts_messages` table.
const MIGRATION_V14: &str = r"
-- Switch FTS5 from internal-content to contentless mode (CASS #163).
-- Drop the old V13 internal-content fts_messages first so that
-- sqlite_schema does not contain two conflicting CREATE VIRTUAL TABLE
-- entries, which makes the database completely unreadable.
-- The current contentless table is recreated lazily after open() only when the
-- frankensqlite FTS consistency check finds it missing or malformed.
DROP TABLE IF EXISTS fts_messages;
";
3100
/// SQL that creates the `conversation_tail_state` table if it does not exist
/// (the same table definition also appears in `MIGRATION_V18`).
const MIGRATION_V15_TAIL_STATE_TABLE: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
 -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
 -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
 conversation_id INTEGER PRIMARY KEY,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);
";
3111
/// SQL for schema version 16: drops the `idx_messages_conv_idx` index.
const MIGRATION_V16: &str = r"
-- UNIQUE(conversation_id, idx) already creates sqlite_autoindex_messages_1,
-- which covers the same lookup/order key as idx_messages_conv_idx. Keeping both
-- doubles message insert index maintenance on the hot indexing path.
DROP INDEX IF EXISTS idx_messages_conv_idx;
";
3118
/// SQL for schema version 17: drops the `idx_messages_created` index.
const MIGRATION_V17: &str = r"
-- Drop the global messages(created_at) secondary index from the ingest hot
-- path. Search/time filters are served by the derived search layer and
-- conversation/analytics indexes, while this index is maintained on every
-- message insert.
DROP INDEX IF EXISTS idx_messages_created;
";
3126
/// SQL for schema version 18: creates `conversation_tail_state` if missing and
/// backfills it from `conversations` rows that have any tail column set.
const MIGRATION_V18: &str = r"
-- Move append-tail state out of the wide, indexed conversations row. The hot
-- append path updates this cache for every appended conversation; keeping it in
-- a tiny rowid table avoids rewriting the large conversation record.
CREATE TABLE IF NOT EXISTS conversation_tail_state (
 -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
 -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
 conversation_id INTEGER PRIMARY KEY,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_tail_state (
 conversation_id, ended_at, last_message_idx, last_message_created_at
)
SELECT id, ended_at, last_message_idx, last_message_created_at
FROM conversations
WHERE ended_at IS NOT NULL
 OR last_message_idx IS NOT NULL
 OR last_message_created_at IS NOT NULL;
";
3149
/// SQL for schema version 19: creates `conversation_external_lookup` and
/// backfills it with a length-prefixed `source_id:agent_id:external_id` key for
/// every conversation that has an `external_id`.
const MIGRATION_V19: &str = r"
-- Materialize external conversation provenance into one compact lookup key.
-- This keeps the hot append/new-conversation probe on a single primary-key
-- lookup instead of a composite conversations-table predicate.
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
 CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
 CAST(agent_id AS TEXT) || ':' ||
 CAST(length(external_id) AS TEXT) || ':' || external_id,
 id
FROM conversations
WHERE external_id IS NOT NULL;
";
3168
/// SQL for schema version 20: creates `conversation_external_tail_lookup` and
/// backfills it by joining `conversations` (with an `external_id`) against
/// `conversation_tail_state`, using the same key format as `MIGRATION_V19`.
const MIGRATION_V20: &str = r"
-- Fuse external conversation lookup with append-tail state. Append-heavy
-- workloads can resolve both the conversation id and tail plan from one
-- primary-key probe.
CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
 lookup_key,
 conversation_id,
 ended_at,
 last_message_idx,
 last_message_created_at
)
SELECT
 CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
 CAST(c.agent_id AS TEXT) || ':' ||
 CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
 c.id,
 ts.ended_at,
 ts.last_message_idx,
 ts.last_message_created_at
FROM conversations c
LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
WHERE c.external_id IS NOT NULL;
";
3200
/// One embedding job record. The fields carry the same names as the columns of
/// the `embedding_jobs` table created by `MIGRATION_V9`.
#[derive(Debug, Clone)]
pub struct EmbeddingJobRow {
    pub id: i64,
    pub db_path: String,
    pub model_id: String,
    // The table's partial unique index treats 'pending' and 'running' as the
    // active statuses.
    pub status: String,
    pub total_docs: i64,
    pub completed_docs: i64,
    pub error_message: Option<String>,
    // Timestamps are kept as text, matching the TEXT columns of the table.
    pub created_at: String,
    pub started_at: Option<String>,
    pub completed_at: Option<String>,
}
3215
/// Conversation-level data used by the lexical rebuild.
/// NOTE(review): presumably one row per conversation, joined with its agent
/// slug and workspace path — confirm against the query that fills it.
#[derive(Debug, Clone)]
pub struct LexicalRebuildConversationRow {
    /// Conversation row id, when known.
    pub id: Option<i64>,
    pub agent_slug: String,
    pub workspace: Option<PathBuf>,
    pub external_id: Option<String>,
    pub title: Option<String>,
    pub source_path: PathBuf,
    pub started_at: Option<i64>,
    pub ended_at: Option<i64>,
    pub source_id: String,
    pub origin_host: Option<String>,
}
3235
/// Message count and byte footprint of one conversation, as used by the
/// lexical rebuild.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LexicalRebuildConversationFootprintRow {
    pub conversation_id: i64,
    pub message_count: usize,
    /// Message bytes; `lexical_rebuild_conversation_footprint_from_count`
    /// fills this with an estimate derived from `message_count`.
    pub message_bytes: usize,
}
3244
/// Bytes assumed per message (4 KiB) when a conversation's footprint is
/// estimated from its message count alone.
pub(crate) const LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE: usize = 4 * 1024;
/// Largest number of conversations that may lack tail metadata while
/// `lexical_rebuild_tail_metadata_coverage_is_sufficient` still returns `true`.
const LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT: usize = 64;
3247
3248fn lexical_rebuild_tail_metadata_coverage_is_sufficient(
3249 total_conversations: usize,
3250 covered_conversations: usize,
3251) -> bool {
3252 total_conversations == 0
3253 || total_conversations.saturating_sub(covered_conversations.min(total_conversations))
3254 <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT
3255}
3256
/// Derives a message count from the index of a conversation's last message.
///
/// The count is `last_message_idx + 1`. Returns `None` when the index is
/// absent or negative, or when the result does not fit in `usize`.
fn lexical_rebuild_message_count_from_tail_idx(last_message_idx: Option<i64>) -> Option<usize> {
    last_message_idx
        .and_then(|tail_idx| u64::try_from(tail_idx).ok())
        .and_then(|tail_idx| tail_idx.checked_add(1))
        .and_then(|count| usize::try_from(count).ok())
}
3262
3263fn lexical_rebuild_conversation_footprint_from_count(
3264 conversation_id: i64,
3265 message_count: usize,
3266) -> LexicalRebuildConversationFootprintRow {
3267 LexicalRebuildConversationFootprintRow {
3268 conversation_id,
3269 message_count,
3270 message_bytes: message_count
3271 .saturating_mul(LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE),
3272 }
3273}
3274
/// Message-level data used by the lexical rebuild. The field names match the
/// `messages` table columns `conversation_id`, `id`, `idx`, `role`, `author`,
/// `created_at` and `content`.
#[derive(Debug, Clone)]
pub struct LexicalRebuildMessageRow {
    pub conversation_id: i64,
    pub id: i64,
    pub idx: i64,
    pub role: String,
    pub author: Option<String>,
    pub created_at: Option<i64>,
    pub content: String,
}
3286
/// Reduced message record used when messages are grouped for the lexical
/// rebuild; it carries no conversation or message id of its own.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexicalRebuildGroupedMessageRow {
    pub idx: i64,
    /// NOTE(review): presumably `true` when the message role is a tool role —
    /// confirm against the code that builds these rows.
    pub is_tool_role: bool,
    pub created_at: Option<i64>,
    pub content: String,
}
3297
/// Collection of grouped message rows; up to 32 rows are stored inline before
/// the `SmallVec` spills to the heap.
pub type LexicalRebuildGroupedMessageRows = SmallVec<[LexicalRebuildGroupedMessageRow; 32]>;
3299
/// Alias that lets code refer to `FrankenStorage` under the name `SqliteStorage`.
pub type SqliteStorage = FrankenStorage;
3302
/// Storage handle that bundles a frankensqlite connection with its database
/// path and several in-memory caches.
pub struct FrankenStorage {
    /// The frankensqlite connection owned by this handle.
    conn: FrankenConnection,
    /// NOTE(review): presumably the path `conn` was opened from — confirm.
    db_path: PathBuf,
    ephemeral_writer_preflight_verified: AtomicBool,
    /// NOTE(review): presumably holds `UNSET_INDEX_WRITER_CHECKPOINT_PAGES`
    /// until a value is configured — confirm.
    index_writer_checkpoint_pages: AtomicI64,
    /// NOTE(review): presumably holds `UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS`
    /// until a value is configured — confirm.
    index_writer_busy_timeout_ms: AtomicU64,
    /// Slot for a reusable writer connection (see `CachedEphemeralWriter`).
    cached_ephemeral_writer: parking_lot::Mutex<CachedEphemeralWriter>,
    /// Maps an agent key to an `i64`. NOTE(review): presumably the agent's row
    /// id — confirm.
    ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
    /// Maps a workspace key to an `i64`. NOTE(review): presumably the
    /// workspace's row id — confirm.
    ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
    /// Set of conversation source keys already recorded by this handle.
    ensured_conversation_sources: Arc<parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>>,
    /// Set of daily-stats keys already recorded by this handle.
    ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
    /// NOTE(review): presumably one of the `FTS_MESSAGES_PRESENT_*` values —
    /// confirm.
    fts_messages_present_cache: AtomicI8,
}
3317
/// Default WAL auto-checkpoint threshold, in pages.
const DEFAULT_WAL_AUTOCHECKPOINT_PAGES: i64 = 4096;
/// Placeholder (`i64::MIN`) meaning no index-writer checkpoint page count has
/// been set; `SendFrankenConnection::new` starts from this value.
const UNSET_INDEX_WRITER_CHECKPOINT_PAGES: i64 = i64::MIN;
/// Placeholder (0) meaning no index-writer busy timeout has been set.
const UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS: u64 = 0;
// Tri-state encoding for the presence of the `fts_messages` table.
// NOTE(review): presumably the values stored in
// `FrankenStorage::fts_messages_present_cache` — confirm.
const FTS_MESSAGES_PRESENT_UNKNOWN: i8 = 0;
const FTS_MESSAGES_PRESENT_ABSENT: i8 = 1;
const FTS_MESSAGES_PRESENT_PRESENT: i8 = 2;
3327
/// State of the writer-connection slot held by `FrankenStorage`.
enum CachedEphemeralWriter {
    /// No connection has been placed in the slot yet.
    Uninitialized,
    /// A connection is parked in the slot.
    Cached(Box<SendFrankenConnection>),
    /// NOTE(review): presumably the connection has been taken out and is
    /// currently being used — confirm.
    InUse,
}
3333
3334#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3335struct EnsuredAgentKey {
3336 slug: String,
3337 name: String,
3338 version: Option<String>,
3339 kind: String,
3340}
3341
3342impl EnsuredAgentKey {
3343 fn from_agent(agent: &Agent) -> Self {
3344 Self {
3345 slug: agent.slug.clone(),
3346 name: agent.name.clone(),
3347 version: agent.version.clone(),
3348 kind: agent_kind_str(agent.kind.clone()),
3349 }
3350 }
3351}
3352
/// Hashable identity of a workspace, used as the key of the
/// ensured-workspaces cache.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredWorkspaceKey {
    /// Workspace path.
    path: String,
    /// Optional human-readable name.
    display_name: Option<String>,
}

impl EnsuredWorkspaceKey {
    /// Builds a key from an owned `path` and an optional borrowed display
    /// name, which is copied into an owned `String`.
    fn new(path: String, display_name: Option<&str>) -> Self {
        let display_name = display_name.map(|label| label.to_owned());
        Self { path, display_name }
    }
}
3367
3368#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3369struct EnsuredConversationSourceKey {
3370 id: String,
3371 kind: SourceKind,
3372 host_label: Option<String>,
3373}
3374
3375impl EnsuredConversationSourceKey {
3376 fn from_source(source: &Source) -> Self {
3377 Self {
3378 id: source.id.clone(),
3379 kind: source.kind,
3380 host_label: source.host_label.clone(),
3381 }
3382 }
3383}
3384
/// Hashable identity of one daily-stats row, used as the element type of the
/// ensured-daily-stats set.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredDailyStatsKey {
    /// Day identifier.
    day_id: i64,
    /// Agent slug the stats row is keyed on.
    agent_slug: String,
    /// Source id the stats row is keyed on.
    source_id: String,
}

impl EnsuredDailyStatsKey {
    /// Builds a key, copying both borrowed strings into owned storage.
    fn new(day_id: i64, agent_slug: &str, source_id: &str) -> Self {
        let agent_slug = String::from(agent_slug);
        let source_id = String::from(source_id);
        Self {
            day_id,
            agent_slug,
            source_id,
        }
    }
}
3401
/// Spellings of the "turn autocommit_retain off" PRAGMA, in the order
/// `disable_autocommit_retain` tries them (namespaced form first).
const AUTOCOMMIT_RETAIN_OFF_PRAGMAS: [&str; 2] = [
    "PRAGMA fsqlite.autocommit_retain = OFF;",
    "PRAGMA autocommit_retain = OFF;",
];
3406
3407fn disable_autocommit_retain<E>(
3408 mut execute: impl FnMut(&'static str) -> std::result::Result<(), E>,
3409) -> Result<&'static str>
3410where
3411 E: std::fmt::Display,
3412{
3413 let mut failures = Vec::new();
3414 for pragma in AUTOCOMMIT_RETAIN_OFF_PRAGMAS {
3415 match execute(pragma) {
3416 Ok(()) => return Ok(pragma),
3417 Err(err) => {
3418 let error = err.to_string();
3419 tracing::debug!(
3420 %pragma,
3421 error = %error,
3422 "autocommit_retain PRAGMA variant not supported"
3423 );
3424 failures.push(format!("{pragma}: {error}"));
3425 }
3426 }
3427 }
3428
3429 Err(anyhow!(
3430 "failed to disable autocommit_retain on frankensqlite connection; \
3431 refusing to keep a long-lived MVCC connection that may accumulate \
3432 unbounded write snapshots. Upgrade frankensqlite to a version that \
3433 supports one of these PRAGMAs or use a short-lived connection path. \
3434 attempts: {}",
3435 failures.join("; ")
3436 ))
3437}
3438
3439impl FrankenStorage {
3440 fn new(conn: FrankenConnection, db_path: PathBuf) -> Self {
3441 Self::new_with_shared_caches(
3442 conn,
3443 db_path,
3444 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3445 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3446 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3447 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3448 )
3449 }
3450
3451 fn new_with_shared_caches(
3452 conn: FrankenConnection,
3453 db_path: PathBuf,
3454 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3455 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3456 ensured_conversation_sources: Arc<
3457 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3458 >,
3459 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3460 ) -> Self {
3461 Self {
3462 conn,
3463 db_path,
3464 ephemeral_writer_preflight_verified: AtomicBool::new(false),
3465 index_writer_checkpoint_pages: AtomicI64::new(UNSET_INDEX_WRITER_CHECKPOINT_PAGES),
3466 index_writer_busy_timeout_ms: AtomicU64::new(UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS),
3467 cached_ephemeral_writer: parking_lot::Mutex::new(CachedEphemeralWriter::Uninitialized),
3468 ensured_agents,
3469 ensured_workspaces,
3470 ensured_conversation_sources,
3471 ensured_daily_stats_keys,
3472 fts_messages_present_cache: AtomicI8::new(FTS_MESSAGES_PRESENT_UNKNOWN),
3473 }
3474 }
3475
3476 fn apply_open_stage_busy_timeout(&self) {
3477 if let Err(err) = self.conn.execute("PRAGMA busy_timeout = 5000;") {
3478 tracing::debug!(
3479 error = %err,
3480 "failed to apply open-stage busy_timeout before migrations"
3481 );
3482 }
3483 }
3484
3485 pub fn open(path: &Path) -> Result<Self> {
3491 if let Some(parent) = path.parent() {
3492 fs::create_dir_all(parent)
3493 .with_context(|| format!("creating db directory {}", parent.display()))?;
3494 }
3495
3496 let path_str = path.to_string_lossy().to_string();
3497 let _doctor_guard =
3498 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3499 let conn = FrankenConnection::open(&path_str)
3500 .with_context(|| format!("opening frankensqlite db at {}", path.display()))?;
3501 let storage = Self::new(conn, path.to_path_buf());
3502 storage.apply_open_stage_busy_timeout();
3503 storage.run_migrations()?;
3504 storage.repair_missing_current_schema_objects()?;
3505 storage.apply_config()?;
3506 Ok(storage)
3507 }
3508
3509 pub fn open_writer(path: &Path) -> Result<Self> {
3515 Self::open_writer_with_shared_caches(
3516 path,
3517 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3518 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3519 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3520 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3521 )
3522 }
3523
3524 fn open_writer_with_shared_caches(
3525 path: &Path,
3526 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3527 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3528 ensured_conversation_sources: Arc<
3529 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3530 >,
3531 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3532 ) -> Result<Self> {
3533 let path_str = path.to_string_lossy().to_string();
3534 let _doctor_guard =
3535 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3536 let conn = FrankenConnection::open(&path_str)
3537 .with_context(|| format!("opening frankensqlite writer at {}", path.display()))?;
3538 let storage = Self::new_with_shared_caches(
3539 conn,
3540 path.to_path_buf(),
3541 ensured_agents,
3542 ensured_workspaces,
3543 ensured_conversation_sources,
3544 ensured_daily_stats_keys,
3545 );
3546 storage.apply_config()?;
3547 Ok(storage)
3548 }
3549
3550 pub(crate) fn acquire_cached_ephemeral_writer(&self) -> Result<(Self, bool)> {
3551 let mut cached = self.cached_ephemeral_writer.lock();
3552 match std::mem::replace(&mut *cached, CachedEphemeralWriter::InUse) {
3553 CachedEphemeralWriter::Cached(conn) => {
3554 let (conn, checkpoint_pages, busy_timeout_ms) = (*conn).into_parts();
3555 let writer = Self::new_with_shared_caches(
3556 conn,
3557 self.db_path.clone(),
3558 Arc::clone(&self.ensured_agents),
3559 Arc::clone(&self.ensured_workspaces),
3560 Arc::clone(&self.ensured_conversation_sources),
3561 Arc::clone(&self.ensured_daily_stats_keys),
3562 );
3563 writer
3564 .index_writer_checkpoint_pages
3565 .store(checkpoint_pages, Ordering::Relaxed);
3566 writer
3567 .index_writer_busy_timeout_ms
3568 .store(busy_timeout_ms, Ordering::Relaxed);
3569 Ok((writer, true))
3570 }
3571 CachedEphemeralWriter::Uninitialized => {
3572 drop(cached);
3573 match Self::open_writer_with_shared_caches(
3574 &self.db_path,
3575 Arc::clone(&self.ensured_agents),
3576 Arc::clone(&self.ensured_workspaces),
3577 Arc::clone(&self.ensured_conversation_sources),
3578 Arc::clone(&self.ensured_daily_stats_keys),
3579 ) {
3580 Ok(writer) => Ok((writer, true)),
3581 Err(err) => {
3582 let mut cached = self.cached_ephemeral_writer.lock();
3583 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3584 *cached = CachedEphemeralWriter::Uninitialized;
3585 }
3586 Err(err)
3587 }
3588 }
3589 }
3590 CachedEphemeralWriter::InUse => {
3591 *cached = CachedEphemeralWriter::InUse;
3592 drop(cached);
3593 Ok((
3594 Self::open_writer_with_shared_caches(
3595 &self.db_path,
3596 Arc::clone(&self.ensured_agents),
3597 Arc::clone(&self.ensured_workspaces),
3598 Arc::clone(&self.ensured_conversation_sources),
3599 Arc::clone(&self.ensured_daily_stats_keys),
3600 )?,
3601 false,
3602 ))
3603 }
3604 }
3605 }
3606
3607 pub(crate) fn release_cached_ephemeral_writer(&self, writer: Self) {
3608 let checkpoint_pages = writer.index_writer_checkpoint_pages.load(Ordering::Relaxed);
3609 let busy_timeout_ms = writer.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
3610 let conn = writer.into_raw();
3611 let mut cached = self.cached_ephemeral_writer.lock();
3612 debug_assert!(
3613 matches!(&*cached, CachedEphemeralWriter::InUse),
3614 "cached ephemeral writer state should be in-use when releasing"
3615 );
3616 *cached = CachedEphemeralWriter::Cached(Box::new(
3617 SendFrankenConnection::new_with_index_writer_state(
3618 conn,
3619 checkpoint_pages,
3620 busy_timeout_ms,
3621 ),
3622 ));
3623 }
3624
3625 pub(crate) fn discard_cached_ephemeral_writer(&self, mut writer: Self) {
3626 writer.close_best_effort_in_place();
3627 let mut cached = self.cached_ephemeral_writer.lock();
3628 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3629 *cached = CachedEphemeralWriter::Uninitialized;
3630 }
3631 }
3632
3633 fn cached_agent_id(&self, key: &EnsuredAgentKey) -> Option<i64> {
3634 self.ensured_agents.lock().get(key).copied()
3635 }
3636
3637 fn mark_agent_ensured(&self, key: EnsuredAgentKey, id: i64) {
3638 self.ensured_agents.lock().insert(key, id);
3639 }
3640
3641 fn cached_workspace_id(&self, key: &EnsuredWorkspaceKey) -> Option<i64> {
3642 self.ensured_workspaces.lock().get(key).copied()
3643 }
3644
3645 fn mark_workspace_ensured(&self, key: EnsuredWorkspaceKey, id: i64) {
3646 self.ensured_workspaces.lock().insert(key, id);
3647 }
3648
3649 fn conversation_source_already_ensured(&self, key: &EnsuredConversationSourceKey) -> bool {
3650 self.ensured_conversation_sources.lock().contains(key)
3651 }
3652
3653 fn mark_conversation_source_ensured(&self, key: EnsuredConversationSourceKey) {
3654 self.ensured_conversation_sources.lock().insert(key);
3655 }
3656
3657 fn daily_stats_key_already_ensured(&self, key: &EnsuredDailyStatsKey) -> bool {
3658 self.ensured_daily_stats_keys.lock().contains(key)
3659 }
3660
3661 fn daily_stats_keys_already_ensured(&self, keys: &[EnsuredDailyStatsKey; 4]) -> bool {
3662 let ensured = self.ensured_daily_stats_keys.lock();
3663 keys.iter().all(|key| ensured.contains(key))
3664 }
3665
3666 fn mark_daily_stats_key_ensured(&self, key: EnsuredDailyStatsKey) {
3667 self.ensured_daily_stats_keys.lock().insert(key);
3668 }
3669
3670 fn fts_messages_present_cached(&self, tx: &FrankenTransaction<'_>) -> bool {
3671 match self.fts_messages_present_cache.load(Ordering::Acquire) {
3672 FTS_MESSAGES_PRESENT_PRESENT => return true,
3673 FTS_MESSAGES_PRESENT_ABSENT => return false,
3674 _ => {}
3675 }
3676
3677 let present = tx
3678 .query_row_map(
3679 "SELECT COUNT(*) FROM sqlite_master
3680 WHERE name = 'fts_messages'
3681 AND rootpage > 0",
3682 fparams![],
3683 |row| row.get_typed::<i64>(0),
3684 )
3685 .map(|count| count > 0)
3686 .unwrap_or_else(|err| {
3687 tracing::debug!(
3688 error = %err,
3689 "failed to probe fts_messages presence; skipping db-resident FTS maintenance"
3690 );
3691 false
3692 });
3693 self.set_fts_messages_present_cache(present);
3694 present
3695 }
3696
3697 fn set_fts_messages_present_cache(&self, present: bool) {
3698 self.fts_messages_present_cache.store(
3699 if present {
3700 FTS_MESSAGES_PRESENT_PRESENT
3701 } else {
3702 FTS_MESSAGES_PRESENT_ABSENT
3703 },
3704 Ordering::Release,
3705 );
3706 }
3707
3708 fn invalidate_fts_messages_present_cache(&self) {
3709 self.fts_messages_present_cache
3710 .store(FTS_MESSAGES_PRESENT_UNKNOWN, Ordering::Release);
3711 }
3712
3713 fn invalidate_conversation_source_cache(&self, source_id: &str) {
3714 self.ensured_conversation_sources
3715 .lock()
3716 .retain(|key| key.id != source_id);
3717 }
3718
3719 fn close_cached_ephemeral_writer_best_effort_in_place(&mut self) {
3720 let cached = self.cached_ephemeral_writer.get_mut();
3721 if let CachedEphemeralWriter::Cached(conn) =
3722 std::mem::replace(cached, CachedEphemeralWriter::Uninitialized)
3723 {
3724 let mut conn = conn;
3725 conn.0.close_best_effort_in_place();
3726 }
3727 }
3728
3729 fn close_cached_ephemeral_writer_without_checkpoint_in_place(&mut self) -> Result<()> {
3730 let cached = self.cached_ephemeral_writer.get_mut();
3731 match std::mem::replace(cached, CachedEphemeralWriter::Uninitialized) {
3732 CachedEphemeralWriter::Cached(mut conn) => conn
3733 .0
3734 .close_without_checkpoint_in_place()
3735 .with_context(|| "closing cached frankensqlite writer without final checkpoint"),
3736 CachedEphemeralWriter::Uninitialized | CachedEphemeralWriter::InUse => Ok(()),
3737 }
3738 }
3739
3740 pub fn open_readonly(path: &Path) -> Result<Self> {
3742 Self::open_readonly_with_doctor_lock_timeout(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)
3743 }
3744
3745 pub fn open_readonly_with_doctor_lock_timeout(path: &Path, timeout: Duration) -> Result<Self> {
3750 let path_str = path.to_string_lossy().to_string();
3751 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
3752 let conn = open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
3753 .with_context(|| format!("opening frankensqlite db readonly at {}", path.display()))?;
3754 let storage = Self::new(conn, path.to_path_buf());
3755 storage.apply_readonly_config()?;
3756 Ok(storage)
3757 }
3758
3759 pub fn close(self) -> Result<()> {
3760 let mut this = self;
3761 this.close_cached_ephemeral_writer_best_effort_in_place();
3762 this.conn
3763 .close()
3764 .with_context(|| "closing frankensqlite connection")
3765 }
3766
3767 pub fn close_without_checkpoint(self) -> Result<()> {
3768 let mut this = self;
3769 this.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
3770 this.conn
3771 .close_without_checkpoint()
3772 .with_context(|| "closing frankensqlite connection without final checkpoint")
3773 }
3774
3775 pub fn close_best_effort_in_place(&mut self) {
3776 self.close_cached_ephemeral_writer_best_effort_in_place();
3777 self.conn.close_best_effort_in_place();
3778 }
3779
3780 pub fn close_without_checkpoint_in_place(&mut self) -> Result<()> {
3781 self.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
3782 self.conn
3783 .close_without_checkpoint_in_place()
3784 .with_context(|| "closing frankensqlite connection without final checkpoint")
3785 }
3786
3787 pub fn raw(&self) -> &FrankenConnection {
3789 &self.conn
3790 }
3791
3792 pub fn into_raw(self) -> FrankenConnection {
3795 let mut this = self;
3796 this.close_cached_ephemeral_writer_best_effort_in_place();
3797 this.conn
3798 }
3799
3800 pub fn apply_config(&self) -> Result<()> {
3807 self.conn
3811 .execute("PRAGMA journal_mode = WAL;")
3812 .with_context(|| "setting journal_mode")?;
3813 self.conn
3814 .execute("PRAGMA synchronous = NORMAL;")
3815 .with_context(|| "setting synchronous")?;
3816
3817 self.conn
3819 .execute("PRAGMA cache_size = -65536;")
3820 .with_context(|| "setting cache_size")?;
3821
3822 self.conn
3824 .execute("PRAGMA foreign_keys = ON;")
3825 .with_context(|| "setting foreign_keys")?;
3826
3827 self.conn
3829 .execute("PRAGMA busy_timeout = 5000;")
3830 .with_context(|| "setting busy_timeout")?;
3831
3832 let checkpoint_pragma =
3840 format!("PRAGMA wal_autocheckpoint = {DEFAULT_WAL_AUTOCHECKPOINT_PAGES};");
3841 let _ = self.conn.execute(&checkpoint_pragma);
3842 self.index_writer_checkpoint_pages
3843 .store(DEFAULT_WAL_AUTOCHECKPOINT_PAGES, Ordering::Relaxed);
3844 let _ = self.conn.execute("PRAGMA fsqlite.concurrent_mode = ON;");
3847 let _ = self.conn.execute("PRAGMA concurrent_mode = ON;");
3848 let autocommit_pragma =
3859 disable_autocommit_retain(|pragma| self.conn.execute(pragma).map(|_| ()))?;
3860 tracing::debug!(
3861 pragma = autocommit_pragma,
3862 "disabled frankensqlite autocommit_retain for storage connection"
3863 );
3864
3865 Ok(())
3866 }
3867
3868 fn apply_readonly_config(&self) -> Result<()> {
3869 self.conn
3870 .execute("PRAGMA query_only = 1;")
3871 .with_context(|| "setting query_only")?;
3872 self.conn
3873 .execute("PRAGMA busy_timeout = 5000;")
3874 .with_context(|| "setting busy_timeout")?;
3875 self.conn
3876 .execute("PRAGMA cache_size = -65536;")
3877 .with_context(|| "setting cache_size")?;
3878 self.conn
3879 .execute("PRAGMA foreign_keys = ON;")
3880 .with_context(|| "setting foreign_keys")?;
3881 Ok(())
3882 }
3883
3884 pub fn run_migrations(&self) -> Result<()> {
3902 transition_from_meta_version(&self.conn)?;
3903
3904 let base_result = build_cass_migrations_before_tail_cache()
3905 .run(&self.conn)
3906 .with_context(|| "running base schema migrations")?;
3907
3908 let mut applied = base_result.applied;
3909 if apply_conversation_tail_state_cache_migration(&self.conn)
3910 .with_context(|| "running conversation tail-state cache migration")?
3911 {
3912 applied.push(15);
3913 }
3914
3915 let post_result = build_cass_migrations_after_tail_cache()
3916 .run(&self.conn)
3917 .with_context(|| "running post-tail-cache schema migrations")?;
3918 applied.extend(post_result.applied);
3919
3920 let current = self.schema_version()?;
3921 if !applied.is_empty() {
3922 info!(
3923 applied = ?applied,
3924 current,
3925 was_fresh = base_result.was_fresh,
3926 "frankensqlite schema migrations applied"
3927 );
3928 }
3929
3930 self.sync_meta_schema_version(current)?;
3932
3933 Ok(())
3934 }
3935
3936 fn repair_missing_current_schema_objects(&self) -> Result<()> {
3941 let mut missing_tables = Vec::new();
3942 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
3943 if let Err(err) = self.conn.query(probe_sql) {
3944 if error_indicates_missing_table(&err) {
3945 missing_tables.push(table_name);
3946 continue;
3947 }
3948 return Err(err).with_context(|| {
3949 format!("probing required schema table {table_name} for completeness")
3950 });
3951 }
3952 }
3953
3954 if !missing_tables.is_empty() {
3955 info!(
3956 missing_tables = ?missing_tables,
3957 "repairing missing current-schema tables on an already-versioned cass database"
3958 );
3959
3960 for batch in current_schema_repair_batches_for_missing_tables(&missing_tables)? {
3961 self.conn
3962 .execute_batch(batch.sql)
3963 .with_context(|| format!("repairing current-schema batch {}", batch.name))?;
3964 }
3965
3966 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
3967 if !missing_tables.contains(&table_name) {
3968 continue;
3969 }
3970 self.conn
3971 .query(probe_sql)
3972 .with_context(|| format!("verifying repaired schema table {table_name}"))?;
3973 }
3974 }
3975 self.repair_missing_conversation_token_columns()?;
3976 Ok(())
3977 }
3978
3979 fn repair_missing_conversation_token_columns(&self) -> Result<()> {
3980 let columns = franken_table_column_names(&self.conn, "conversations")
3981 .with_context(|| "inspecting conversations columns for token-summary repair")?;
3982 let mut missing_columns = Vec::new();
3983 for &(column_name, column_type) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
3984 if columns.contains(column_name) {
3985 continue;
3986 }
3987 let sql = format!("ALTER TABLE conversations ADD COLUMN {column_name} {column_type};");
3988 self.conn.execute(&sql).with_context(|| {
3989 format!("adding missing conversations.{column_name} token-summary column")
3990 })?;
3991 missing_columns.push(column_name);
3992 }
3993 if !missing_columns.is_empty() {
3994 tracing::warn!(
3995 target: "cass::schema_repair",
3996 db_path = %self.db_path.display(),
3997 missing_columns = ?missing_columns,
3998 "cass#222: repaired missing conversations token-summary columns"
3999 );
4000 }
4001 Ok(())
4002 }
4003
4004 pub(crate) fn cleanup_orphan_fk_rows(&self) -> Result<OrphanFkCleanupReport> {
4023 let mut report = OrphanFkCleanupReport::default();
4024 let orphan_message_ids = match collect_orphan_message_ids(&self.conn) {
4025 Ok(ids) => ids,
4026 Err(err) if error_indicates_missing_table(&err) => {
4027 tracing::debug!(
4028 target: "cass::fk_repair",
4029 child_table = "messages",
4030 error = %err,
4031 "skipping orphan-message probe (table or column unavailable)"
4032 );
4033 Vec::new()
4034 }
4035 Err(err) => return Err(err),
4036 };
4037 if !orphan_message_ids.is_empty() {
4038 report.record("messages", orphan_message_ids.len() as i64);
4039 }
4040
4041 if !orphan_message_ids.is_empty() {
4042 delete_orphan_message_ids_bisecting_oom(&self.conn, &orphan_message_ids)
4043 .context("deleting orphan message rows and dependent children")?;
4044 }
4045
4046 for entry in ORPHAN_DIRECT_CHILD_TABLES {
4047 loop {
4048 let ids = match collect_direct_orphan_id_page(&self.conn, entry) {
4049 Ok(ids) => ids,
4050 Err(err)
4051 if error_indicates_missing_table(&err)
4052 || error_indicates_missing_column(&err) =>
4053 {
4054 tracing::debug!(
4058 target: "cass::fk_repair",
4059 child_table = entry.child_table,
4060 error = %err,
4061 "skipping orphan probe (table or column unavailable)"
4062 );
4063 break;
4064 }
4065 Err(err) => {
4066 return Err(err).with_context(|| {
4067 format!("probing orphan rows in {}", entry.child_table)
4068 });
4069 }
4070 };
4071 if ids.is_empty() {
4072 break;
4073 }
4074
4075 let deleted = delete_direct_orphan_ids_bisecting_oom(&self.conn, entry, &ids)
4076 .with_context(|| format!("deleting orphan rows from {}", entry.child_table))?;
4077 if deleted == 0 {
4078 break;
4079 }
4080 report.record(
4081 entry.child_table,
4082 i64::try_from(deleted).unwrap_or(i64::MAX),
4083 );
4084 }
4085 }
4086
4087 if report.total == 0 {
4088 return Ok(report);
4089 }
4090
4091 tracing::warn!(
4096 target: "cass::fk_repair",
4097 db_path = %self.db_path.display(),
4098 total_orphans = report.total,
4099 per_table = ?report.per_table,
4100 "cass#202: removed orphan rows left behind by interrupted index transactions"
4101 );
4102
4103 Ok(report)
4104 }
4105
4106 pub fn schema_version(&self) -> Result<i64> {
4108 let rows = self
4109 .conn
4110 .query("SELECT MAX(version) FROM _schema_migrations;")
4111 .with_context(|| "reading schema version from _schema_migrations")?;
4112
4113 if let Some(row) = rows.first()
4114 && let Ok(v) = row.get_typed::<Option<i64>>(0)
4115 {
4116 return Ok(v.unwrap_or(0));
4117 }
4118 Ok(0)
4119 }
4120
4121 fn sync_meta_schema_version(&self, version: i64) -> Result<()> {
4123 if self.conn.query("SELECT key FROM meta LIMIT 1;").is_err() {
4126 return Ok(());
4127 }
4128
4129 if let Ok(rows) = self
4131 .conn
4132 .query("SELECT value FROM meta WHERE key = 'schema_version';")
4133 && let Some(row) = rows.first()
4134 && let Ok(val) = row.get_typed::<String>(0)
4135 && val == version.to_string()
4136 {
4137 return Ok(()); }
4139
4140 self.conn
4141 .execute_compat(
4142 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1);",
4143 &[ParamValue::from(version.to_string())],
4144 )
4145 .with_context(|| "syncing meta schema_version")?;
4146
4147 Ok(())
4148 }
4149
4150 pub fn database_path(&self) -> Result<PathBuf> {
4152 Ok(self.db_path.clone())
4153 }
4154
4155 pub(crate) fn ephemeral_writer_preflight_verified(&self) -> bool {
4156 self.ephemeral_writer_preflight_verified
4157 .load(Ordering::Relaxed)
4158 }
4159
4160 pub(crate) fn mark_ephemeral_writer_preflight_verified(&self) {
4161 self.ephemeral_writer_preflight_verified
4162 .store(true, Ordering::Relaxed);
4163 }
4164
4165 pub(crate) fn index_writer_checkpoint_pages(&self) -> Option<i64> {
4166 let pages = self.index_writer_checkpoint_pages.load(Ordering::Relaxed);
4167 (pages != UNSET_INDEX_WRITER_CHECKPOINT_PAGES).then_some(pages)
4168 }
4169
4170 pub(crate) fn mark_index_writer_checkpoint_pages(&self, pages: i64) {
4171 self.index_writer_checkpoint_pages
4172 .store(pages, Ordering::Relaxed);
4173 }
4174
4175 pub(crate) fn index_writer_busy_timeout_ms(&self) -> Option<u64> {
4176 let timeout_ms = self.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
4177 (timeout_ms != UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS).then_some(timeout_ms)
4178 }
4179
4180 pub(crate) fn mark_index_writer_busy_timeout_ms(&self, timeout_ms: u64) {
4181 self.index_writer_busy_timeout_ms
4182 .store(timeout_ms, Ordering::Relaxed);
4183 }
4184
4185 pub fn open_or_rebuild(path: &Path) -> std::result::Result<Self, MigrationError> {
4187 if let Some(parent) = path.parent() {
4188 fs::create_dir_all(parent)?;
4189 }
4190
4191 if path.exists() {
4192 let check_result = check_schema_compatibility(path);
4193 match check_result {
4194 Ok(SchemaCheck::Compatible) | Ok(SchemaCheck::NeedsMigration) => {
4195 }
4197 Ok(SchemaCheck::NeedsRebuild(reason)) => {
4198 let backup_path = create_backup(path)?;
4199 cleanup_old_backups(path, MAX_BACKUPS)?;
4200 remove_database_files(path)?;
4201 return Err(MigrationError::RebuildRequired {
4202 reason,
4203 backup_path,
4204 });
4205 }
4206 Err(err) if schema_check_error_requires_rebuild(&err) => {
4207 let backup_path = create_backup(path)?;
4208 cleanup_old_backups(path, MAX_BACKUPS)?;
4209 remove_database_files(path)?;
4210 return Err(MigrationError::RebuildRequired {
4211 reason: format!("Database appears corrupted: {err}"),
4212 backup_path,
4213 });
4214 }
4215 Err(err) => return Err(MigrationError::Database(err)),
4216 }
4217 }
4218
4219 let storage = Self::open(path).map_err(|e| MigrationError::Other(e.to_string()))?;
4220 Ok(storage)
4221 }
4222}
4223
4224fn build_cass_migrations_before_tail_cache() -> MigrationRunner {
4240 MigrationRunner::new()
4241 .add(13, "full_schema_v13", MIGRATION_FRESH_SCHEMA)
4242 .add(14, "fts_contentless", MIGRATION_V14)
4243}
4244
4245fn build_cass_migrations_after_tail_cache() -> MigrationRunner {
4246 MigrationRunner::new()
4247 .add(16, "drop_redundant_message_conv_idx", MIGRATION_V16)
4248 .add(17, "drop_message_created_idx", MIGRATION_V17)
4249 .add(18, "conversation_tail_state_hot_table", MIGRATION_V18)
4250 .add(19, "conversation_external_lookup", MIGRATION_V19)
4251 .add(20, "conversation_external_tail_lookup", MIGRATION_V20)
4252}
4253
4254fn schema_migration_is_applied(conn: &FrankenConnection, version: i64) -> Result<bool> {
4255 let rows = conn
4256 .query_with_params(
4257 "SELECT 1 FROM _schema_migrations WHERE version = ?1 LIMIT 1;",
4258 &[SqliteValue::from(version)],
4259 )
4260 .with_context(|| format!("checking schema migration version {version}"))?;
4261 Ok(!rows.is_empty())
4262}
4263
4264fn apply_conversation_tail_state_cache_migration(conn: &FrankenConnection) -> Result<bool> {
4265 conn.execute("BEGIN IMMEDIATE;")
4266 .with_context(|| "starting v15 conversation tail-state migration transaction")?;
4267
4268 let result = (|| -> Result<bool> {
4269 if schema_migration_is_applied(conn, 15)? {
4270 conn.execute("COMMIT;")
4271 .with_context(|| "committing already-applied v15 migration transaction")?;
4272 return Ok(false);
4273 }
4274
4275 let started = Instant::now();
4276 let conversation_columns = franken_table_column_names(conn, "conversations")
4277 .with_context(|| "inspecting conversations columns before v15 migration")?;
4278 if !conversation_columns.contains("last_message_idx") {
4279 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_idx INTEGER;")
4280 .with_context(|| "adding v15 conversations.last_message_idx column")?;
4281 }
4282 if !conversation_columns.contains("last_message_created_at") {
4283 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_created_at INTEGER;")
4284 .with_context(|| "adding v15 conversations.last_message_created_at column")?;
4285 }
4286 conn.execute_batch(MIGRATION_V15_TAIL_STATE_TABLE)
4287 .with_context(|| "applying v15 conversation tail-state table schema")?;
4288 conn.execute_compat(
4289 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
4290 fparams![15_i64, "conversation_tail_state_cache"],
4291 )
4292 .with_context(|| "recording v15 conversation tail-state migration")?;
4293 conn.execute("COMMIT;")
4294 .with_context(|| "committing v15 conversation tail-state migration")?;
4295 info!(
4296 elapsed_ms = started.elapsed().as_millis(),
4297 "applied v15 conversation tail-state cache migration"
4298 );
4299 Ok(true)
4300 })();
4301
4302 if result.is_err() {
4303 let _ = conn.execute("ROLLBACK;");
4304 }
4305
4306 result
4307}
4308
4309fn franken_table_column_names(
4310 conn: &FrankenConnection,
4311 table_name: &str,
4312) -> Result<HashSet<String>> {
4313 if !table_name
4314 .chars()
4315 .all(|c| c.is_ascii_alphanumeric() || c == '_')
4316 {
4317 return Err(anyhow!(
4318 "unsafe table name for PRAGMA table_info: {table_name}"
4319 ));
4320 }
4321
4322 conn.query_map_collect(
4323 &format!("PRAGMA table_info({table_name})"),
4324 fparams![],
4325 |row: &FrankenRow| row.get_typed::<String>(1),
4326 )
4327 .with_context(|| format!("reading PRAGMA table_info({table_name})"))
4328 .map(|columns| columns.into_iter().collect())
4329}
4330
4331const MIGRATION_FRESH_SCHEMA: &str = r"
4341-- Core tables (V1)
4342CREATE TABLE IF NOT EXISTS meta (
4343 key TEXT PRIMARY KEY,
4344 value TEXT NOT NULL
4345);
4346
4347CREATE TABLE IF NOT EXISTS agents (
4348 id INTEGER PRIMARY KEY,
4349 slug TEXT NOT NULL UNIQUE,
4350 name TEXT NOT NULL,
4351 version TEXT,
4352 kind TEXT NOT NULL,
4353 created_at INTEGER NOT NULL,
4354 updated_at INTEGER NOT NULL
4355);
4356
4357CREATE TABLE IF NOT EXISTS workspaces (
4358 id INTEGER PRIMARY KEY,
4359 path TEXT NOT NULL UNIQUE,
4360 display_name TEXT
4361);
4362
4363-- Sources (V4)
4364CREATE TABLE IF NOT EXISTS sources (
4365 id TEXT PRIMARY KEY,
4366 kind TEXT NOT NULL,
4367 host_label TEXT,
4368 machine_id TEXT,
4369 platform TEXT,
4370 config_json TEXT,
4371 created_at INTEGER NOT NULL,
4372 updated_at INTEGER NOT NULL
4373);
4374
4375INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
4376VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
4377
4378-- Conversations: V1 base + V5 provenance + V7 metadata_bin + V10 token summary
4379CREATE TABLE IF NOT EXISTS conversations (
4380 id INTEGER PRIMARY KEY,
4381 agent_id INTEGER NOT NULL REFERENCES agents(id),
4382 workspace_id INTEGER REFERENCES workspaces(id),
4383 source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
4384 external_id TEXT,
4385 title TEXT,
4386 source_path TEXT NOT NULL,
4387 started_at INTEGER,
4388 ended_at INTEGER,
4389 approx_tokens INTEGER,
4390 metadata_json TEXT,
4391 origin_host TEXT,
4392 metadata_bin BLOB,
4393 total_input_tokens INTEGER,
4394 total_output_tokens INTEGER,
4395 total_cache_read_tokens INTEGER,
4396 total_cache_creation_tokens INTEGER,
4397 grand_total_tokens INTEGER,
4398 estimated_cost_usd REAL,
4399 primary_model TEXT,
4400 api_call_count INTEGER,
4401 tool_call_count INTEGER,
4402 user_message_count INTEGER,
4403 assistant_message_count INTEGER,
4404 -- V15 columns are included in the fresh schema so fresh DB creation does
4405 -- not need ALTER TABLE on conversations. That ALTER path can duplicate
4406 -- provenance autoindex state in frankensqlite when the named unique
4407 -- provenance index already exists.
4408 last_message_idx INTEGER,
4409 last_message_created_at INTEGER
4410);
4411
4412-- Named unique index avoids autoindex issues if table is ever recreated
4413CREATE UNIQUE INDEX IF NOT EXISTS idx_conversations_provenance
4414 ON conversations(source_id, agent_id, external_id);
4415
4416-- Messages: V1 base + V7 extra_bin
4417CREATE TABLE IF NOT EXISTS messages (
4418 id INTEGER PRIMARY KEY,
4419 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
4420 idx INTEGER NOT NULL,
4421 role TEXT NOT NULL,
4422 author TEXT,
4423 created_at INTEGER,
4424 content TEXT NOT NULL,
4425 extra_json TEXT,
4426 extra_bin BLOB,
4427 UNIQUE(conversation_id, idx)
4428);
4429
4430CREATE TABLE IF NOT EXISTS snippets (
4431 id INTEGER PRIMARY KEY,
4432 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
4433 file_path TEXT,
4434 start_line INTEGER,
4435 end_line INTEGER,
4436 language TEXT,
4437 snippet_text TEXT
4438);
4439
4440CREATE TABLE IF NOT EXISTS tags (
4441 id INTEGER PRIMARY KEY,
4442 name TEXT NOT NULL UNIQUE
4443);
4444
4445CREATE TABLE IF NOT EXISTS conversation_tags (
4446 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
4447 tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
4448 PRIMARY KEY (conversation_id, tag_id)
4449);
4450
4451-- Daily stats (V8)
4452CREATE TABLE IF NOT EXISTS daily_stats (
4453 day_id INTEGER NOT NULL,
4454 agent_slug TEXT NOT NULL,
4455 source_id TEXT NOT NULL DEFAULT 'all',
4456 session_count INTEGER NOT NULL DEFAULT 0,
4457 message_count INTEGER NOT NULL DEFAULT 0,
4458 total_chars INTEGER NOT NULL DEFAULT 0,
4459 last_updated INTEGER NOT NULL,
4460 PRIMARY KEY (day_id, agent_slug, source_id)
4461);
4462
4463-- Embedding jobs (V9)
4464CREATE TABLE IF NOT EXISTS embedding_jobs (
4465 id INTEGER PRIMARY KEY AUTOINCREMENT,
4466 db_path TEXT NOT NULL,
4467 model_id TEXT NOT NULL,
4468 status TEXT NOT NULL DEFAULT 'pending',
4469 total_docs INTEGER NOT NULL DEFAULT 0,
4470 completed_docs INTEGER NOT NULL DEFAULT 0,
4471 error_message TEXT,
4472 created_at TEXT NOT NULL DEFAULT (datetime('now')),
4473 started_at TEXT,
4474 completed_at TEXT
4475);
4476
4477CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
4478ON embedding_jobs(db_path, model_id)
4479WHERE status IN ('pending', 'running');
4480
4481-- Token usage ledger (V10)
4482CREATE TABLE IF NOT EXISTS token_usage (
4483 id INTEGER PRIMARY KEY AUTOINCREMENT,
4484 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
4485 conversation_id INTEGER NOT NULL,
4486 agent_id INTEGER NOT NULL,
4487 workspace_id INTEGER,
4488 source_id TEXT NOT NULL DEFAULT 'local',
4489 timestamp_ms INTEGER NOT NULL,
4490 day_id INTEGER NOT NULL,
4491 model_name TEXT,
4492 model_family TEXT,
4493 model_tier TEXT,
4494 service_tier TEXT,
4495 provider TEXT,
4496 input_tokens INTEGER,
4497 output_tokens INTEGER,
4498 cache_read_tokens INTEGER,
4499 cache_creation_tokens INTEGER,
4500 thinking_tokens INTEGER,
4501 total_tokens INTEGER,
4502 estimated_cost_usd REAL,
4503 role TEXT NOT NULL,
4504 content_chars INTEGER NOT NULL,
4505 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4506 tool_call_count INTEGER NOT NULL DEFAULT 0,
4507 data_source TEXT NOT NULL DEFAULT 'api',
4508 UNIQUE(message_id)
4509);
4510
4511-- Token daily stats (V10)
4512CREATE TABLE IF NOT EXISTS token_daily_stats (
4513 day_id INTEGER NOT NULL,
4514 agent_slug TEXT NOT NULL,
4515 source_id TEXT NOT NULL DEFAULT 'all',
4516 model_family TEXT NOT NULL DEFAULT 'all',
4517 api_call_count INTEGER NOT NULL DEFAULT 0,
4518 user_message_count INTEGER NOT NULL DEFAULT 0,
4519 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4520 tool_message_count INTEGER NOT NULL DEFAULT 0,
4521 total_input_tokens INTEGER NOT NULL DEFAULT 0,
4522 total_output_tokens INTEGER NOT NULL DEFAULT 0,
4523 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
4524 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
4525 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
4526 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
4527 total_content_chars INTEGER NOT NULL DEFAULT 0,
4528 total_tool_calls INTEGER NOT NULL DEFAULT 0,
4529 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
4530 session_count INTEGER NOT NULL DEFAULT 0,
4531 last_updated INTEGER NOT NULL,
4532 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
4533);
4534
4535-- Model pricing (V10)
4536CREATE TABLE IF NOT EXISTS model_pricing (
4537 model_pattern TEXT NOT NULL,
4538 provider TEXT NOT NULL,
4539 input_cost_per_mtok REAL NOT NULL,
4540 output_cost_per_mtok REAL NOT NULL,
4541 cache_read_cost_per_mtok REAL,
4542 cache_creation_cost_per_mtok REAL,
4543 effective_date TEXT NOT NULL,
4544 PRIMARY KEY (model_pattern, effective_date)
4545);
4546
4547INSERT OR IGNORE INTO model_pricing VALUES
4548 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
4549 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
4550 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
4551 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
4552 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
4553 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4554 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4555 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
4556 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
4557 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
4558
4559-- Message metrics: V11 base + V12 model dimensions
4560CREATE TABLE IF NOT EXISTS message_metrics (
4561 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
4562 created_at_ms INTEGER NOT NULL,
4563 hour_id INTEGER NOT NULL,
4564 day_id INTEGER NOT NULL,
4565 agent_slug TEXT NOT NULL,
4566 workspace_id INTEGER NOT NULL DEFAULT 0,
4567 source_id TEXT NOT NULL DEFAULT 'local',
4568 role TEXT NOT NULL,
4569 content_chars INTEGER NOT NULL,
4570 content_tokens_est INTEGER NOT NULL,
4571 api_input_tokens INTEGER,
4572 api_output_tokens INTEGER,
4573 api_cache_read_tokens INTEGER,
4574 api_cache_creation_tokens INTEGER,
4575 api_thinking_tokens INTEGER,
4576 api_service_tier TEXT,
4577 api_data_source TEXT NOT NULL DEFAULT 'estimated',
4578 tool_call_count INTEGER NOT NULL DEFAULT 0,
4579 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4580 has_plan INTEGER NOT NULL DEFAULT 0,
4581 model_name TEXT,
4582 model_family TEXT NOT NULL DEFAULT 'unknown',
4583 model_tier TEXT NOT NULL DEFAULT 'unknown',
4584 provider TEXT NOT NULL DEFAULT 'unknown'
4585);
4586
4587-- Hourly rollups: V11 base + V13 plan columns
4588CREATE TABLE IF NOT EXISTS usage_hourly (
4589 hour_id INTEGER NOT NULL,
4590 agent_slug TEXT NOT NULL,
4591 workspace_id INTEGER NOT NULL DEFAULT 0,
4592 source_id TEXT NOT NULL DEFAULT 'local',
4593 message_count INTEGER NOT NULL DEFAULT 0,
4594 user_message_count INTEGER NOT NULL DEFAULT 0,
4595 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4596 tool_call_count INTEGER NOT NULL DEFAULT 0,
4597 plan_message_count INTEGER NOT NULL DEFAULT 0,
4598 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4599 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4600 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4601 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4602 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4603 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4604 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4605 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4606 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4607 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4608 last_updated INTEGER NOT NULL DEFAULT 0,
4609 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4610 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
4611 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
4612);
4613
4614-- Daily rollups: V11 base + V13 plan columns
4615CREATE TABLE IF NOT EXISTS usage_daily (
4616 day_id INTEGER NOT NULL,
4617 agent_slug TEXT NOT NULL,
4618 workspace_id INTEGER NOT NULL DEFAULT 0,
4619 source_id TEXT NOT NULL DEFAULT 'local',
4620 message_count INTEGER NOT NULL DEFAULT 0,
4621 user_message_count INTEGER NOT NULL DEFAULT 0,
4622 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4623 tool_call_count INTEGER NOT NULL DEFAULT 0,
4624 plan_message_count INTEGER NOT NULL DEFAULT 0,
4625 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4626 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4627 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4628 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4629 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4630 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4631 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4632 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4633 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4634 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4635 last_updated INTEGER NOT NULL DEFAULT 0,
4636 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4637 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
4638 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
4639);
4640
4641-- Model daily rollups (V12)
4642CREATE TABLE IF NOT EXISTS usage_models_daily (
4643 day_id INTEGER NOT NULL,
4644 agent_slug TEXT NOT NULL,
4645 workspace_id INTEGER NOT NULL DEFAULT 0,
4646 source_id TEXT NOT NULL DEFAULT 'local',
4647 model_family TEXT NOT NULL DEFAULT 'unknown',
4648 model_tier TEXT NOT NULL DEFAULT 'unknown',
4649 message_count INTEGER NOT NULL DEFAULT 0,
4650 user_message_count INTEGER NOT NULL DEFAULT 0,
4651 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4652 tool_call_count INTEGER NOT NULL DEFAULT 0,
4653 plan_message_count INTEGER NOT NULL DEFAULT 0,
4654 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4655 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4656 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4657 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4658 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4659 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4660 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4661 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4662 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4663 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4664 last_updated INTEGER NOT NULL DEFAULT 0,
4665 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
4666);
4667
4668-- All indexes
4669CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
4670CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
4671CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
4672CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
4673CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
4674CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
4675CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
4676CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
4677CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
4678CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
4679CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
4680CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
4681CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
4682CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
4683CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
4684CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
4685CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
4686CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
4687CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
4688CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
4689CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
4690CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
4691CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
4692CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
4693CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
4694CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
4695CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
4696CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
4697CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
4698CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
4699";
4700
/// One unit of current-schema repair: a SQL script together with the tables
/// it is responsible for.
///
/// Instances live in `CURRENT_SCHEMA_REPAIR_BATCHES` and are selected by
/// `current_schema_repair_batches_for_missing_tables`.
#[derive(Clone, Copy)]
struct SchemaRepairBatch {
    /// Short identifier for the batch (presumably used for logging and error
    /// context; confirm against the call sites that run the batches).
    name: &'static str,
    /// Tables owned by this batch. The batch is selected when any one of them
    /// is reported missing.
    tables: &'static [&'static str],
    /// SQL text of the batch.
    sql: &'static str,
}
4707
/// Repair SQL for the `sources` table.
///
/// Creates the table when it does not exist and seeds the built-in `'local'`
/// source row. The insert is `OR IGNORE`, so an existing `'local'` row is left
/// untouched. Seed timestamps are `strftime('%s','now') * 1000`, i.e. epoch
/// milliseconds.
const CURRENT_SCHEMA_REPAIR_SOURCES_SQL: &str = r"
CREATE TABLE IF NOT EXISTS sources (
 id TEXT PRIMARY KEY,
 kind TEXT NOT NULL,
 host_label TEXT,
 machine_id TEXT,
 platform TEXT,
 config_json TEXT,
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
4723
/// Repair SQL for the `daily_stats` table.
///
/// Creates the table (keyed by `day_id`, `agent_slug`, `source_id`) and its
/// two secondary indexes. Every statement is `IF NOT EXISTS`, so the script
/// only adds what is absent.
const CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 session_count INTEGER NOT NULL DEFAULT 0,
 message_count INTEGER NOT NULL DEFAULT 0,
 total_chars INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
4739
/// Repair SQL for the `conversation_external_lookup` table.
///
/// Creates the table when absent, then fills it from every `conversations`
/// row that has a non-NULL `external_id`. The lookup key is the
/// length-prefixed concatenation
/// `<len(source_id)>:<source_id>:<agent_id>:<len(external_id)>:<external_id>`.
/// The insert is `OR REPLACE`, so an existing entry with the same key is
/// overwritten.
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
 CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
 CAST(agent_id AS TEXT) || ':' ||
 CAST(length(external_id) AS TEXT) || ':' || external_id,
 id
FROM conversations
WHERE external_id IS NOT NULL;
";
4755
/// Repair SQL for `conversation_tail_state` and
/// `conversation_external_tail_lookup`.
///
/// Creates both tables when absent, then fills
/// `conversation_external_tail_lookup` from every `conversations` row with a
/// non-NULL `external_id`, using the same length-prefixed key layout
/// `<len(source_id)>:<source_id>:<agent_id>:<len(external_id)>:<external_id>`.
/// The tail columns are read from `conversation_tail_state` through a
/// `LEFT JOIN`, so a conversation with no tail-state row is stored with NULL
/// `ended_at`, `last_message_idx` and `last_message_created_at`.
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
 conversation_id INTEGER PRIMARY KEY,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
 lookup_key,
 conversation_id,
 ended_at,
 last_message_idx,
 last_message_created_at
)
SELECT
 CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
 CAST(c.agent_id AS TEXT) || ':' ||
 CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
 c.id,
 ts.ended_at,
 ts.last_message_idx,
 ts.last_message_created_at
FROM conversations c
LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
WHERE c.external_id IS NOT NULL;
";
4791
/// Repair SQL for the `embedding_jobs` table.
///
/// Creates the table when absent, plus a partial unique index on
/// `(db_path, model_id)` restricted to rows whose `status` is `'pending'` or
/// `'running'`, which allows at most one such job per database/model pair.
const CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS embedding_jobs (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 db_path TEXT NOT NULL,
 model_id TEXT NOT NULL,
 status TEXT NOT NULL DEFAULT 'pending',
 total_docs INTEGER NOT NULL DEFAULT 0,
 completed_docs INTEGER NOT NULL DEFAULT 0,
 error_message TEXT,
 created_at TEXT NOT NULL DEFAULT (datetime('now')),
 started_at TEXT,
 completed_at TEXT
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
4810
/// Repair SQL for the token analytics tables: `token_usage`,
/// `token_daily_stats` and `model_pricing`.
///
/// Creates each table and its indexes when absent, then seeds `model_pricing`
/// with a fixed set of pricing rows. The seed uses `INSERT OR IGNORE`, so rows
/// whose `(model_pattern, effective_date)` key already exists are left as they
/// are. The seed `VALUES` list has no column list and therefore depends on the
/// column order of the `CREATE TABLE model_pricing` statement above it.
const CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS token_usage (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 conversation_id INTEGER NOT NULL,
 agent_id INTEGER NOT NULL,
 workspace_id INTEGER,
 source_id TEXT NOT NULL DEFAULT 'local',
 timestamp_ms INTEGER NOT NULL,
 day_id INTEGER NOT NULL,
 model_name TEXT,
 model_family TEXT,
 model_tier TEXT,
 service_tier TEXT,
 provider TEXT,
 input_tokens INTEGER,
 output_tokens INTEGER,
 cache_read_tokens INTEGER,
 cache_creation_tokens INTEGER,
 thinking_tokens INTEGER,
 total_tokens INTEGER,
 estimated_cost_usd REAL,
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 data_source TEXT NOT NULL DEFAULT 'api',
 UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

CREATE TABLE IF NOT EXISTS token_daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 model_family TEXT NOT NULL DEFAULT 'all',
 api_call_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_message_count INTEGER NOT NULL DEFAULT 0,
 total_input_tokens INTEGER NOT NULL DEFAULT 0,
 total_output_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
 total_content_chars INTEGER NOT NULL DEFAULT 0,
 total_tool_calls INTEGER NOT NULL DEFAULT 0,
 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
 session_count INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

CREATE TABLE IF NOT EXISTS model_pricing (
 model_pattern TEXT NOT NULL,
 provider TEXT NOT NULL,
 input_cost_per_mtok REAL NOT NULL,
 output_cost_per_mtok REAL NOT NULL,
 cache_read_cost_per_mtok REAL,
 cache_creation_cost_per_mtok REAL,
 effective_date TEXT NOT NULL,
 PRIMARY KEY (model_pattern, effective_date)
);

INSERT OR IGNORE INTO model_pricing VALUES
 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
";
4896
/// Repair SQL for the per-message metrics table and its rollups:
/// `message_metrics`, `usage_hourly`, `usage_daily` and `usage_models_daily`.
///
/// Creates the four tables and all of their secondary indexes. Every
/// statement is `IF NOT EXISTS` and the script inserts no rows, so it only
/// adds missing schema objects.
const CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS message_metrics (
 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
 created_at_ms INTEGER NOT NULL,
 hour_id INTEGER NOT NULL,
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 content_tokens_est INTEGER NOT NULL,
 api_input_tokens INTEGER,
 api_output_tokens INTEGER,
 api_cache_read_tokens INTEGER,
 api_cache_creation_tokens INTEGER,
 api_thinking_tokens INTEGER,
 api_service_tier TEXT,
 api_data_source TEXT NOT NULL DEFAULT 'estimated',
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 has_plan INTEGER NOT NULL DEFAULT 0,
 model_name TEXT,
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 provider TEXT NOT NULL DEFAULT 'unknown'
);

CREATE TABLE IF NOT EXISTS usage_hourly (
 hour_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_models_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
5022
/// Registry of every current-schema repair batch.
///
/// `current_schema_repair_batches_for_missing_tables` walks this slice in
/// order, so the order here is the order in which selected batches are
/// returned. Each table appears in exactly one batch's `tables` list.
const CURRENT_SCHEMA_REPAIR_BATCHES: &[SchemaRepairBatch] = &[
    SchemaRepairBatch {
        name: "sources",
        tables: &["sources"],
        sql: CURRENT_SCHEMA_REPAIR_SOURCES_SQL,
    },
    SchemaRepairBatch {
        name: "daily_stats",
        tables: &["daily_stats"],
        sql: CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL,
    },
    SchemaRepairBatch {
        name: "conversation_external_lookup",
        tables: &["conversation_external_lookup"],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL,
    },
    // One script creates both tables, because the lookup is populated by
    // joining against `conversation_tail_state`.
    SchemaRepairBatch {
        name: "conversation_external_tail_lookup",
        tables: &[
            "conversation_tail_state",
            "conversation_external_tail_lookup",
        ],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL,
    },
    SchemaRepairBatch {
        name: "embedding_jobs",
        tables: &["embedding_jobs"],
        sql: CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL,
    },
    SchemaRepairBatch {
        name: "token_analytics",
        tables: &["token_usage", "token_daily_stats", "model_pricing"],
        sql: CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL,
    },
    SchemaRepairBatch {
        name: "message_rollups",
        tables: &[
            "message_metrics",
            "usage_hourly",
            "usage_daily",
            "usage_models_daily",
        ],
        sql: CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL,
    },
];
5068
5069fn current_schema_repair_batches_for_missing_tables(
5070 missing_tables: &[&'static str],
5071) -> Result<Vec<&'static SchemaRepairBatch>> {
5072 let missing_set: HashSet<&'static str> = missing_tables.iter().copied().collect();
5073 let mut selected_batches = Vec::new();
5074 let mut covered_tables = HashSet::new();
5075
5076 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
5077 if !batch
5078 .tables
5079 .iter()
5080 .any(|table_name| missing_set.contains(table_name))
5081 {
5082 continue;
5083 }
5084 selected_batches.push(batch);
5085 covered_tables.extend(batch.tables.iter().copied());
5086 }
5087
5088 for &table_name in missing_tables {
5089 if !covered_tables.contains(table_name) {
5090 return Err(anyhow!(
5091 "no current-schema repair batch registered for missing table {table_name}"
5092 ));
5093 }
5094 }
5095
5096 Ok(selected_batches)
5097}
5098
/// Migration version numbers paired with their names, in ascending version
/// order (1 through 20).
///
/// `transition_from_meta_version` reads this table to backfill
/// `_schema_migrations` rows and stops at the first version above the recorded
/// one, so the ascending order must be preserved.
const MIGRATION_NAMES: [(i64, &str); 20] = [
    (1, "core_tables"),
    (2, "fts_messages"),
    (3, "fts_messages_rebuild"),
    (4, "sources"),
    (5, "provenance_columns"),
    (6, "source_path_index"),
    (7, "msgpack_columns"),
    (8, "daily_stats"),
    (9, "embedding_jobs"),
    (10, "token_analytics"),
    (11, "message_metrics"),
    (12, "model_dimensions"),
    (13, "plan_token_rollups"),
    (14, "fts_contentless"),
    (15, "conversation_tail_state_cache"),
    (16, "drop_redundant_message_conv_idx"),
    (17, "drop_message_created_idx"),
    (18, "conversation_tail_state_hot_table"),
    (19, "conversation_external_lookup"),
    (20, "conversation_external_tail_lookup"),
];
5122
5123fn transition_from_meta_version(conn: &FrankenConnection) -> Result<()> {
5139 if conn
5143 .query("SELECT version FROM \"_schema_migrations\";")
5144 .is_ok()
5145 {
5146 return Ok(());
5147 }
5148
5149 if conn.query("SELECT key FROM meta;").is_err() {
5151 return Ok(());
5153 }
5154
5155 let rows = conn
5157 .query("SELECT value FROM meta WHERE key = 'schema_version';")
5158 .with_context(|| "reading schema_version from meta")?;
5159
5160 let current_version: i64 = rows
5161 .first()
5162 .and_then(|row| row.get_typed::<String>(0).ok())
5163 .and_then(|s| s.parse().ok())
5164 .unwrap_or(0);
5165
5166 if current_version == 0 {
5167 if conn.query("SELECT id FROM conversations LIMIT 1;").is_err() {
5169 return Ok(());
5171 }
5172
5173 info!("meta.schema_version=0 but tables exist; skipping transition (corrupted state)");
5176 return Ok(());
5177 }
5178
5179 info!(
5181 current_version,
5182 "transitioning schema tracking from meta table to _schema_migrations"
5183 );
5184
5185 conn.execute(
5186 "CREATE TABLE IF NOT EXISTS _schema_migrations (\
5187 version INTEGER PRIMARY KEY, \
5188 name TEXT NOT NULL, \
5189 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))\
5190 );",
5191 )
5192 .with_context(|| "creating _schema_migrations table for transition")?;
5193
5194 for &(version, name) in &MIGRATION_NAMES {
5195 if version > current_version {
5196 break;
5197 }
5198 conn.execute_compat(
5199 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
5200 &[ParamValue::from(version), ParamValue::from(name)],
5201 )
5202 .with_context(|| format!("backfilling _schema_migrations version {version}"))?;
5203 }
5204
5205 info!(
5206 current_version,
5207 "schema version transition complete: backfilled entries for versions 1..={current_version}"
5208 );
5209
5210 Ok(())
5211}
5212
/// Tables expected in the current schema, each paired with a one-row `SELECT`
/// that names the table.
///
/// The table names listed here are the same set as the tables covered by
/// `CURRENT_SCHEMA_REPAIR_BATCHES`. Presumably a probe that fails with a
/// "no such table" error marks the table as missing — confirm against the
/// code that runs these probes.
const REQUIRED_CURRENT_SCHEMA_TABLE_PROBES: &[(&str, &str)] = &[
    ("sources", "SELECT id FROM sources LIMIT 1;"),
    ("daily_stats", "SELECT day_id FROM daily_stats LIMIT 1;"),
    (
        "conversation_external_lookup",
        "SELECT lookup_key FROM conversation_external_lookup LIMIT 1;",
    ),
    (
        "conversation_tail_state",
        "SELECT conversation_id FROM conversation_tail_state LIMIT 1;",
    ),
    // This probe selects two columns, unlike the others.
    (
        "conversation_external_tail_lookup",
        "SELECT lookup_key, last_message_idx FROM conversation_external_tail_lookup LIMIT 1;",
    ),
    ("embedding_jobs", "SELECT id FROM embedding_jobs LIMIT 1;"),
    ("token_usage", "SELECT id FROM token_usage LIMIT 1;"),
    (
        "token_daily_stats",
        "SELECT day_id FROM token_daily_stats LIMIT 1;",
    ),
    (
        "model_pricing",
        "SELECT model_pattern FROM model_pricing LIMIT 1;",
    ),
    (
        "message_metrics",
        "SELECT message_id FROM message_metrics LIMIT 1;",
    ),
    ("usage_hourly", "SELECT hour_id FROM usage_hourly LIMIT 1;"),
    ("usage_daily", "SELECT day_id FROM usage_daily LIMIT 1;"),
    (
        "usage_models_daily",
        "SELECT day_id FROM usage_models_daily LIMIT 1;",
    ),
];
5249
/// `(column name, SQL type)` pairs for token, cost and message-count columns.
// NOTE(review): presumably these are columns expected on the `conversations` table and the
// type string is used when adding a missing column; the consumer is outside this chunk.
const REQUIRED_CONVERSATION_TOKEN_COLUMNS: &[(&str, &str)] = &[
    ("total_input_tokens", "INTEGER"),
    ("total_output_tokens", "INTEGER"),
    ("total_cache_read_tokens", "INTEGER"),
    ("total_cache_creation_tokens", "INTEGER"),
    ("grand_total_tokens", "INTEGER"),
    ("estimated_cost_usd", "REAL"),
    ("primary_model", "TEXT"),
    ("api_call_count", "INTEGER"),
    ("tool_call_count", "INTEGER"),
    ("user_message_count", "INTEGER"),
    ("assistant_message_count", "INTEGER"),
];
5263
/// Reports whether the rendered error text mentions a missing table.
///
/// The error is formatted through `Display`, ASCII-lowercased, and searched for the phrase
/// `no such table`, so the match is case-insensitive for ASCII letters.
fn error_indicates_missing_table(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.find("no such table").is_some()
}
5269
/// Reports whether the rendered error text mentions a missing column.
///
/// The error is formatted through `Display`, ASCII-lowercased, and searched for the phrase
/// `no such column`, so the match is case-insensitive for ASCII letters.
fn error_indicates_missing_column(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.find("no such column").is_some()
}
5275
/// Batch size for orphan foreign-key cleanup: the orphan deletion helpers split id slices
/// into chunks of this length, and the direct-orphan page query binds it as its `LIMIT`.
const ORPHAN_FK_ID_CHUNK_SIZE: usize = 256;
5277
5278fn collect_orphan_message_ids(conn: &FrankenConnection) -> Result<Vec<i64>> {
5279 let min_conversation_id = conn
5280 .query_map_collect(
5281 "SELECT conversation_id
5282 FROM messages
5283 ORDER BY conversation_id ASC
5284 LIMIT 1",
5285 fparams![],
5286 |row| row.get_typed(0),
5287 )
5288 .context("finding minimum message conversation id for orphan FK cleanup")?
5289 .into_iter()
5290 .next();
5291 let Some(min_conversation_id) = min_conversation_id else {
5292 return Ok(Vec::new());
5293 };
5294 let max_conversation_id: i64 = conn
5295 .query_row_map(
5296 "SELECT conversation_id
5297 FROM messages
5298 ORDER BY conversation_id DESC
5299 LIMIT 1",
5300 fparams![],
5301 |row| row.get_typed(0),
5302 )
5303 .context("finding maximum message conversation id for orphan FK cleanup")?;
5304
5305 let parent_conversation_ids: Vec<i64> = conn
5306 .query_map_collect(
5307 "SELECT id
5308 FROM conversations
5309 WHERE id BETWEEN ?1 AND ?2
5310 ORDER BY id",
5311 fparams![min_conversation_id, max_conversation_id],
5312 |row| row.get_typed(0),
5313 )
5314 .context("listing parent conversation ids for orphan FK cleanup")?;
5315
5316 let mut message_ids = Vec::new();
5317 let mut gap_start = min_conversation_id;
5318 for parent_id in parent_conversation_ids {
5319 if parent_id < gap_start {
5320 continue;
5321 }
5322 if parent_id > max_conversation_id {
5323 break;
5324 }
5325 if gap_start < parent_id {
5326 collect_message_ids_for_conversation_gap(
5327 conn,
5328 gap_start,
5329 parent_id.saturating_sub(1),
5330 &mut message_ids,
5331 )?;
5332 }
5333 if parent_id == i64::MAX {
5334 return Ok(message_ids);
5335 }
5336 gap_start = parent_id + 1;
5337 }
5338 if gap_start <= max_conversation_id {
5339 collect_message_ids_for_conversation_gap(
5340 conn,
5341 gap_start,
5342 max_conversation_id,
5343 &mut message_ids,
5344 )?;
5345 }
5346
5347 Ok(message_ids)
5348}
5349
5350fn collect_message_ids_for_conversation_gap(
5351 conn: &FrankenConnection,
5352 gap_start: i64,
5353 gap_end: i64,
5354 message_ids: &mut Vec<i64>,
5355) -> Result<()> {
5356 let (sql, params) = if gap_start == gap_end {
5357 (
5358 "SELECT id FROM messages WHERE conversation_id = ?1",
5359 vec![SqliteValue::from(gap_start)],
5360 )
5361 } else {
5362 (
5363 "SELECT id FROM messages WHERE conversation_id BETWEEN ?1 AND ?2",
5364 vec![SqliteValue::from(gap_start), SqliteValue::from(gap_end)],
5365 )
5366 };
5367 let rows = conn.query_with_params(sql, ¶ms).with_context(|| {
5368 format!("listing orphan message ids for conversation-id gap {gap_start}..={gap_end}")
5369 })?;
5370 message_ids.reserve(rows.len());
5371 for row in rows {
5372 message_ids.push(row.get_typed(0)?);
5373 }
5374 Ok(())
5375}
5376
5377fn delete_rows_by_i64_chunks(
5378 tx: &FrankenTransaction<'_>,
5379 delete_sql: &'static str,
5380 ids: &[i64],
5381) -> Result<usize> {
5382 let mut deleted = 0;
5383 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5384 for id in chunk {
5385 deleted += tx.execute_with_params(delete_sql, &[SqliteValue::from(*id)])?;
5386 }
5387 }
5388 Ok(deleted)
5389}
5390
5391fn delete_orphan_message_ids_bisecting_oom(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5392 let mut deleted = 0usize;
5393 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5394 deleted = deleted.saturating_add(delete_orphan_message_id_chunk(conn, chunk)?);
5395 }
5396 Ok(deleted)
5397}
5398
5399fn delete_orphan_message_id_chunk(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5400 if ids.is_empty() {
5401 return Ok(0);
5402 }
5403
5404 match delete_orphan_message_id_chunk_once(conn, ids) {
5405 Ok(deleted) => Ok(deleted),
5406 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5407 let split_at = ids.len() / 2;
5408 tracing::warn!(
5409 target: "cass::fk_repair",
5410 rows = ids.len(),
5411 left = split_at,
5412 right = ids.len().saturating_sub(split_at),
5413 error = %err,
5414 "orphan-message cleanup ran out of memory; retrying as smaller batches"
5415 );
5416 let left = delete_orphan_message_id_chunk(conn, &ids[..split_at])?;
5417 let right = delete_orphan_message_id_chunk(conn, &ids[split_at..])?;
5418 Ok(left.saturating_add(right))
5419 }
5420 Err(err) => Err(err),
5421 }
5422}
5423
5424fn delete_orphan_message_id_chunk_once(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5425 let mut tx = conn.transaction()?;
5426 let mut deleted = 0usize;
5427 for entry in ORPHAN_MESSAGE_DEPENDENT_TABLES {
5428 match delete_rows_by_i64_chunks(&tx, entry.delete_sql, ids) {
5429 Ok(count) => {
5430 deleted = deleted.saturating_add(count);
5431 }
5432 Err(err) if error_indicates_missing_table(&err) => {
5433 tracing::debug!(
5434 target: "cass::fk_repair",
5435 child_table = entry.child_table,
5436 error = %err,
5437 "skipping orphan-message dependent cleanup (table unavailable)"
5438 );
5439 }
5440 Err(err) => {
5441 return Err(err).with_context(|| {
5442 format!(
5443 "deleting rows from {} that depend on orphan messages",
5444 entry.child_table
5445 )
5446 });
5447 }
5448 }
5449 }
5450 deleted = deleted.saturating_add(
5451 delete_rows_by_i64_chunks(&tx, "DELETE FROM messages WHERE id = ?1", ids)
5452 .context("deleting orphan rows from messages")?,
5453 );
5454 tx.commit()?;
5455 Ok(deleted)
5456}
5457
5458fn collect_direct_orphan_id_page(
5459 conn: &FrankenConnection,
5460 entry: &'static OrphanFkTable,
5461) -> Result<Vec<i64>> {
5462 Ok(conn.query_map_collect(
5463 entry.orphan_id_page_sql,
5464 fparams![i64::try_from(ORPHAN_FK_ID_CHUNK_SIZE).unwrap_or(i64::MAX)],
5465 |row| row.get_typed(0),
5466 )?)
5467}
5468
5469fn delete_direct_orphan_ids_bisecting_oom(
5470 conn: &FrankenConnection,
5471 entry: &'static OrphanFkTable,
5472 ids: &[i64],
5473) -> Result<usize> {
5474 let mut deleted = 0usize;
5475 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5476 deleted = deleted.saturating_add(delete_direct_orphan_id_chunk(conn, entry, chunk)?);
5477 }
5478 Ok(deleted)
5479}
5480
5481fn delete_direct_orphan_id_chunk(
5482 conn: &FrankenConnection,
5483 entry: &'static OrphanFkTable,
5484 ids: &[i64],
5485) -> Result<usize> {
5486 if ids.is_empty() {
5487 return Ok(0);
5488 }
5489
5490 match delete_direct_orphan_id_chunk_once(conn, entry, ids) {
5491 Ok(deleted) => Ok(deleted),
5492 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5493 let split_at = ids.len() / 2;
5494 tracing::warn!(
5495 target: "cass::fk_repair",
5496 child_table = entry.child_table,
5497 rows = ids.len(),
5498 left = split_at,
5499 right = ids.len().saturating_sub(split_at),
5500 error = %err,
5501 "direct orphan cleanup ran out of memory; retrying as smaller batches"
5502 );
5503 let left = delete_direct_orphan_id_chunk(conn, entry, &ids[..split_at])?;
5504 let right = delete_direct_orphan_id_chunk(conn, entry, &ids[split_at..])?;
5505 Ok(left.saturating_add(right))
5506 }
5507 Err(err) => Err(err),
5508 }
5509}
5510
5511fn delete_direct_orphan_id_chunk_once(
5512 conn: &FrankenConnection,
5513 entry: &'static OrphanFkTable,
5514 ids: &[i64],
5515) -> Result<usize> {
5516 let mut tx = conn.transaction()?;
5517 let deleted = delete_rows_by_i64_chunk_bulk(&tx, entry.delete_many_sql_prefix, ids)?;
5518 tx.commit()?;
5519 Ok(deleted)
5520}
5521
5522fn delete_rows_by_i64_chunk_bulk(
5523 tx: &FrankenTransaction<'_>,
5524 delete_many_sql_prefix: &'static str,
5525 ids: &[i64],
5526) -> Result<usize> {
5527 if ids.is_empty() {
5528 return Ok(0);
5529 }
5530
5531 let placeholders = (1..=ids.len())
5532 .map(|idx| format!("?{idx}"))
5533 .collect::<Vec<_>>()
5534 .join(", ");
5535 let sql = format!("{delete_many_sql_prefix} ({placeholders})");
5536 let params = ids
5537 .iter()
5538 .map(|id| SqliteValue::from(*id))
5539 .collect::<Vec<_>>();
5540 Ok(tx.execute_with_params(&sql, ¶ms)?)
5541}
5542
/// SQL needed to find and delete orphan rows in one child table.
struct OrphanFkTable {
    /// Name of the child table; used in log output.
    child_table: &'static str,
    /// Query returning one page of orphaned key values; `?1` is the page size (`LIMIT`).
    orphan_id_page_sql: &'static str,
    /// `DELETE ... WHERE <key> IN` prefix; a parenthesised placeholder list is appended to it.
    delete_many_sql_prefix: &'static str,
}
5553
/// Child tables cleaned up by direct orphan detection.
///
/// The first three entries select `message_id` values that are absent from `messages.id`;
/// the `conversation_tags` entry selects `conversation_id` values absent from
/// `conversations.id`. Each page query orders by the key and limits to `?1` rows.
const ORPHAN_DIRECT_CHILD_TABLES: &[OrphanFkTable] = &[
    OrphanFkTable {
        child_table: "message_metrics",
        orphan_id_page_sql: "SELECT message_id FROM message_metrics \
             WHERE message_id NOT IN (SELECT id FROM messages) \
             ORDER BY message_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "token_usage",
        orphan_id_page_sql: "SELECT message_id FROM token_usage \
             WHERE message_id NOT IN (SELECT id FROM messages) \
             ORDER BY message_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "snippets",
        orphan_id_page_sql: "SELECT message_id FROM snippets \
             WHERE message_id NOT IN (SELECT id FROM messages) \
             ORDER BY message_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "conversation_tags",
        orphan_id_page_sql: "SELECT conversation_id FROM conversation_tags \
             WHERE conversation_id NOT IN (SELECT id FROM conversations) \
             ORDER BY conversation_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM conversation_tags WHERE conversation_id IN",
    },
];
5588
/// A table holding rows keyed by `message_id` that are deleted before an orphan message is.
struct OrphanMessageDependentTable {
    /// Name of the dependent table; used in log output and error context.
    child_table: &'static str,
    /// Single-row delete statement; `?1` is the message id.
    delete_sql: &'static str,
}
5593
/// Dependent tables purged, in this order, before orphan `messages` rows are deleted.
const ORPHAN_MESSAGE_DEPENDENT_TABLES: &[OrphanMessageDependentTable] = &[
    OrphanMessageDependentTable {
        child_table: "message_metrics",
        delete_sql: "DELETE FROM message_metrics WHERE message_id = ?1",
    },
    OrphanMessageDependentTable {
        child_table: "token_usage",
        delete_sql: "DELETE FROM token_usage WHERE message_id = ?1",
    },
    OrphanMessageDependentTable {
        child_table: "snippets",
        delete_sql: "DELETE FROM snippets WHERE message_id = ?1",
    },
];
5608
/// Running tally of rows removed by orphan foreign-key cleanup.
#[derive(Debug, Default, Clone)]
pub(crate) struct OrphanFkCleanupReport {
    /// Saturating sum of every count passed to `record`.
    pub total: i64,
    /// Per-table counts, in order of each table's first appearance.
    pub per_table: Vec<(&'static str, i64)>,
}

impl OrphanFkCleanupReport {
    /// Adds `count` to the entry for `child_table` (creating the entry on first use) and to
    /// the overall total. Both additions saturate instead of overflowing.
    fn record(&mut self, child_table: &'static str, count: i64) {
        self.total = self.total.saturating_add(count);
        for slot in &mut self.per_table {
            if slot.0 == child_table {
                slot.1 = slot.1.saturating_add(count);
                return;
            }
        }
        self.per_table.push((child_table, count));
    }
}
5639
/// Result of inserting a conversation and its messages.
// NOTE(review): field meanings below are inferred from the names; the code that builds this
// struct is outside this chunk — confirm against it.
pub struct InsertOutcome {
    /// Row id of the conversation that was written to.
    pub conversation_id: i64,
    /// Presumably `true` when a new conversation row was created rather than reused.
    pub conversation_inserted: bool,
    /// Presumably the `idx` values of the messages that were actually inserted.
    pub inserted_indices: Vec<i64>,
}
5645
/// Test-only counters and timings for the sub-stages of message insertion; embedded in
/// `InsertConversationTreePerfProfile` as `message_insert_breakdown`.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct MessageInsertSubstageProfile {
    single_row_calls: usize,
    batch_calls: usize,
    batch_rows: usize,
    payload_duration: Duration,
    sql_build_duration: Duration,
    param_build_duration: Duration,
    execute_duration: Duration,
    rowid_duration: Duration,
}
5658
/// Test-only accumulator of call counts and per-stage durations for conversation-tree
/// inserts; `log_summary` prints the totals and per-call averages.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct InsertConversationTreePerfProfile {
    invocations: usize,
    messages: usize,
    inserted_messages: usize,
    total_duration: Duration,
    source_duration: Duration,
    tx_open_duration: Duration,
    existing_lookup_duration: Duration,
    existing_idx_lookup_duration: Duration,
    existing_replay_lookup_duration: Duration,
    dedupe_filter_duration: Duration,
    conversation_row_duration: Duration,
    message_insert_duration: Duration,
    // Finer-grained split of `message_insert_duration`.
    message_insert_breakdown: MessageInsertSubstageProfile,
    snippet_insert_duration: Duration,
    fts_entry_duration: Duration,
    fts_flush_duration: Duration,
    analytics_duration: Duration,
    commit_duration: Duration,
}
5681
#[cfg(test)]
impl InsertConversationTreePerfProfile {
    /// Converts a duration to fractional milliseconds.
    fn millis(duration: Duration) -> f64 {
        duration.as_secs_f64() * 1000.0
    }

    /// Prints one `CASS_INSERT_TREE_STAGE_PROFILE` line to stderr containing the totals, the
    /// message-insert breakdown, the unaccounted residual and per-call averages.
    fn log_summary(&self, label: &str) {
        // Clamp to 1 so the averages below never divide by zero.
        let calls = self.invocations.max(1) as f64;
        let accounted_duration = self.source_duration
            + self.tx_open_duration
            + self.existing_lookup_duration
            + self.existing_idx_lookup_duration
            + self.existing_replay_lookup_duration
            + self.dedupe_filter_duration
            + self.conversation_row_duration
            + self.message_insert_duration
            + self.snippet_insert_duration
            + self.fts_entry_duration
            + self.fts_flush_duration
            + self.analytics_duration
            + self.commit_duration;
        // Time not attributed to any stage; saturates at zero if the stages sum past the total.
        let residual_duration = self.total_duration.saturating_sub(accounted_duration);
        // The arguments below are positional: their order must match the placeholders.
        eprintln!(
            concat!(
                "CASS_INSERT_TREE_STAGE_PROFILE ",
                "label={} calls={} messages={} inserted_messages={} ",
                "total_ms={:.3} source_ms={:.3} tx_open_ms={:.3} existing_lookup_ms={:.3} ",
                "existing_idx_lookup_ms={:.3} existing_replay_lookup_ms={:.3} dedupe_filter_ms={:.3} ",
                "conversation_row_ms={:.3} message_insert_ms={:.3} snippet_insert_ms={:.3} ",
                "fts_entry_ms={:.3} fts_flush_ms={:.3} analytics_ms={:.3} commit_ms={:.3} ",
                "msg_payload_ms={:.3} msg_sql_ms={:.3} msg_param_ms={:.3} msg_execute_ms={:.3} msg_rowid_ms={:.3} ",
                "residual_ms={:.3} avg_total_ms={:.3} avg_message_insert_ms={:.3} ",
                "avg_msg_execute_ms={:.3} avg_msg_payload_ms={:.3} avg_snippet_insert_ms={:.3} avg_fts_entry_ms={:.3} avg_commit_ms={:.3}"
            ),
            label,
            self.invocations,
            self.messages,
            self.inserted_messages,
            Self::millis(self.total_duration),
            Self::millis(self.source_duration),
            Self::millis(self.tx_open_duration),
            Self::millis(self.existing_lookup_duration),
            Self::millis(self.existing_idx_lookup_duration),
            Self::millis(self.existing_replay_lookup_duration),
            Self::millis(self.dedupe_filter_duration),
            Self::millis(self.conversation_row_duration),
            Self::millis(self.message_insert_duration),
            Self::millis(self.snippet_insert_duration),
            Self::millis(self.fts_entry_duration),
            Self::millis(self.fts_flush_duration),
            Self::millis(self.analytics_duration),
            Self::millis(self.commit_duration),
            Self::millis(self.message_insert_breakdown.payload_duration),
            Self::millis(self.message_insert_breakdown.sql_build_duration),
            Self::millis(self.message_insert_breakdown.param_build_duration),
            Self::millis(self.message_insert_breakdown.execute_duration),
            Self::millis(self.message_insert_breakdown.rowid_duration),
            Self::millis(residual_duration),
            Self::millis(self.total_duration) / calls,
            Self::millis(self.message_insert_duration) / calls,
            Self::millis(self.message_insert_breakdown.execute_duration) / calls,
            Self::millis(self.message_insert_breakdown.payload_duration) / calls,
            Self::millis(self.snippet_insert_duration) / calls,
            Self::millis(self.fts_entry_duration) / calls,
            Self::millis(self.commit_duration) / calls,
        );
    }
}
5750
/// Identity used to look up an existing conversation.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum PendingConversationKey {
    /// Identified by source, agent and an external id.
    External {
        source_id: String,
        agent_id: i64,
        external_id: String,
    },
    /// Identified by source, agent, source path and an optional start timestamp.
    // NOTE(review): presumably the fallback for conversations without an external id — confirm
    // against the code that builds these keys.
    SourcePath {
        source_id: String,
        agent_id: i64,
        source_path: String,
        started_at: Option<i64>,
    },
}
5765
/// Builds the lookup key `<source chars>:<source_id>:<agent_id>:<external chars>:<external_id>`.
///
/// The two length fields count Unicode scalar values (`chars()`), not bytes.
fn conversation_external_lookup_key(source_id: &str, agent_id: i64, external_id: &str) -> String {
    let source_chars = source_id.chars().count();
    let external_chars = external_id.chars().count();
    format!("{source_chars}:{source_id}:{agent_id}:{external_chars}:{external_id}")
}
5773
5774fn conversation_external_lookup_key_for_conv(agent_id: i64, conv: &Conversation) -> Option<String> {
5775 conv.external_id
5776 .as_deref()
5777 .map(|external_id| conversation_external_lookup_key(&conv.source_id, agent_id, external_id))
5778}
5779
/// Identity of a message including its position: two messages are equal only if `idx`,
/// timestamp, role, author and content hash all match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageMergeFingerprint {
    idx: i64,
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
5788
/// Identity of a message ignoring its position: the same fields as
/// `MessageMergeFingerprint` without `idx`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageReplayFingerprint {
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    /// BLAKE3 hash of the message content bytes.
    content_hash: [u8; 32],
}
5796
/// Overlap measurements between an incoming and an existing conversation, produced by
/// `conversation_merge_evidence`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ConversationMergeEvidence {
    /// Number of merge fingerprints (position included) present in both conversations.
    exact_overlap: usize,
    /// Number of replay fingerprints (position ignored) present in both conversations.
    replay_overlap: usize,
    /// Size of the smaller of the two replay-fingerprint sets.
    smaller_replay_set: usize,
    /// Whether the two start timestamps fall within `SOURCE_PATH_MERGE_START_TOLERANCE_MS`.
    started_close: bool,
    // NOTE(review): presumably the value of `start_distance_ms` for the two start timestamps;
    // the assignment is outside this chunk — confirm.
    start_distance_ms: i64,
}
5805
/// Incoming messages selected for insertion into an already-stored conversation, plus
/// bookkeeping gathered while selecting them.
struct ExistingConversationNewMessages<'a> {
    /// Messages to insert, borrowed from the incoming conversation in their original order.
    messages: Vec<&'a Message>,
    /// Sum of `content.len()` over `messages` (UTF-8 byte lengths, despite the name).
    new_chars: i64,
    /// Number of incoming messages whose `idx` already exists with a different fingerprint.
    idx_collision_count: usize,
    /// `idx` of the first such collision, if any.
    first_collision_idx: Option<i64>,
}
5812
/// Tail of a stored conversation: the highest known message index and creation timestamp,
/// together with the conversation's `ended_at` value.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationTailState {
    last_message_idx: i64,
    last_message_created_at: i64,
    ended_at: Option<i64>,
}
5819
/// A stored conversation's row id paired with its tail state, when one could be determined.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationWithTail {
    id: i64,
    /// `None` when no tail state is available for the conversation.
    tail_state: Option<ExistingConversationTailState>,
}
5825
5826fn conversation_effective_started_at(conv: &Conversation) -> Option<i64> {
5827 conv.started_at
5828 .or_else(|| conv.messages.iter().filter_map(|msg| msg.created_at).min())
5829}
5830
5831fn conversation_tail_state(conv: &Conversation) -> (Option<i64>, Option<i64>) {
5832 (
5833 conv.messages.iter().map(|msg| msg.idx).max(),
5834 conv.messages.iter().filter_map(|msg| msg.created_at).max(),
5835 )
5836}
5837
5838fn borrowed_messages_tail_state(messages: &[&Message]) -> (Option<i64>, Option<i64>) {
5839 (
5840 messages.iter().map(|msg| msg.idx).max(),
5841 messages.iter().filter_map(|msg| msg.created_at).max(),
5842 )
5843}
5844
5845fn role_from_str(role: &str) -> MessageRole {
5846 match role {
5847 "user" => MessageRole::User,
5848 "agent" | "assistant" => MessageRole::Agent,
5849 "tool" => MessageRole::Tool,
5850 "system" => MessageRole::System,
5851 other => MessageRole::Other(other.to_string()),
5852 }
5853}
5854
5855fn message_merge_fingerprint(msg: &Message) -> MessageMergeFingerprint {
5856 MessageMergeFingerprint {
5857 idx: msg.idx,
5858 created_at: msg.created_at,
5859 role: msg.role.clone(),
5860 author: msg.author.clone(),
5861 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
5862 }
5863}
5864
5865fn message_replay_fingerprint(msg: &Message) -> MessageReplayFingerprint {
5866 MessageReplayFingerprint {
5867 created_at: msg.created_at,
5868 role: msg.role.clone(),
5869 author: msg.author.clone(),
5870 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
5871 }
5872}
5873
5874fn conversation_message_fingerprints(conv: &Conversation) -> HashSet<MessageMergeFingerprint> {
5875 conv.messages
5876 .iter()
5877 .map(message_merge_fingerprint)
5878 .collect()
5879}
5880
5881fn conversation_message_replay_fingerprints(
5882 conv: &Conversation,
5883) -> HashSet<MessageReplayFingerprint> {
5884 conv.messages
5885 .iter()
5886 .map(message_replay_fingerprint)
5887 .collect()
5888}
5889
5890fn replay_fingerprint_from_merge(
5891 fingerprint: &MessageMergeFingerprint,
5892) -> MessageReplayFingerprint {
5893 MessageReplayFingerprint {
5894 created_at: fingerprint.created_at,
5895 role: fingerprint.role.clone(),
5896 author: fingerprint.author.clone(),
5897 content_hash: fingerprint.content_hash,
5898 }
5899}
5900
5901fn replay_fingerprints_from_merge_set(
5902 fingerprints: &HashSet<MessageMergeFingerprint>,
5903) -> HashSet<MessageReplayFingerprint> {
5904 fingerprints
5905 .iter()
5906 .map(replay_fingerprint_from_merge)
5907 .collect()
5908}
5909
5910fn collect_new_messages_for_existing_conversation<'a>(
5911 conversation_id: i64,
5912 conv: &'a Conversation,
5913 existing_messages: &mut HashMap<i64, MessageMergeFingerprint>,
5914 existing_replay_fingerprints: &mut HashSet<MessageReplayFingerprint>,
5915 replay_skip_log: &'static str,
5916) -> ExistingConversationNewMessages<'a> {
5917 let mut idx_collision_count = 0usize;
5918 let mut first_collision_idx: Option<i64> = None;
5919 let mut new_chars: i64 = 0;
5920 let mut messages = Vec::new();
5921
5922 for msg in &conv.messages {
5923 let incoming_fingerprint = message_merge_fingerprint(msg);
5924 if let Some(existing_fingerprint) = existing_messages.get(&msg.idx) {
5925 if existing_fingerprint != &incoming_fingerprint {
5926 idx_collision_count = idx_collision_count.saturating_add(1);
5927 first_collision_idx.get_or_insert(msg.idx);
5928 }
5929 continue;
5930 }
5931
5932 let incoming_replay = replay_fingerprint_from_merge(&incoming_fingerprint);
5933 if existing_replay_fingerprints.contains(&incoming_replay) {
5934 tracing::debug!(
5935 conversation_id,
5936 idx = msg.idx,
5937 source_path = %conv.source_path.display(),
5938 "{replay_skip_log}"
5939 );
5940 continue;
5941 }
5942
5943 existing_messages.insert(msg.idx, incoming_fingerprint);
5944 existing_replay_fingerprints.insert(incoming_replay);
5945 new_chars += msg.content.len() as i64;
5946 messages.push(msg);
5947 }
5948
5949 ExistingConversationNewMessages {
5950 messages,
5951 new_chars,
5952 idx_collision_count,
5953 first_collision_idx,
5954 }
5955}
5956
/// Resolves the tail state of a stored conversation, consulting three sources in order and
/// writing a hit from a later source back so the earlier one can answer next time.
///
/// 1. The `conversation_tail_state` row, when it exists and has both `last_message_idx` and
///    `last_message_created_at`.
/// 2. The same columns on the `conversations` row; a hit is copied into
///    `conversation_tail_state`.
/// 3. `MAX(idx)` / `MAX(created_at)` over the conversation's `messages`; a hit is persisted
///    through `franken_update_conversation_tail_state`.
///
/// Returns `Ok(None)` when no source yields both values.
///
/// # Errors
/// Propagates any query or write failure.
// NOTE(review): the `conversations` lookup is not wrapped in `.optional()`, so a missing
// conversation row presumably surfaces as an error rather than `None` — confirm.
fn franken_existing_conversation_append_tail_state(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
) -> Result<Option<ExistingConversationTailState>> {
    let cached: Option<(Option<i64>, Option<i64>, Option<i64>)> = tx
        .query_row_map(
            "SELECT last_message_idx, last_message_created_at, ended_at
             FROM conversation_tail_state
             WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
        )
        .optional()?;
    if let Some(cached) = cached {
        let (_, _, cached_ended_at) = cached;
        // Only usable when both the index and the timestamp are present.
        if let Some(tail_state) =
            existing_conversation_tail_state_from_cached(cached.0, cached.1, cached_ended_at)
        {
            return Ok(Some(tail_state));
        }
    }

    // Fall back to the tail columns stored on the conversation row itself.
    let legacy_cached: (Option<i64>, Option<i64>, Option<i64>) = tx.query_row_map(
        "SELECT last_message_idx, last_message_created_at, ended_at
         FROM conversations
         WHERE id = ?1",
        fparams![conversation_id],
        |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
    )?;
    let (_, _, cached_ended_at) = legacy_cached;
    if let Some(tail_state) = existing_conversation_tail_state_from_cached(
        legacy_cached.0,
        legacy_cached.1,
        cached_ended_at,
    ) {
        // Copy the values into `conversation_tail_state` for subsequent lookups.
        franken_insert_conversation_tail_state(
            tx,
            conversation_id,
            cached_ended_at,
            Some(tail_state.last_message_idx),
            Some(tail_state.last_message_created_at),
        )?;
        return Ok(Some(tail_state));
    }

    // Last resort: derive the tail from the stored messages.
    let (max_idx, max_created_at): (Option<i64>, Option<i64>) = tx.query_row_map(
        "SELECT MAX(idx), MAX(created_at)
         FROM messages
         WHERE conversation_id = ?1",
        fparams![conversation_id],
        |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
    )?;
    if let Some((last_message_idx, last_message_created_at)) = max_idx.zip(max_created_at) {
        // No `ended_at` candidate is supplied here; the returned state carries the
        // `ended_at` read from the `conversations` row above.
        franken_update_conversation_tail_state(
            tx,
            conversation_id,
            None,
            Some(last_message_idx),
            Some(last_message_created_at),
        )?;
        return Ok(Some(ExistingConversationTailState {
            last_message_idx,
            last_message_created_at,
            ended_at: cached_ended_at,
        }));
    }
    Ok(None)
}
6025
6026fn existing_conversation_tail_state_from_cached(
6027 last_message_idx: Option<i64>,
6028 last_message_created_at: Option<i64>,
6029 ended_at: Option<i64>,
6030) -> Option<ExistingConversationTailState> {
6031 let (last_message_idx, last_message_created_at) =
6032 last_message_idx.zip(last_message_created_at)?;
6033 Some(ExistingConversationTailState {
6034 last_message_idx,
6035 last_message_created_at,
6036 ended_at,
6037 })
6038}
6039
6040fn franken_find_existing_conversation_with_tail_by_key(
6041 tx: &FrankenTransaction<'_>,
6042 key: &PendingConversationKey,
6043 conv: Option<&Conversation>,
6044) -> Result<Option<ExistingConversationWithTail>> {
6045 if let PendingConversationKey::External {
6046 source_id,
6047 agent_id,
6048 external_id,
6049 } = key
6050 {
6051 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
6052 if let Some(existing) = franken_find_external_conversation_tail_lookup(tx, &lookup_key)? {
6053 return Ok(Some(existing));
6054 }
6055 return Ok(None);
6056 }
6057
6058 let Some(id) = franken_find_existing_conversation_by_key(tx, key, conv)? else {
6059 return Ok(None);
6060 };
6061 let tail_state = franken_existing_conversation_append_tail_state(tx, id)?;
6062 Ok(Some(ExistingConversationWithTail { id, tail_state }))
6063}
6064
6065fn franken_insert_conversation_tail_state(
6066 tx: &FrankenTransaction<'_>,
6067 conversation_id: i64,
6068 ended_at: Option<i64>,
6069 last_message_idx: Option<i64>,
6070 last_message_created_at: Option<i64>,
6071) -> Result<()> {
6072 if ended_at.is_none() && last_message_idx.is_none() && last_message_created_at.is_none() {
6073 return Ok(());
6074 }
6075 tx.execute_compat(
6076 "INSERT OR REPLACE INTO conversation_tail_state (
6077 conversation_id, ended_at, last_message_idx, last_message_created_at
6078 ) VALUES (?1, ?2, ?3, ?4)",
6079 fparams![
6080 conversation_id,
6081 ended_at,
6082 last_message_idx,
6083 last_message_created_at
6084 ],
6085 )?;
6086 Ok(())
6087}
6088
6089fn franken_update_conversation_tail_columns(
6090 tx: &FrankenTransaction<'_>,
6091 conversation_id: i64,
6092 ended_at_candidate: Option<i64>,
6093 last_message_idx_candidate: Option<i64>,
6094 last_message_created_at_candidate: Option<i64>,
6095) -> Result<()> {
6096 if ended_at_candidate.is_none()
6097 && last_message_idx_candidate.is_none()
6098 && last_message_created_at_candidate.is_none()
6099 {
6100 return Ok(());
6101 }
6102
6103 tx.execute_compat(
6104 "UPDATE conversations
6105 SET ended_at = CASE
6106 WHEN ?1 IS NULL THEN ended_at
6107 WHEN ended_at IS NULL OR ended_at < ?1 THEN ?1
6108 ELSE ended_at
6109 END,
6110 last_message_idx = CASE
6111 WHEN ?2 IS NULL THEN last_message_idx
6112 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6113 ELSE last_message_idx
6114 END,
6115 last_message_created_at = CASE
6116 WHEN ?3 IS NULL THEN last_message_created_at
6117 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6118 ELSE last_message_created_at
6119 END
6120 WHERE id = ?4",
6121 fparams![
6122 ended_at_candidate,
6123 last_message_idx_candidate,
6124 last_message_created_at_candidate,
6125 conversation_id
6126 ],
6127 )?;
6128 Ok(())
6129}
6130
6131fn franken_tail_state_insert_ended_at(
6132 tx: &FrankenTransaction<'_>,
6133 conversation_id: i64,
6134 candidate: Option<i64>,
6135) -> Result<Option<i64>> {
6136 let canonical: Option<i64> = tx
6137 .query_row_map(
6138 "SELECT ended_at FROM conversations WHERE id = ?1",
6139 fparams![conversation_id],
6140 |row| row.get_typed(0),
6141 )
6142 .optional()?
6143 .flatten();
6144 Ok(canonical.max(candidate))
6145}
6146
6147fn franken_update_conversation_tail_state(
6148 tx: &FrankenTransaction<'_>,
6149 conversation_id: i64,
6150 ended_at_candidate: Option<i64>,
6151 last_message_idx_candidate: Option<i64>,
6152 last_message_created_at_candidate: Option<i64>,
6153) -> Result<()> {
6154 if ended_at_candidate.is_none()
6155 && last_message_idx_candidate.is_none()
6156 && last_message_created_at_candidate.is_none()
6157 {
6158 return Ok(());
6159 }
6160
6161 let changed = tx.execute_compat(
6162 "UPDATE conversation_tail_state
6163 SET ended_at = CASE
6164 WHEN ?1 IS NULL THEN ended_at
6165 ELSE MAX(IFNULL(ended_at, 0), ?1)
6166 END,
6167 last_message_idx = CASE
6168 WHEN ?2 IS NULL THEN last_message_idx
6169 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6170 ELSE last_message_idx
6171 END,
6172 last_message_created_at = CASE
6173 WHEN ?3 IS NULL THEN last_message_created_at
6174 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6175 ELSE last_message_created_at
6176 END
6177 WHERE conversation_id = ?4",
6178 fparams![
6179 ended_at_candidate,
6180 last_message_idx_candidate,
6181 last_message_created_at_candidate,
6182 conversation_id
6183 ],
6184 )?;
6185 if changed == 0 {
6186 let insert_ended_at =
6187 franken_tail_state_insert_ended_at(tx, conversation_id, ended_at_candidate)?;
6188 franken_insert_conversation_tail_state(
6189 tx,
6190 conversation_id,
6191 insert_ended_at,
6192 last_message_idx_candidate,
6193 last_message_created_at_candidate,
6194 )?;
6195 }
6196 franken_update_conversation_tail_columns(
6197 tx,
6198 conversation_id,
6199 ended_at_candidate,
6200 last_message_idx_candidate,
6201 last_message_created_at_candidate,
6202 )?;
6203 Ok(())
6204}
6205
6206fn franken_set_conversation_tail_state_after_append(
6207 tx: &FrankenTransaction<'_>,
6208 conversation_id: i64,
6209 ended_at: i64,
6210 last_message_idx: i64,
6211 last_message_created_at: i64,
6212) -> Result<()> {
6213 let changed = tx.execute_compat(
6214 "UPDATE conversation_tail_state
6215 SET ended_at = ?1,
6216 last_message_idx = ?2,
6217 last_message_created_at = ?3
6218 WHERE conversation_id = ?4",
6219 fparams![
6220 ended_at,
6221 last_message_idx,
6222 last_message_created_at,
6223 conversation_id
6224 ],
6225 )?;
6226 if changed == 0 {
6227 let insert_ended_at =
6228 franken_tail_state_insert_ended_at(tx, conversation_id, Some(ended_at))?;
6229 franken_insert_conversation_tail_state(
6230 tx,
6231 conversation_id,
6232 insert_ended_at,
6233 Some(last_message_idx),
6234 Some(last_message_created_at),
6235 )?;
6236 }
6237 franken_update_conversation_tail_columns(
6238 tx,
6239 conversation_id,
6240 Some(ended_at),
6241 Some(last_message_idx),
6242 Some(last_message_created_at),
6243 )?;
6244 Ok(())
6245}
6246
6247fn collect_append_only_tail_messages<'a>(
6248 conv: &'a Conversation,
6249 existing_max_idx: i64,
6250 existing_max_created_at: i64,
6251) -> Option<ExistingConversationNewMessages<'a>> {
6252 if conv.messages.is_empty() {
6253 return Some(ExistingConversationNewMessages {
6254 messages: Vec::new(),
6255 new_chars: 0,
6256 idx_collision_count: 0,
6257 first_collision_idx: None,
6258 });
6259 }
6260
6261 let mut split_idx = None;
6262 let mut prev_idx = None;
6263 for (pos, msg) in conv.messages.iter().enumerate() {
6264 if prev_idx.is_some_and(|prev| msg.idx < prev) {
6265 return None;
6266 }
6267 prev_idx = Some(msg.idx);
6268 if split_idx.is_none() && msg.idx > existing_max_idx {
6269 split_idx = Some(pos);
6270 }
6271 }
6272 let split_idx = split_idx?;
6273
6274 let mut seen_tail_idx = HashSet::new();
6275 let mut seen_tail_replay = HashSet::new();
6276 let mut new_chars = 0i64;
6277 let mut messages = Vec::new();
6278 for msg in &conv.messages[split_idx..] {
6279 let created_at = msg.created_at?;
6280 if created_at <= existing_max_created_at {
6281 return None;
6282 }
6283
6284 if !seen_tail_idx.insert(msg.idx) {
6285 return None;
6286 }
6287
6288 let replay_fingerprint = message_replay_fingerprint(msg);
6289 if !seen_tail_replay.insert(replay_fingerprint) {
6290 return None;
6291 }
6292
6293 new_chars += msg.content.len() as i64;
6294 messages.push(msg);
6295 }
6296
6297 Some(ExistingConversationNewMessages {
6298 messages,
6299 new_chars,
6300 idx_collision_count: 0,
6301 first_collision_idx: None,
6302 })
6303}
6304
/// Absolute distance between two optional millisecond timestamps.
///
/// Returns `i64::MAX` when either side is `None` or when the distance does
/// not fit in an `i64`. The subtraction is done in `i128` so it cannot
/// overflow.
fn start_distance_ms(left: Option<i64>, right: Option<i64>) -> i64 {
    let (Some(lhs), Some(rhs)) = (left, right) else {
        return i64::MAX;
    };
    let gap = (i128::from(lhs) - i128::from(rhs)).abs();
    i64::try_from(gap).unwrap_or(i64::MAX)
}
6314
6315fn conversation_merge_evidence(
6316 incoming_exact: &HashSet<MessageMergeFingerprint>,
6317 incoming_replay: &HashSet<MessageReplayFingerprint>,
6318 existing_exact: &HashSet<MessageMergeFingerprint>,
6319 existing_replay: &HashSet<MessageReplayFingerprint>,
6320 incoming_started_at: Option<i64>,
6321 existing_started_at: Option<i64>,
6322) -> Option<ConversationMergeEvidence> {
6323 let exact_overlap = incoming_exact.intersection(existing_exact).count();
6324 let replay_overlap = incoming_replay.intersection(existing_replay).count();
6325 if exact_overlap == 0 && replay_overlap == 0 {
6326 return None;
6327 }
6328
6329 let smaller_replay_set = incoming_replay.len().min(existing_replay.len());
6330 let started_close = timestamps_within_tolerance(
6331 incoming_started_at,
6332 existing_started_at,
6333 SOURCE_PATH_MERGE_START_TOLERANCE_MS,
6334 );
6335 let full_replay_subset_match = smaller_replay_set >= 2 && replay_overlap == smaller_replay_set;
6336
6337 let merge_allowed = if started_close {
6338 exact_overlap >= 1 || replay_overlap >= 2
6339 } else {
6340 exact_overlap >= 2 || full_replay_subset_match
6341 };
6342
6343 merge_allowed.then_some(ConversationMergeEvidence {
6344 exact_overlap,
6345 replay_overlap,
6346 smaller_replay_set,
6347 started_close,
6348 start_distance_ms: start_distance_ms(incoming_started_at, existing_started_at),
6349 })
6350}
6351
/// Reports whether two optional millisecond timestamps differ by at most
/// `tolerance_ms`.
///
/// A missing timestamp on either side yields `false`. The comparison is done
/// in `i128`, so extreme inputs cannot overflow.
fn timestamps_within_tolerance(left: Option<i64>, right: Option<i64>, tolerance_ms: i64) -> bool {
    left.zip(right).is_some_and(|(lhs, rhs)| {
        let gap = (i128::from(lhs) - i128::from(rhs)).abs();
        gap <= i128::from(tolerance_ms)
    })
}
6360
6361fn conversation_merge_key(agent_id: i64, conv: &Conversation) -> PendingConversationKey {
6362 if let Some(external_id) = conv.external_id.clone() {
6363 PendingConversationKey::External {
6364 source_id: conv.source_id.clone(),
6365 agent_id,
6366 external_id,
6367 }
6368 } else {
6369 PendingConversationKey::SourcePath {
6370 source_id: conv.source_id.clone(),
6371 agent_id,
6372 source_path: path_to_string(&conv.source_path),
6373 started_at: conversation_effective_started_at(conv),
6374 }
6375 }
6376}
6377
/// A message together with the conversation-level attributes carried
/// alongside it for embedding.
///
/// NOTE(review): the query that produces these rows is not visible here, so
/// the field notes below restate the declared types and names only; confirm
/// exact column provenance against the producer.
#[derive(Debug, Clone)]
pub struct MessageForEmbedding {
    /// Identifier of the message.
    pub message_id: i64,
    /// Message timestamp, if one is recorded (presumably epoch milliseconds,
    /// as elsewhere in this file — confirm).
    pub created_at: Option<i64>,
    /// Identifier of the agent that owns the conversation.
    pub agent_id: i64,
    /// Identifier of the workspace, when the conversation has one.
    pub workspace_id: Option<i64>,
    /// 32-bit hash of the source id; the hashing scheme is defined by the
    /// producer.
    pub source_id_hash: u32,
    /// Message role as stored text.
    pub role: String,
    /// Message body.
    pub content: String,
}
6388
6389impl FrankenStorage {
6394 pub fn ensure_agent(&self, agent: &Agent) -> Result<i64> {
6396 let cache_key = EnsuredAgentKey::from_agent(agent);
6397 if let Some(id) = self.cached_agent_id(&cache_key) {
6398 return Ok(id);
6399 }
6400
6401 let now = Self::now_millis();
6402 self.conn.execute_compat(
6403 "INSERT INTO agents(slug, name, version, kind, created_at, updated_at)
6404 VALUES(?1, ?2, ?3, ?4, ?5, ?6)
6405 ON CONFLICT(slug) DO UPDATE SET
6406 name = excluded.name,
6407 version = excluded.version,
6408 kind = excluded.kind,
6409 updated_at = excluded.updated_at
6410 WHERE NOT (
6411 agents.name IS excluded.name
6412 AND agents.version IS excluded.version
6413 AND agents.kind IS excluded.kind
6414 )",
6415 fparams![
6416 agent.slug.as_str(),
6417 agent.name.as_str(),
6418 agent.version.as_deref(),
6419 cache_key.kind.as_str(),
6420 now,
6421 now
6422 ],
6423 )?;
6424
6425 let id = self
6426 .conn
6427 .query_row_map(
6428 "SELECT id FROM agents WHERE slug = ?1 LIMIT 1",
6429 fparams![agent.slug.as_str()],
6430 |row| row.get_typed(0),
6431 )
6432 .with_context(|| format!("fetching agent id for {}", agent.slug))?;
6433 self.mark_agent_ensured(cache_key, id);
6434 Ok(id)
6435 }
6436
6437 pub fn ensure_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
6439 let path_str = path.to_string_lossy().to_string();
6440 let cache_key = EnsuredWorkspaceKey::new(path_str.clone(), display_name);
6441 if let Some(id) = self.cached_workspace_id(&cache_key) {
6442 return Ok(id);
6443 }
6444
6445 if let Some(display_name) = display_name {
6446 self.conn.execute_compat(
6447 "INSERT INTO workspaces(path, display_name)
6448 VALUES(?1, ?2)
6449 ON CONFLICT(path) DO UPDATE SET
6450 display_name = excluded.display_name
6451 WHERE NOT (workspaces.display_name IS excluded.display_name)",
6452 fparams![path_str.as_str(), display_name],
6453 )?;
6454 } else {
6455 self.conn.execute_compat(
6456 "INSERT OR IGNORE INTO workspaces(path, display_name) VALUES(?1, NULL)",
6457 fparams![path_str.as_str()],
6458 )?;
6459 }
6460
6461 let id = self
6462 .conn
6463 .query_row_map(
6464 "SELECT id FROM workspaces WHERE path = ?1 LIMIT 1",
6465 fparams![path_str.as_str()],
6466 |row| row.get_typed(0),
6467 )
6468 .with_context(|| format!("fetching workspace id for {path_str}"))?;
6469 self.mark_workspace_ensured(cache_key, id);
6470 Ok(id)
6471 }
6472
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` when the system clock reads earlier than the epoch and
/// `i64::MAX` when the millisecond count does not fit in an `i64`.
pub fn now_millis() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => i64::try_from(elapsed.as_millis()).unwrap_or(i64::MAX),
        Err(_) => 0,
    }
}
6480
/// Converts epoch milliseconds into a day index counted from
/// 2020-01-01T00:00:00Z (day `0`).
///
/// Euclidean division is used throughout, so timestamps before that origin
/// map to negative day ids rounded towards negative infinity.
pub fn day_id_from_millis(timestamp_ms: i64) -> i64 {
    const EPOCH_2020_SECS: i64 = 1_577_836_800;
    const MILLIS_PER_SEC: i64 = 1000;
    const SECS_PER_DAY: i64 = 86400;

    let secs_since_unix = timestamp_ms.div_euclid(MILLIS_PER_SEC);
    let secs_since_2020 = secs_since_unix - EPOCH_2020_SECS;
    secs_since_2020.div_euclid(SECS_PER_DAY)
}
6487
/// Converts epoch milliseconds into an hour index counted from
/// 2020-01-01T00:00:00Z (hour `0`).
///
/// Euclidean division is used throughout, so timestamps before that origin
/// map to negative hour ids rounded towards negative infinity.
pub fn hour_id_from_millis(timestamp_ms: i64) -> i64 {
    const EPOCH_2020_SECS: i64 = 1_577_836_800;
    const MILLIS_PER_SEC: i64 = 1000;
    const SECS_PER_HOUR: i64 = 3600;

    let secs_since_unix = timestamp_ms.div_euclid(MILLIS_PER_SEC);
    let secs_since_2020 = secs_since_unix - EPOCH_2020_SECS;
    secs_since_2020.div_euclid(SECS_PER_HOUR)
}
6494
/// Converts a day index (days since 2020-01-01T00:00:00Z) back into the
/// epoch-millisecond timestamp of that day's start.
///
/// The arithmetic saturates at `i64::MIN` / `i64::MAX` instead of
/// overflowing, so out-of-range day ids neither panic in debug builds nor
/// wrap around in release builds. Results for in-range ids are unchanged.
pub fn millis_from_day_id(day_id: i64) -> i64 {
    const EPOCH_2020_SECS: i64 = 1_577_836_800;
    const SECS_PER_DAY: i64 = 86400;
    const MILLIS_PER_SEC: i64 = 1000;

    day_id
        .saturating_mul(SECS_PER_DAY)
        .saturating_add(EPOCH_2020_SECS)
        .saturating_mul(MILLIS_PER_SEC)
}
6500
/// Converts an hour index (hours since 2020-01-01T00:00:00Z) back into the
/// epoch-millisecond timestamp of that hour's start.
///
/// The arithmetic saturates at `i64::MIN` / `i64::MAX` instead of
/// overflowing, so out-of-range hour ids neither panic in debug builds nor
/// wrap around in release builds. Results for in-range ids are unchanged.
pub fn millis_from_hour_id(hour_id: i64) -> i64 {
    const EPOCH_2020_SECS: i64 = 1_577_836_800;
    const SECS_PER_HOUR: i64 = 3600;
    const MILLIS_PER_SEC: i64 = 1000;

    hour_id
        .saturating_mul(SECS_PER_HOUR)
        .saturating_add(EPOCH_2020_SECS)
        .saturating_mul(MILLIS_PER_SEC)
}
6506
6507 pub fn get_last_scan_ts(&self) -> Result<Option<i64>> {
6509 let result: Result<String, _> = self.conn.query_row_map(
6510 "SELECT value FROM meta WHERE key = 'last_scan_ts'",
6511 fparams![],
6512 |row| row.get_typed(0),
6513 );
6514 match result.optional() {
6515 Ok(Some(s)) => Ok(s.parse().ok()),
6516 Ok(None) => Ok(None),
6517 Err(e) => Err(e.into()),
6518 }
6519 }
6520
6521 pub fn set_last_scan_ts(&self, ts: i64) -> Result<()> {
6523 self.conn.execute_compat(
6524 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_scan_ts', ?1)",
6525 fparams![ts.to_string()],
6526 )?;
6527 Ok(())
6528 }
6529
6530 pub fn get_last_indexed_at(&self) -> Result<Option<i64>> {
6532 let result: Result<String, _> = self.conn.query_row_map(
6533 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
6534 fparams![],
6535 |row| row.get_typed(0),
6536 );
6537 match result.optional() {
6538 Ok(Some(s)) => Ok(s.parse().ok()),
6539 Ok(None) => Ok(None),
6540 Err(e) => Err(e.into()),
6541 }
6542 }
6543
6544 pub fn set_last_indexed_at(&self, ts: i64) -> Result<()> {
6546 self.conn.execute_compat(
6547 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_indexed_at', ?1)",
6548 fparams![ts.to_string()],
6549 )?;
6550 Ok(())
6551 }
6552
6553 pub fn list_agents(&self) -> Result<Vec<Agent>> {
6555 self.conn
6556 .query_map_collect(
6557 "SELECT id, slug, name, version, kind FROM agents ORDER BY slug",
6558 fparams![],
6559 |row| {
6560 let kind: String = row.get_typed(4)?;
6561 Ok(Agent {
6562 id: Some(row.get_typed(0)?),
6563 slug: row.get_typed(1)?,
6564 name: row.get_typed(2)?,
6565 version: row.get_typed(3)?,
6566 kind: match kind.as_str() {
6567 "cli" => AgentKind::Cli,
6568 "vscode" => AgentKind::VsCode,
6569 _ => AgentKind::Hybrid,
6570 },
6571 })
6572 },
6573 )
6574 .with_context(|| "listing agents")
6575 }
6576
6577 pub fn total_conversation_count(&self) -> Result<usize> {
6579 let count: i64 =
6580 self.conn
6581 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6582 row.get_typed(0)
6583 })?;
6584 Ok(count.max(0) as usize)
6585 }
6586
6587 pub fn total_message_count(&self) -> Result<usize> {
6589 let count: i64 =
6590 self.conn
6591 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
6592 row.get_typed(0)
6593 })?;
6594 Ok(count.max(0) as usize)
6595 }
6596
6597 pub fn purge_agent_archive_data(&self, agent_slug: &str) -> Result<AgentArchivePurgeResult> {
6602 let normalized = agent_slug.trim().to_ascii_lowercase();
6603 if normalized.is_empty() {
6604 return Err(anyhow!("agent slug cannot be empty"));
6605 }
6606
6607 let Some(agent_id) = self
6608 .conn
6609 .query_row_map(
6610 "SELECT id FROM agents WHERE slug = ?1",
6611 fparams![normalized.as_str()],
6612 |row| row.get_typed::<i64>(0),
6613 )
6614 .optional()?
6615 else {
6616 return Ok(AgentArchivePurgeResult::default());
6617 };
6618
6619 let conversations_deleted: i64 = self.conn.query_row_map(
6620 "SELECT COUNT(*) FROM conversations WHERE agent_id = ?1",
6621 fparams![agent_id],
6622 |row| row.get_typed(0),
6623 )?;
6624 if conversations_deleted == 0 {
6625 return Ok(AgentArchivePurgeResult::default());
6626 }
6627
6628 let messages_deleted: i64 = self.conn.query_row_map(
6629 "SELECT COUNT(*)
6630 FROM messages
6631 WHERE conversation_id IN (
6632 SELECT id FROM conversations WHERE agent_id = ?1
6633 )",
6634 fparams![agent_id],
6635 |row| row.get_typed(0),
6636 )?;
6637
6638 let mut tx = self.conn.transaction()?;
6639 tx.execute_compat(
6640 "DELETE FROM conversation_external_lookup
6641 WHERE conversation_id IN (
6642 SELECT id FROM conversations WHERE agent_id = ?1
6643 )",
6644 fparams![agent_id],
6645 )?;
6646 tx.execute_compat(
6647 "DELETE FROM conversation_external_tail_lookup
6648 WHERE conversation_id IN (
6649 SELECT id FROM conversations WHERE agent_id = ?1
6650 )",
6651 fparams![agent_id],
6652 )?;
6653 tx.execute_compat(
6654 "DELETE FROM conversations WHERE agent_id = ?1",
6655 fparams![agent_id],
6656 )?;
6657 tx.execute_compat(
6658 "DELETE FROM agents
6659 WHERE id = ?1
6660 AND NOT EXISTS (
6661 SELECT 1 FROM conversations WHERE agent_id = ?1
6662 )",
6663 fparams![agent_id],
6664 )?;
6665 tx.commit()?;
6666
6667 Ok(AgentArchivePurgeResult {
6668 conversations_deleted: conversations_deleted.max(0) as usize,
6669 messages_deleted: messages_deleted.max(0) as usize,
6670 })
6671 }
6672
6673 pub fn list_workspaces(&self) -> Result<Vec<crate::model::types::Workspace>> {
6675 self.conn
6676 .query_map_collect(
6677 "SELECT id, path, display_name FROM workspaces ORDER BY path",
6678 fparams![],
6679 |row| {
6680 let path_str: String = row.get_typed(1)?;
6681 Ok(crate::model::types::Workspace {
6682 id: Some(row.get_typed(0)?),
6683 path: Path::new(&path_str).to_path_buf(),
6684 display_name: row.get_typed(2)?,
6685 })
6686 },
6687 )
6688 .with_context(|| "listing workspaces")
6689 }
6690
6691 pub fn list_conversations(&self, limit: i64, offset: i64) -> Result<Vec<Conversation>> {
6693 self.conn
6700 .query_map_collect(
6701 r"SELECT c.id,
6702 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
6703 (SELECT w.path FROM workspaces w WHERE w.id = c.workspace_id),
6704 c.external_id, c.title, c.source_path,
6705 c.started_at,
6706 COALESCE(
6707 (SELECT ts.ended_at
6708 FROM conversation_tail_state ts
6709 WHERE ts.conversation_id = c.id),
6710 c.ended_at
6711 ),
6712 c.approx_tokens, c.metadata_json,
6713 c.source_id, c.origin_host, c.metadata_bin
6714 FROM conversations c
6715 ORDER BY CASE WHEN c.started_at IS NULL THEN 1 ELSE 0 END, c.started_at DESC, c.id DESC
6716 LIMIT ?1 OFFSET ?2",
6717 fparams![limit, offset],
6718 |row| {
6719 let workspace_path: Option<String> = row.get_typed(2)?;
6720 let source_path: String = row.get_typed(5)?;
6721 let raw_source_id: Option<String> = row.get_typed(10)?;
6722 let raw_origin_host: Option<String> = row.get_typed(11)?;
6723 let (source_id, _, origin_host) = normalized_storage_source_parts(
6724 raw_source_id.as_deref(),
6725 None,
6726 raw_origin_host.as_deref(),
6727 );
6728 Ok(Conversation {
6729 id: Some(row.get_typed(0)?),
6730 agent_slug: row.get_typed(1)?,
6731 workspace: workspace_path.map(|p| Path::new(&p).to_path_buf()),
6732 external_id: row.get_typed(3)?,
6733 title: row.get_typed(4)?,
6734 source_path: Path::new(&source_path).to_path_buf(),
6735 started_at: row.get_typed(6)?,
6736 ended_at: row.get_typed(7)?,
6737 approx_tokens: row.get_typed(8)?,
6738 metadata_json: franken_read_metadata_compat(row, 9, 12),
6739 messages: Vec::new(),
6740 source_id,
6741 origin_host,
6742 })
6743 },
6744 )
6745 .with_context(|| "listing conversations")
6746 }
6747
6748 pub fn build_lexical_rebuild_lookups(
6752 &self,
6753 ) -> Result<(HashMap<i64, String>, HashMap<i64, PathBuf>)> {
6754 let agents: HashMap<i64, String> = self
6755 .conn
6756 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
6757 Ok((row.get_typed::<i64>(0)?, row.get_typed::<String>(1)?))
6758 })
6759 .with_context(|| "loading agent lookup for lexical rebuild")?
6760 .into_iter()
6761 .collect();
6762 let workspaces: HashMap<i64, PathBuf> = self
6763 .conn
6764 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
6765 let path_str: String = row.get_typed(1)?;
6766 Ok((row.get_typed::<i64>(0)?, PathBuf::from(path_str)))
6767 })
6768 .with_context(|| "loading workspace lookup for lexical rebuild")?
6769 .into_iter()
6770 .collect();
6771 Ok((agents, workspaces))
6772 }
6773
6774 pub fn list_conversation_footprints_for_lexical_rebuild(
6787 &self,
6788 ) -> Result<Vec<LexicalRebuildConversationFootprintRow>> {
6789 let tail_state_rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
6790 "SELECT conversation_id, last_message_idx
6791 FROM conversation_tail_state
6792 ORDER BY conversation_id ASC",
6793 fparams![],
6794 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
6795 ) {
6796 Ok(rows) => rows,
6797 Err(err) if error_indicates_missing_table(&err) => Vec::new(),
6798 Err(err) => {
6799 return Err(err).with_context(|| "listing lexical rebuild tail-state estimates");
6800 }
6801 };
6802 let tail_state_by_conversation: HashMap<i64, Option<i64>> =
6803 tail_state_rows.into_iter().collect();
6804
6805 let rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
6806 "SELECT id, last_message_idx
6807 FROM conversations
6808 ORDER BY id ASC",
6809 fparams![],
6810 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
6811 ) {
6812 Ok(rows) => rows,
6813 Err(err) if error_indicates_missing_column(&err) => self
6814 .conn
6815 .query_map_collect(
6816 "SELECT id
6817 FROM conversations
6818 ORDER BY id ASC",
6819 fparams![],
6820 |row| Ok((row.get_typed::<i64>(0)?, None)),
6821 )
6822 .with_context(|| {
6823 "listing lexical rebuild conversation ids after missing tail column fallback"
6824 })?,
6825 Err(err) => {
6826 return Err(err)
6827 .with_context(|| "listing lexical rebuild conversation footprint estimates");
6828 }
6829 };
6830
6831 let mut footprints = Vec::with_capacity(rows.len());
6832 let mut missing_tail_positions = HashMap::new();
6833 for (conversation_id, conversation_last_message_idx) in rows {
6834 let last_message_idx = tail_state_by_conversation
6835 .get(&conversation_id)
6836 .copied()
6837 .flatten()
6838 .or(conversation_last_message_idx);
6839 let Some(message_count) = lexical_rebuild_message_count_from_tail_idx(last_message_idx)
6840 else {
6841 missing_tail_positions.insert(conversation_id, footprints.len());
6842 footprints.push(LexicalRebuildConversationFootprintRow {
6843 conversation_id,
6844 message_count: 0,
6845 message_bytes: 0,
6846 });
6847 continue;
6848 };
6849 footprints.push(lexical_rebuild_conversation_footprint_from_count(
6850 conversation_id,
6851 message_count,
6852 ));
6853 }
6854
6855 let every_footprint_was_missing_tail = missing_tail_positions.len() == footprints.len();
6856 if !missing_tail_positions.is_empty() {
6857 self.fill_missing_lexical_rebuild_footprint_tails(
6858 &mut footprints,
6859 &missing_tail_positions,
6860 )?;
6861 }
6862 if !every_footprint_was_missing_tail {
6863 self.raise_lexical_rebuild_footprints_to_exact_message_counts(&mut footprints)?;
6864 }
6865
6866 Ok(footprints)
6867 }
6868
6869 pub fn lexical_rebuild_has_tail_footprint_metadata(&self) -> Result<bool> {
6870 let total_conversations: i64 = self
6871 .conn
6872 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6873 row.get_typed(0)
6874 })
6875 .with_context(|| "counting conversations for lexical rebuild tail metadata coverage")?;
6876 let total_conversations = usize::try_from(total_conversations.max(0)).unwrap_or(usize::MAX);
6877 if total_conversations == 0 {
6878 return Ok(true);
6879 }
6880
6881 let conversation_columns = franken_table_column_names(&self.conn, "conversations")?;
6882 let conversations_have_tail_column = conversation_columns.contains("last_message_idx");
6883 let tail_state_has_tail_column =
6884 match franken_table_column_names(&self.conn, "conversation_tail_state") {
6885 Ok(columns) => columns.contains("last_message_idx"),
6886 Err(err) if error_indicates_missing_table(&err) => false,
6887 Err(err) => {
6888 return Err(err)
6889 .with_context(|| "reading lexical rebuild tail-state metadata columns");
6890 }
6891 };
6892 if !conversations_have_tail_column && !tail_state_has_tail_column {
6893 return Ok(false);
6894 }
6895
6896 let covered_sql = match (conversations_have_tail_column, tail_state_has_tail_column) {
6897 (true, true) => {
6898 "SELECT COUNT(*)
6899 FROM conversations c
6900 LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
6901 WHERE c.last_message_idx IS NOT NULL
6902 OR ts.last_message_idx IS NOT NULL"
6903 }
6904 (true, false) => {
6905 "SELECT COUNT(*)
6906 FROM conversations
6907 WHERE last_message_idx IS NOT NULL"
6908 }
6909 (false, true) => {
6910 "SELECT COUNT(*)
6911 FROM conversations c
6912 WHERE EXISTS (
6913 SELECT 1
6914 FROM conversation_tail_state ts
6915 WHERE ts.conversation_id = c.id
6916 AND ts.last_message_idx IS NOT NULL
6917 )"
6918 }
6919 (false, false) => unreachable!("checked before covered_sql selection"),
6920 };
6921 let covered_conversations: i64 = self
6922 .conn
6923 .query_row_map(covered_sql, fparams![], |row| row.get_typed(0))
6924 .with_context(
6925 || "counting conversations covered by lexical rebuild tail footprint metadata",
6926 )?;
6927 let covered_conversations =
6928 usize::try_from(covered_conversations.max(0)).unwrap_or(usize::MAX);
6929
6930 Ok(lexical_rebuild_tail_metadata_coverage_is_sufficient(
6931 total_conversations,
6932 covered_conversations,
6933 ))
6934 }
6935
6936 fn raise_lexical_rebuild_footprints_to_exact_message_counts(
6937 &self,
6938 footprints: &mut [LexicalRebuildConversationFootprintRow],
6939 ) -> Result<()> {
6940 if footprints.is_empty() {
6941 return Ok(());
6942 }
6943
6944 let positions_by_conversation: HashMap<i64, usize> = footprints
6945 .iter()
6946 .enumerate()
6947 .map(|(position, footprint)| (footprint.conversation_id, position))
6948 .collect();
6949 self.conn
6950 .query_with_params_for_each(
6951 "SELECT conversation_id, COUNT(*) AS message_count
6952 FROM messages
6953 GROUP BY conversation_id
6954 ORDER BY conversation_id ASC",
6955 &[] as &[SqliteValue],
6956 |row| {
6957 let conversation_id: i64 = row.get_typed(0)?;
6958 let exact_count: i64 = row.get_typed(1)?;
6959 let Some(position) = positions_by_conversation.get(&conversation_id) else {
6960 return Ok(());
6961 };
6962 let exact_count = usize::try_from(exact_count.max(0)).unwrap_or(usize::MAX);
6963 let footprint = &mut footprints[*position];
6964 if exact_count > footprint.message_count {
6965 footprint.message_count = exact_count;
6966 footprint.message_bytes =
6967 footprint.message_bytes.max(exact_count.saturating_mul(
6968 LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
6969 ));
6970 }
6971 Ok(())
6972 },
6973 )
6974 .with_context(|| "raising lexical rebuild footprints to exact message counts")?;
6975 Ok(())
6976 }
6977
6978 fn fill_missing_lexical_rebuild_footprint_tails(
6979 &self,
6980 footprints: &mut [LexicalRebuildConversationFootprintRow],
6981 missing_tail_positions: &HashMap<i64, usize>,
6982 ) -> Result<()> {
6983 if missing_tail_positions.len() <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT {
6984 for (conversation_id, position) in missing_tail_positions {
6985 let last_message_idx: Option<i64> = self
6986 .conn
6987 .query_row_map(
6988 "SELECT MAX(idx) FROM messages WHERE conversation_id = ?1",
6989 fparams![*conversation_id],
6990 |row| row.get_typed(0),
6991 )
6992 .with_context(|| {
6993 format!(
6994 "looking up missing lexical rebuild tail estimate for conversation {conversation_id}"
6995 )
6996 })?;
6997 if let Some(message_count) =
6998 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
6999 {
7000 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7001 *conversation_id,
7002 message_count,
7003 );
7004 }
7005 }
7006 return Ok(());
7007 }
7008
7009 self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7010 footprints,
7011 missing_tail_positions,
7012 "SELECT conversation_id, MAX(idx) AS last_message_idx
7013 FROM messages INDEXED BY idx_messages_conv_idx
7014 GROUP BY conversation_id
7015 ORDER BY conversation_id ASC",
7016 )
7017 .or_else(|err| {
7018 if err
7019 .to_string()
7020 .contains("no such index: idx_messages_conv_idx")
7021 {
7022 return self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7023 footprints,
7024 missing_tail_positions,
7025 "SELECT conversation_id, MAX(idx) AS last_message_idx
7026 FROM messages
7027 GROUP BY conversation_id
7028 ORDER BY conversation_id ASC",
7029 );
7030 }
7031 Err(err)
7032 })
7033 .with_context(|| "grouping missing lexical rebuild tail estimates from messages")?;
7034
7035 Ok(())
7036 }
7037
7038 fn fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7039 &self,
7040 footprints: &mut [LexicalRebuildConversationFootprintRow],
7041 missing_tail_positions: &HashMap<i64, usize>,
7042 sql: &str,
7043 ) -> Result<()> {
7044 self.conn
7045 .query_with_params_for_each(sql, &[] as &[SqliteValue], |row| {
7046 let conversation_id: i64 = row.get_typed(0)?;
7047 let last_message_idx: Option<i64> = row.get_typed(1)?;
7048 let Some(position) = missing_tail_positions.get(&conversation_id) else {
7049 return Ok(());
7050 };
7051 if let Some(message_count) =
7052 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7053 {
7054 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7055 conversation_id,
7056 message_count,
7057 );
7058 }
7059 Ok(())
7060 })
7061 .with_context(|| "grouping lexical rebuild missing tail estimates")
7062 }
7063
7064 pub fn list_conversation_ids_for_lexical_rebuild(&self) -> Result<Vec<i64>> {
7066 self.conn
7067 .query_map_collect(
7068 "SELECT id FROM conversations ORDER BY id ASC",
7069 fparams![],
7070 |row| row.get_typed(0),
7071 )
7072 .with_context(|| "listing conversation ids for lexical rebuild")
7073 }
7074 pub fn list_conversations_for_lexical_rebuild_by_offset(
7079 &self,
7080 limit: i64,
7081 offset: i64,
7082 agent_slugs: &HashMap<i64, String>,
7083 workspace_paths: &HashMap<i64, PathBuf>,
7084 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7085 self.conn
7088 .query_map_collect(
7089 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7090 started_at,
7091 COALESCE(
7092 (SELECT ts.ended_at
7093 FROM conversation_tail_state ts
7094 WHERE ts.conversation_id = conversations.id),
7095 ended_at
7096 ),
7097 source_id, origin_host
7098 FROM conversations
7099 ORDER BY id ASC
7100 LIMIT ?1 OFFSET ?2",
7101 fparams![limit, offset],
7102 |row| {
7103 let agent_id: Option<i64> = row.get_typed(1)?;
7104 let workspace_id: Option<i64> = row.get_typed(2)?;
7105 let source_path: String = row.get_typed(5)?;
7106 let raw_source_id: Option<String> = row.get_typed(8)?;
7107 let raw_origin_host: Option<String> = row.get_typed(9)?;
7108 let (source_id, _, origin_host) = normalized_storage_source_parts(
7109 raw_source_id.as_deref(),
7110 None,
7111 raw_origin_host.as_deref(),
7112 );
7113 Ok(LexicalRebuildConversationRow {
7114 id: Some(row.get_typed(0)?),
7115 agent_slug: agent_id
7116 .and_then(|aid| agent_slugs.get(&aid).cloned())
7117 .unwrap_or_else(|| "unknown".to_string()),
7118 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7119 external_id: row.get_typed(3)?,
7120 title: row.get_typed(4)?,
7121 source_path: Path::new(&source_path).to_path_buf(),
7122 started_at: row.get_typed(6)?,
7123 ended_at: row.get_typed(7)?,
7124 source_id,
7125 origin_host,
7126 })
7127 },
7128 )
7129 .with_context(|| "listing conversations for lexical rebuild")
7130 }
7131
7132 pub fn list_conversations_for_lexical_rebuild_after_id(
7137 &self,
7138 limit: i64,
7139 after_conversation_id: i64,
7140 agent_slugs: &HashMap<i64, String>,
7141 workspace_paths: &HashMap<i64, PathBuf>,
7142 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7143 self.conn
7144 .query_map_collect(
7145 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7146 started_at,
7147 COALESCE(
7148 (SELECT ts.ended_at
7149 FROM conversation_tail_state ts
7150 WHERE ts.conversation_id = conversations.id),
7151 ended_at
7152 ),
7153 source_id, origin_host
7154 FROM conversations
7155 WHERE id > ?2
7156 ORDER BY id ASC
7157 LIMIT ?1",
7158 fparams![limit, after_conversation_id],
7159 |row| {
7160 let agent_id: Option<i64> = row.get_typed(1)?;
7161 let workspace_id: Option<i64> = row.get_typed(2)?;
7162 let source_path: String = row.get_typed(5)?;
7163 let raw_source_id: Option<String> = row.get_typed(8)?;
7164 let raw_origin_host: Option<String> = row.get_typed(9)?;
7165 let (source_id, _, origin_host) = normalized_storage_source_parts(
7166 raw_source_id.as_deref(),
7167 None,
7168 raw_origin_host.as_deref(),
7169 );
7170 Ok(LexicalRebuildConversationRow {
7171 id: Some(row.get_typed(0)?),
7172 agent_slug: agent_id
7173 .and_then(|aid| agent_slugs.get(&aid).cloned())
7174 .unwrap_or_else(|| "unknown".to_string()),
7175 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7176 external_id: row.get_typed(3)?,
7177 title: row.get_typed(4)?,
7178 source_path: Path::new(&source_path).to_path_buf(),
7179 started_at: row.get_typed(6)?,
7180 ended_at: row.get_typed(7)?,
7181 source_id,
7182 origin_host,
7183 })
7184 },
7185 )
7186 .with_context(|| {
7187 format!(
7188 "listing conversations for lexical rebuild after id {after_conversation_id}"
7189 )
7190 })
7191 }
7192
7193 pub fn list_conversations_for_lexical_rebuild_after_id_through_id(
7199 &self,
7200 limit: i64,
7201 after_conversation_id: i64,
7202 through_conversation_id: i64,
7203 agent_slugs: &HashMap<i64, String>,
7204 workspace_paths: &HashMap<i64, PathBuf>,
7205 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7206 if through_conversation_id <= after_conversation_id {
7207 return Ok(Vec::new());
7208 }
7209 self.conn
7210 .query_map_collect(
7211 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7212 started_at,
7213 COALESCE(
7214 (SELECT ts.ended_at
7215 FROM conversation_tail_state ts
7216 WHERE ts.conversation_id = conversations.id),
7217 ended_at
7218 ),
7219 source_id, origin_host
7220 FROM conversations
7221 WHERE id > ?2 AND id <= ?3
7222 ORDER BY id ASC
7223 LIMIT ?1",
7224 fparams![limit, after_conversation_id, through_conversation_id],
7225 |row| {
7226 let agent_id: Option<i64> = row.get_typed(1)?;
7227 let workspace_id: Option<i64> = row.get_typed(2)?;
7228 let source_path: String = row.get_typed(5)?;
7229 let raw_source_id: Option<String> = row.get_typed(8)?;
7230 let raw_origin_host: Option<String> = row.get_typed(9)?;
7231 let (source_id, _, origin_host) = normalized_storage_source_parts(
7232 raw_source_id.as_deref(),
7233 None,
7234 raw_origin_host.as_deref(),
7235 );
7236 Ok(LexicalRebuildConversationRow {
7237 id: Some(row.get_typed(0)?),
7238 agent_slug: agent_id
7239 .and_then(|aid| agent_slugs.get(&aid).cloned())
7240 .unwrap_or_else(|| "unknown".to_string()),
7241 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7242 external_id: row.get_typed(3)?,
7243 title: row.get_typed(4)?,
7244 source_path: Path::new(&source_path).to_path_buf(),
7245 started_at: row.get_typed(6)?,
7246 ended_at: row.get_typed(7)?,
7247 source_id,
7248 origin_host,
7249 })
7250 },
7251 )
7252 .with_context(|| {
7253 format!(
7254 "listing conversations for lexical rebuild after id {after_conversation_id} through id {through_conversation_id}"
7255 )
7256 })
7257 }
7258
7259 pub fn fetch_messages(&self, conversation_id: i64) -> Result<Vec<Message>> {
7261 let hinted_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7262 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7263 WHERE conversation_id = ?1 ORDER BY idx";
7264 let fallback_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7265 FROM messages \
7266 WHERE conversation_id = ?1 ORDER BY idx";
7267
7268 self.conn
7269 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7270 let role: String = row.get_typed(2)?;
7271 Ok(Message {
7272 id: Some(row.get_typed(0)?),
7273 idx: row.get_typed(1)?,
7274 role: match role.as_str() {
7275 "user" => MessageRole::User,
7276 "agent" | "assistant" => MessageRole::Agent,
7277 "tool" => MessageRole::Tool,
7278 "system" => MessageRole::System,
7279 other => MessageRole::Other(other.to_string()),
7280 },
7281 author: row.get_typed(3)?,
7282 created_at: row.get_typed(4)?,
7283 content: row.get_typed(5)?,
7284 extra_json: franken_read_message_extra_compat(row, 6, 7),
7285 snippets: Vec::new(),
7286 })
7287 })
7288 .or_else(|err| {
7289 if err
7290 .to_string()
7291 .contains("no such index: sqlite_autoindex_messages_1")
7292 {
7293 return self.conn.query_map_collect(
7294 fallback_sql,
7295 fparams![conversation_id],
7296 |row| {
7297 let role: String = row.get_typed(2)?;
7298 Ok(Message {
7299 id: Some(row.get_typed(0)?),
7300 idx: row.get_typed(1)?,
7301 role: match role.as_str() {
7302 "user" => MessageRole::User,
7303 "agent" | "assistant" => MessageRole::Agent,
7304 "tool" => MessageRole::Tool,
7305 "system" => MessageRole::System,
7306 other => MessageRole::Other(other.to_string()),
7307 },
7308 author: row.get_typed(3)?,
7309 created_at: row.get_typed(4)?,
7310 content: row.get_typed(5)?,
7311 extra_json: franken_read_message_extra_compat(row, 6, 7),
7312 snippets: Vec::new(),
7313 })
7314 },
7315 );
7316 }
7317 Err(err)
7318 })
7319 .with_context(|| format!("fetching messages for conversation {conversation_id}"))
7320 }
7321
7322 pub fn fetch_messages_for_lexical_rebuild(&self, conversation_id: i64) -> Result<Vec<Message>> {
7328 let hinted_sql = "SELECT id, idx, role, author, created_at, content \
7329 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7330 WHERE conversation_id = ?1 ORDER BY idx";
7331 let fallback_sql = "SELECT id, idx, role, author, created_at, content \
7332 FROM messages \
7333 WHERE conversation_id = ?1 ORDER BY idx";
7334
7335 self.conn
7336 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7337 let role: String = row.get_typed(2)?;
7338 Ok(Message {
7339 id: Some(row.get_typed(0)?),
7340 idx: row.get_typed(1)?,
7341 role: match role.as_str() {
7342 "user" => MessageRole::User,
7343 "agent" | "assistant" => MessageRole::Agent,
7344 "tool" => MessageRole::Tool,
7345 "system" => MessageRole::System,
7346 other => MessageRole::Other(other.to_string()),
7347 },
7348 author: row.get_typed(3)?,
7349 created_at: row.get_typed(4)?,
7350 content: row.get_typed(5)?,
7351 extra_json: serde_json::Value::Null,
7352 snippets: Vec::new(),
7353 })
7354 })
7355 .or_else(|err| {
7356 if err
7357 .to_string()
7358 .contains("no such index: sqlite_autoindex_messages_1")
7359 {
7360 return self.conn.query_map_collect(
7361 fallback_sql,
7362 fparams![conversation_id],
7363 |row| {
7364 let role: String = row.get_typed(2)?;
7365 Ok(Message {
7366 id: Some(row.get_typed(0)?),
7367 idx: row.get_typed(1)?,
7368 role: match role.as_str() {
7369 "user" => MessageRole::User,
7370 "agent" | "assistant" => MessageRole::Agent,
7371 "tool" => MessageRole::Tool,
7372 "system" => MessageRole::System,
7373 other => MessageRole::Other(other.to_string()),
7374 },
7375 author: row.get_typed(3)?,
7376 created_at: row.get_typed(4)?,
7377 content: row.get_typed(5)?,
7378 extra_json: serde_json::Value::Null,
7379 snippets: Vec::new(),
7380 })
7381 },
7382 );
7383 }
7384 Err(err)
7385 })
7386 .with_context(|| {
7387 format!("fetching messages for lexical rebuild of conversation {conversation_id}")
7388 })
7389 }
7390
7391 pub fn fetch_messages_for_lexical_rebuild_batch(
7396 &self,
7397 conversation_ids: &[i64],
7398 max_messages: Option<usize>,
7399 max_content_bytes: Option<usize>,
7400 ) -> Result<HashMap<i64, Vec<Message>>> {
7401 if conversation_ids.is_empty() {
7402 return Ok(HashMap::new());
7403 }
7404
7405 let mut grouped: HashMap<i64, Vec<Message>> =
7406 HashMap::with_capacity(conversation_ids.len());
7407 let mut fetched_conversation_ids = HashSet::with_capacity(conversation_ids.len());
7408 let mut total_messages = 0usize;
7409 let mut total_content_bytes = 0usize;
7410
7411 for conversation_id in conversation_ids {
7416 if !fetched_conversation_ids.insert(*conversation_id) {
7417 continue;
7418 }
7419
7420 let messages = self
7421 .fetch_messages_for_lexical_rebuild(*conversation_id)
7422 .with_context(|| {
7423 format!("fetching lexical rebuild messages for conversation {conversation_id}")
7424 })?;
7425 total_messages = total_messages.saturating_add(messages.len());
7426 if let Some(limit) = max_messages
7427 && total_messages > limit
7428 {
7429 return Err(anyhow!(
7430 "lexical rebuild batch fetch exceeded message guardrail: messages={total_messages} limit={limit} conversations={}",
7431 conversation_ids.len()
7432 ));
7433 }
7434
7435 let message_bytes = messages
7436 .iter()
7437 .map(|message| message.content.len())
7438 .sum::<usize>();
7439 total_content_bytes = total_content_bytes.saturating_add(message_bytes);
7440 if let Some(limit) = max_content_bytes
7441 && total_content_bytes > limit
7442 {
7443 return Err(anyhow!(
7444 "lexical rebuild batch fetch exceeded content-byte guardrail: bytes={total_content_bytes} limit={limit} conversations={}",
7445 conversation_ids.len()
7446 ));
7447 }
7448
7449 if !messages.is_empty() {
7450 grouped.insert(*conversation_id, messages);
7451 }
7452 }
7453
7454 Ok(grouped)
7455 }
7456
7457 pub fn stream_messages_for_lexical_rebuild_between_conversation_ids<F>(
7460 &self,
7461 start_conversation_id: i64,
7462 end_conversation_id: i64,
7463 mut f: F,
7464 ) -> Result<()>
7465 where
7466 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7467 {
7468 if end_conversation_id < start_conversation_id {
7469 return Ok(());
7470 }
7471
7472 let conversation_ids: Vec<i64> = self
7473 .conn
7474 .query_map_collect(
7475 "SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
7476 fparams![start_conversation_id, end_conversation_id],
7477 |row| row.get_typed(0),
7478 )
7479 .with_context(|| "listing conversation ids for streamed lexical rebuild")?;
7480
7481 for conversation_id in conversation_ids {
7482 let messages = self
7483 .fetch_messages_for_lexical_rebuild(conversation_id)
7484 .with_context(|| {
7485 format!("streaming lexical rebuild messages for conversation {conversation_id}")
7486 })?;
7487
7488 for message in messages {
7489 let message_id = message.id.ok_or_else(|| {
7490 anyhow!(
7491 "lexical rebuild message missing id for conversation {conversation_id} idx {}",
7492 message.idx
7493 )
7494 })?;
7495 f(LexicalRebuildMessageRow {
7496 conversation_id,
7497 id: message_id,
7498 idx: message.idx,
7499 role: role_str(&message.role),
7500 author: message.author,
7501 created_at: message.created_at,
7502 content: message.content,
7503 })?;
7504 }
7505 }
7506
7507 Ok(())
7508 }
7509
7510 pub fn stream_grouped_messages_for_lexical_rebuild_between_conversation_ids<F>(
7514 &self,
7515 start_conversation_id: i64,
7516 end_conversation_id: i64,
7517 mut f: F,
7518 ) -> Result<()>
7519 where
7520 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7521 {
7522 if end_conversation_id < start_conversation_id {
7523 return Ok(());
7524 }
7525
7526 let mut current_conversation_id: Option<i64> = None;
7527 let mut current_messages: LexicalRebuildGroupedMessageRows = SmallVec::new();
7528 let mut current_last_message_id = 0i64;
7529 let mut flush_current = |current_conversation_id: &mut Option<i64>,
7530 current_messages: &mut LexicalRebuildGroupedMessageRows,
7531 current_last_message_id: &mut i64|
7532 -> Result<()> {
7533 let Some(conversation_id) = current_conversation_id.take() else {
7534 return Ok(());
7535 };
7536 let messages = std::mem::take(current_messages);
7537 let last_message_id = std::mem::take(current_last_message_id);
7538 f(conversation_id, messages, last_message_id)
7539 };
7540
7541 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7542 start_conversation_id,
7543 end_conversation_id,
7544 |row| {
7545 if current_conversation_id != Some(row.conversation_id) {
7546 flush_current(
7547 &mut current_conversation_id,
7548 &mut current_messages,
7549 &mut current_last_message_id,
7550 )?;
7551 current_conversation_id = Some(row.conversation_id);
7552 }
7553 current_last_message_id = row.id;
7554 current_messages.push(LexicalRebuildGroupedMessageRow {
7555 idx: row.idx,
7556 is_tool_role: row.role == "tool",
7557 created_at: row.created_at,
7558 content: row.content,
7559 });
7560 Ok(())
7561 },
7562 )
7563 .with_context(|| "streaming grouped lexical rebuild messages")?;
7564
7565 flush_current(
7566 &mut current_conversation_id,
7567 &mut current_messages,
7568 &mut current_last_message_id,
7569 )
7570 .with_context(|| "flushing grouped lexical rebuild messages")
7571 }
7572
7573 pub fn stream_grouped_messages_for_lexical_rebuild_from_conversation_id<F>(
7576 &self,
7577 start_conversation_id: i64,
7578 f: F,
7579 ) -> Result<()>
7580 where
7581 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7582 {
7583 self.stream_grouped_messages_for_lexical_rebuild_between_conversation_ids(
7584 start_conversation_id,
7585 i64::MAX,
7586 f,
7587 )
7588 }
7589
7590 pub fn stream_messages_for_lexical_rebuild_from_conversation_id<F>(
7593 &self,
7594 start_conversation_id: i64,
7595 f: F,
7596 ) -> Result<()>
7597 where
7598 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7599 {
7600 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7601 start_conversation_id,
7602 i64::MAX,
7603 f,
7604 )
7605 }
7606
7607 pub fn get_source(&self, id: &str) -> Result<Option<Source>> {
7609 let result = self.conn.query_row_map(
7610 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources WHERE id = ?1",
7611 fparams![id],
7612 |row| {
7613 let kind_str: String = row.get_typed(1)?;
7614 let config_json_str: Option<String> = row.get_typed(5)?;
7615 Ok(Source {
7616 id: row.get_typed(0)?,
7617 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7618 host_label: row.get_typed(2)?,
7619 machine_id: row.get_typed(3)?,
7620 platform: row.get_typed(4)?,
7621 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7622 created_at: row.get_typed(6)?,
7623 updated_at: row.get_typed(7)?,
7624 })
7625 },
7626 );
7627 Ok(result.optional()?)
7628 }
7629
7630 pub fn list_sources(&self) -> Result<Vec<Source>> {
7632 self.conn
7633 .query_map_collect(
7634 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources ORDER BY id",
7635 fparams![],
7636 |row| {
7637 let kind_str: String = row.get_typed(1)?;
7638 let config_json_str: Option<String> = row.get_typed(5)?;
7639 Ok(Source {
7640 id: row.get_typed(0)?,
7641 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7642 host_label: row.get_typed(2)?,
7643 machine_id: row.get_typed(3)?,
7644 platform: row.get_typed(4)?,
7645 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7646 created_at: row.get_typed(6)?,
7647 updated_at: row.get_typed(7)?,
7648 })
7649 },
7650 )
7651 .with_context(|| "listing sources")
7652 }
7653
7654 pub fn get_source_ids(&self) -> Result<Vec<String>> {
7656 self.conn
7657 .query_map_collect(
7658 "SELECT id FROM sources WHERE id != 'local' ORDER BY id",
7659 fparams![],
7660 |row| row.get_typed(0),
7661 )
7662 .with_context(|| "listing source ids")
7663 }
7664
7665 pub fn upsert_source(&self, source: &Source) -> Result<()> {
7667 self.invalidate_conversation_source_cache(source.id.as_str());
7668 let now = Self::now_millis();
7669 let kind_str = source.kind.to_string();
7670 let config_json_str = source
7671 .config_json
7672 .as_ref()
7673 .map(serde_json::to_string)
7674 .transpose()?;
7675
7676 self.conn.execute_compat(
7680 "INSERT INTO sources(id, kind, host_label, machine_id, platform, config_json, created_at, updated_at)
7681 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
7682 ON CONFLICT(id) DO UPDATE SET
7683 kind = excluded.kind,
7684 host_label = excluded.host_label,
7685 machine_id = excluded.machine_id,
7686 platform = excluded.platform,
7687 config_json = excluded.config_json,
7688 updated_at = excluded.updated_at
7689 WHERE NOT (
7690 sources.kind IS excluded.kind
7691 AND sources.host_label IS excluded.host_label
7692 AND sources.machine_id IS excluded.machine_id
7693 AND sources.platform IS excluded.platform
7694 AND sources.config_json IS excluded.config_json
7695 )",
7696 fparams![
7697 source.id.as_str(),
7698 kind_str.as_str(),
7699 source.host_label.as_deref(),
7700 source.machine_id.as_deref(),
7701 source.platform.as_deref(),
7702 config_json_str.as_deref(),
7703 source.created_at.unwrap_or(now),
7704 now
7705 ],
7706 )?;
7707 Ok(())
7708 }
7709
7710 fn historical_bundle_key_hash(
7711 version: u32,
7712 bundle: &HistoricalDatabaseBundle,
7713 include_bundle_stats: bool,
7714 ) -> String {
7715 let signature = if include_bundle_stats {
7716 format!(
7717 "{}:{}:{}:{}",
7718 version,
7719 bundle.root_path.display(),
7720 bundle.total_bytes,
7721 bundle.modified_at_ms
7722 )
7723 } else {
7724 format!("{}:{}", version, bundle.root_path.display())
7725 };
7726 blake3::hash(signature.as_bytes()).to_hex().to_string()
7727 }
7728
7729 fn historical_bundle_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7730 format!(
7731 "historical_bundle_salvaged:{}",
7732 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_LEDGER_VERSION, bundle, false)
7733 )
7734 }
7735
7736 fn historical_bundle_legacy_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7737 let signature = format!(
7738 "{}:{}:{}:{}",
7739 HISTORICAL_SALVAGE_LEDGER_VERSION,
7740 bundle.root_path.display(),
7741 bundle.total_bytes,
7742 bundle.modified_at_ms
7743 );
7744 format!(
7745 "historical_bundle_salvaged:{}",
7746 blake3::hash(signature.as_bytes()).to_hex()
7747 )
7748 }
7749
7750 fn historical_bundle_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
7751 format!(
7752 "historical_bundle_progress:{}",
7753 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_PROGRESS_VERSION, bundle, false)
7754 )
7755 }
7756
7757 fn historical_bundle_legacy_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
7758 let signature = format!(
7759 "{}:{}:{}:{}",
7760 HISTORICAL_SALVAGE_PROGRESS_VERSION,
7761 bundle.root_path.display(),
7762 bundle.total_bytes,
7763 bundle.modified_at_ms
7764 );
7765 format!(
7766 "historical_bundle_progress:{}",
7767 blake3::hash(signature.as_bytes()).to_hex()
7768 )
7769 }
7770
7771 fn historical_bundle_already_imported(
7772 &self,
7773 bundle: &HistoricalDatabaseBundle,
7774 ) -> Result<bool> {
7775 for key in [
7776 Self::historical_bundle_meta_key(bundle),
7777 Self::historical_bundle_legacy_meta_key(bundle),
7778 ] {
7779 let existing: Option<String> = self
7780 .conn
7781 .query_row_map(
7782 "SELECT value FROM meta WHERE key = ?1",
7783 fparams![key.as_str()],
7784 |row| row.get_typed(0),
7785 )
7786 .optional()?;
7787 if existing.is_some() {
7788 return Ok(true);
7789 }
7790 }
7791 Ok(false)
7792 }
7793
7794 pub(crate) fn has_pending_historical_bundles(&self, canonical_db_path: &Path) -> Result<bool> {
7795 for bundle in discover_historical_database_bundles(canonical_db_path) {
7796 if !self.historical_bundle_already_imported(&bundle)? {
7797 return Ok(true);
7798 }
7799 }
7800 Ok(false)
7801 }
7802
7803 fn load_historical_bundle_progress(
7804 &self,
7805 bundle: &HistoricalDatabaseBundle,
7806 ) -> Result<Option<HistoricalBundleProgress>> {
7807 for key in [
7808 Self::historical_bundle_progress_key(bundle),
7809 Self::historical_bundle_legacy_progress_key(bundle),
7810 ] {
7811 let raw: Option<String> = self
7812 .conn
7813 .query_row_map(
7814 "SELECT value FROM meta WHERE key = ?1",
7815 fparams![key.as_str()],
7816 |row| row.get_typed(0),
7817 )
7818 .optional()?;
7819 let Some(raw) = raw else {
7820 continue;
7821 };
7822 let parsed: HistoricalBundleProgress =
7823 serde_json::from_str(&raw).with_context(|| {
7824 format!(
7825 "parsing historical salvage progress checkpoint for {}",
7826 bundle.root_path.display()
7827 )
7828 })?;
7829 if parsed.progress_version == HISTORICAL_SALVAGE_PROGRESS_VERSION {
7830 return Ok(Some(parsed));
7831 }
7832 }
7833 Ok(None)
7834 }
7835
7836 fn record_historical_bundle_progress(
7837 &self,
7838 bundle: &HistoricalDatabaseBundle,
7839 method: &str,
7840 last_completed_source_row_id: i64,
7841 conversations_imported: usize,
7842 messages_imported: usize,
7843 ) -> Result<()> {
7844 let key = Self::historical_bundle_progress_key(bundle);
7845 let value = HistoricalBundleProgress {
7846 progress_version: HISTORICAL_SALVAGE_PROGRESS_VERSION,
7847 path: bundle.root_path.display().to_string(),
7848 bytes: bundle.total_bytes,
7849 modified_at_ms: bundle.modified_at_ms,
7850 method: method.to_string(),
7851 last_completed_source_row_id,
7852 conversations_imported,
7853 messages_imported,
7854 updated_at_ms: Self::now_millis(),
7855 };
7856 let value_str = serde_json::to_string(&value)?;
7857 self.conn.execute_compat(
7858 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
7859 fparams![key.as_str(), value_str.as_str()],
7860 )?;
7861 Ok(())
7862 }
7863
7864 fn clear_historical_bundle_progress(&self, bundle: &HistoricalDatabaseBundle) -> Result<()> {
7865 for key in [
7866 Self::historical_bundle_progress_key(bundle),
7867 Self::historical_bundle_legacy_progress_key(bundle),
7868 ] {
7869 self.conn
7870 .execute_compat("DELETE FROM meta WHERE key = ?1", fparams![key.as_str()])?;
7871 }
7872 Ok(())
7873 }
7874
7875 fn record_historical_bundle_import(
7876 &self,
7877 bundle: &HistoricalDatabaseBundle,
7878 method: &str,
7879 conversations_imported: usize,
7880 messages_imported: usize,
7881 ) -> Result<()> {
7882 let key = Self::historical_bundle_meta_key(bundle);
7883 let value = serde_json::json!({
7884 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
7885 "path": bundle.root_path.display().to_string(),
7886 "bytes": bundle.total_bytes,
7887 "modified_at_ms": bundle.modified_at_ms,
7888 "method": method,
7889 "conversations_imported": conversations_imported,
7890 "messages_imported": messages_imported,
7891 "recorded_at_ms": Self::now_millis(),
7892 });
7893 let value_str = serde_json::to_string(&value)?;
7894 self.conn.execute_compat(
7895 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
7896 fparams![key.as_str(), value_str.as_str()],
7897 )?;
7898 Ok(())
7899 }
7900
7901 fn historical_import_error_is_split_retryable(err: &anyhow::Error) -> bool {
7902 const RETRYABLE_PATTERNS: &[&str] = &[
7903 "out of memory",
7904 "string or blob too big",
7905 "too many sql variables",
7906 ];
7907 err.chain().any(|cause| {
7908 let rendered = cause.to_string().to_ascii_lowercase();
7909 RETRYABLE_PATTERNS
7910 .iter()
7911 .any(|pattern| rendered.contains(pattern))
7912 })
7913 }
7914
7915 fn split_historical_batch_entry_messages(
7916 entry: &HistoricalBatchEntry,
7917 ) -> Option<(HistoricalBatchEntry, HistoricalBatchEntry)> {
7918 if entry.conversation.messages.len() < 2 {
7919 return None;
7920 }
7921 let split_at = entry.conversation.messages.len() / 2;
7922 if split_at == 0 || split_at >= entry.conversation.messages.len() {
7923 return None;
7924 }
7925
7926 let mut left = entry.clone();
7927 left.conversation.messages = entry.conversation.messages[..split_at].to_vec();
7928
7929 let mut right = entry.clone();
7930 right.conversation.messages = entry.conversation.messages[split_at..].to_vec();
7931
7932 Some((left, right))
7933 }
7934
7935 fn import_historical_batch_with_retry<F>(
7936 entries: &[HistoricalBatchEntry],
7937 insert_batch: &mut F,
7938 ) -> Result<HistoricalBatchImportTotals>
7939 where
7940 F: FnMut(&[HistoricalBatchEntry]) -> Result<HistoricalBatchImportTotals>,
7941 {
7942 match insert_batch(entries) {
7943 Ok(totals) => Ok(totals),
7944 Err(err) if Self::historical_import_error_is_split_retryable(&err) => {
7945 if entries.len() > 1 {
7946 let mid = entries.len() / 2;
7947 tracing::warn!(
7948 batch_entries = entries.len(),
7949 split_left = mid,
7950 split_right = entries.len() - mid,
7951 error = %err,
7952 "historical salvage batch failed; retrying in smaller sub-batches"
7953 );
7954 let left =
7955 Self::import_historical_batch_with_retry(&entries[..mid], insert_batch)?;
7956 let right =
7957 Self::import_historical_batch_with_retry(&entries[mid..], insert_batch)?;
7958 return Ok(HistoricalBatchImportTotals {
7959 inserted_source_rows: left.inserted_source_rows
7960 + right.inserted_source_rows,
7961 inserted_messages: left.inserted_messages + right.inserted_messages,
7962 });
7963 }
7964
7965 if let Some(entry) = entries.first()
7966 && let Some((left, right)) = Self::split_historical_batch_entry_messages(entry)
7967 {
7968 tracing::warn!(
7969 source_row_id = entry.source_row_id,
7970 message_count = entry.conversation.messages.len(),
7971 error = %err,
7972 "historical salvage conversation failed; retrying in smaller message slices"
7973 );
7974 let left_totals = Self::import_historical_batch_with_retry(
7975 std::slice::from_ref(&left),
7976 insert_batch,
7977 )?;
7978 let right_totals = Self::import_historical_batch_with_retry(
7979 std::slice::from_ref(&right),
7980 insert_batch,
7981 )?;
7982 return Ok(HistoricalBatchImportTotals {
7983 inserted_source_rows: usize::from(
7984 left_totals.inserted_source_rows > 0
7985 || right_totals.inserted_source_rows > 0,
7986 ),
7987 inserted_messages: left_totals
7988 .inserted_messages
7989 .saturating_add(right_totals.inserted_messages),
7990 });
7991 }
7992
7993 Err(err)
7994 }
7995 Err(err) => Err(err),
7996 }
7997 }
7998
7999 fn import_historical_sources(&self, source_conn: &FrankenConnection) -> Result<()> {
8000 let sources: Vec<Source> = match source_conn.query_map_collect(
8001 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at
8002 FROM sources",
8003 fparams![],
8004 |row| {
8005 let raw_source_id: String = row.get_typed(0)?;
8006 let kind_str: String = row.get_typed(1)?;
8007 let raw_host_label: Option<String> = row.get_typed(2)?;
8008 let config_json_raw: Option<String> = row.get_typed(5)?;
8009 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
8010 Some(raw_source_id.as_str()),
8011 Some(kind_str.as_str()),
8012 raw_host_label.as_deref(),
8013 );
8014 Ok(Source {
8015 id: source_id,
8016 kind: source_kind,
8017 host_label,
8018 machine_id: row.get_typed(3)?,
8019 platform: row.get_typed(4)?,
8020 config_json: config_json_raw.and_then(|raw| serde_json::from_str(&raw).ok()),
8021 created_at: row.get_typed(6)?,
8022 updated_at: row.get_typed(7)?,
8023 })
8024 },
8025 ) {
8026 Ok(rows) => rows,
8027 Err(err) => {
8028 tracing::warn!(error = %err, "historical sources table unavailable; skipping source import");
8029 return Ok(());
8030 }
8031 };
8032
8033 for source in sources {
8034 self.upsert_source(&source)?;
8035 }
8036 Ok(())
8037 }
8038
    /// Imports conversations and their messages from a historical database into this storage.
    ///
    /// Conversations are read from `source_conn` in ascending row id order,
    /// accumulated into batches bounded by `historical_import_batch_limits()`, and
    /// inserted through `import_historical_batch_with_retry`. After each committed
    /// batch a progress checkpoint is recorded for `bundle`, so a later call can
    /// resume after the last completed source row id.
    ///
    /// Conversations without any message rows are skipped. Sources referenced by a
    /// conversation but not yet present in this storage are created as placeholders.
    ///
    /// Returns `(conversations_imported, messages_imported)`. When resuming, both
    /// totals start from the values stored in the checkpoint.
    fn import_historical_conversations(
        &self,
        bundle: &HistoricalDatabaseBundle,
        salvage_method: &str,
        source_conn: &FrankenConnection,
    ) -> Result<(usize, usize)> {
        let batch_limits = historical_import_batch_limits();
        let cache_enabled = IndexingCache::is_enabled();
        let mut indexing_cache = IndexingCache::new();
        // Ids of sources already present in this storage; extended as placeholders are added.
        let mut known_sources: HashSet<String> = self
            .list_sources()?
            .into_iter()
            .map(|source| source.id)
            .collect();
        let resume_progress = self.load_historical_bundle_progress(bundle)?;
        // A checkpoint row id that is not positive is treated as "start from the beginning".
        let resume_after_row_id = resume_progress
            .as_ref()
            .map(|progress| progress.last_completed_source_row_id)
            .filter(|row_id| *row_id > 0);

        tracing::info!(
            target: "cass::historical_salvage",
            batch_conversations = batch_limits.conversations,
            batch_messages = batch_limits.messages,
            batch_payload_chars = batch_limits.payload_chars,
            cache_enabled,
            resume_after_row_id,
            "configured historical salvage batch limits"
        );

        if let Some(progress) = &resume_progress {
            tracing::info!(
                target: "cass::historical_salvage",
                path = %bundle.root_path.display(),
                resume_after_row_id = progress.last_completed_source_row_id,
                prior_conversations_imported = progress.conversations_imported,
                prior_messages_imported = progress.messages_imported,
                "resuming historical salvage bundle from durable checkpoint"
            );
        }

        // Both statements select the same columns; the resume variant only adds
        // the `c.id > ?1` filter so rows up to the checkpoint are not re-read.
        let conv_sql = if resume_after_row_id.is_some() {
            "SELECT
                c.id,
                COALESCE(a.slug, 'unknown'),
                w.path,
                c.external_id,
                c.title,
                c.source_path,
                c.started_at,
                c.ended_at,
                c.approx_tokens,
                c.metadata_json,
                c.source_id,
                c.origin_host
             FROM conversations c
             LEFT JOIN agents a ON c.agent_id = a.id
             LEFT JOIN workspaces w ON c.workspace_id = w.id
             WHERE c.id > ?1
             ORDER BY c.id"
        } else {
            "SELECT
                c.id,
                COALESCE(a.slug, 'unknown'),
                w.path,
                c.external_id,
                c.title,
                c.source_path,
                c.started_at,
                c.ended_at,
                c.approx_tokens,
                c.metadata_json,
                c.source_id,
                c.origin_host
             FROM conversations c
             LEFT JOIN agents a ON c.agent_id = a.id
             LEFT JOIN workspaces w ON c.workspace_id = w.id
             ORDER BY c.id"
        };
        let conv_params: &[ParamValue] =
            if let Some(last_completed_source_row_id) = resume_after_row_id {
                &[ParamValue::from(last_completed_source_row_id)]
            } else {
                &[]
            };

        // All conversation header rows are collected up front; the messages of each
        // conversation are queried one conversation at a time in the loop below.
        #[allow(clippy::type_complexity)]
        let conv_rows: Vec<(
            i64,
            String,
            Option<String>,
            Option<String>,
            Option<String>,
            String,
            Option<i64>,
            Option<i64>,
            Option<i64>,
            Option<String>,
            Option<String>,
            Option<String>,
        )> = source_conn
            .query_map_collect(conv_sql, conv_params, |row| {
                Ok((
                    row.get_typed::<i64>(0)?,
                    row.get_typed::<String>(1)?,
                    row.get_typed::<Option<String>>(2)?,
                    row.get_typed::<Option<String>>(3)?,
                    row.get_typed::<Option<String>>(4)?,
                    row.get_typed::<String>(5)?,
                    row.get_typed::<Option<i64>>(6)?,
                    row.get_typed::<Option<i64>>(7)?,
                    row.get_typed::<Option<i64>>(8)?,
                    row.get_typed::<Option<String>>(9)?,
                    row.get_typed::<Option<String>>(10)?,
                    row.get_typed::<Option<String>>(11)?,
                ))
            })
            .context("querying historical conversations")?;

        let msg_sql = "SELECT idx, role, author, created_at, content, extra_json
                       FROM messages
                       WHERE conversation_id = ?1
                       ORDER BY idx";

        // Running totals continue from the checkpoint when resuming.
        let mut imported_conversations = resume_progress
            .as_ref()
            .map(|progress| progress.conversations_imported)
            .unwrap_or(0);
        let mut imported_messages = resume_progress
            .as_ref()
            .map(|progress| progress.messages_imported)
            .unwrap_or(0);
        // State of the batch currently being accumulated.
        let mut pending_batch: Vec<HistoricalBatchEntry> = Vec::new();
        let mut pending_batch_messages = 0usize;
        let mut pending_batch_chars = 0usize;
        let mut pending_batch_first_row_id: Option<i64> = None;
        let mut pending_batch_last_row_id: Option<i64> = None;

        // Inserts the pending batch, updates the running totals, records a progress
        // checkpoint at the batch's last source row id, and resets the pending state.
        // All mutable state is passed in explicitly; an empty batch is a no-op.
        let flush_batch = |storage: &FrankenStorage,
                           batch: &mut Vec<HistoricalBatchEntry>,
                           pending_messages: &mut usize,
                           pending_chars: &mut usize,
                           first_row_id: &mut Option<i64>,
                           last_row_id: &mut Option<i64>,
                           imported_conversations: &mut usize,
                           imported_messages: &mut usize|
         -> Result<()> {
            if batch.is_empty() {
                return Ok(());
            }

            let batch_first_row_id = *first_row_id;
            let batch_last_row_id = *last_row_id;
            if historical_salvage_debug_enabled() {
                eprintln!(
                    "[historical-salvage] flushing batch rows {:?}..{:?} conversations={} messages={} payload_chars={}",
                    batch_first_row_id,
                    batch_last_row_id,
                    batch.len(),
                    *pending_messages,
                    *pending_chars
                );
            }
            tracing::info!(
                target: "cass::historical_salvage",
                batch_conversations = batch.len(),
                batch_messages = *pending_messages,
                batch_payload_chars = *pending_chars,
                first_source_row_id = batch_first_row_id,
                last_source_row_id = batch_last_row_id,
                "flushing historical salvage batch"
            );

            // Inserts one slice of entries and counts the source rows and messages
            // for which the insert reported at least one inserted index.
            let mut insert_batch =
                |entries: &[HistoricalBatchEntry]| -> Result<HistoricalBatchImportTotals> {
                    let borrowed_batch: Vec<(i64, Option<i64>, &Conversation)> = entries
                        .iter()
                        .map(|entry| (entry.agent_id, entry.workspace_id, &entry.conversation))
                        .collect();
                    let outcomes = storage
                        .insert_conversations_batched(&borrowed_batch)
                        .with_context(|| {
                            let first_source_row_id =
                                entries.first().map(|entry| entry.source_row_id);
                            let last_source_row_id =
                                entries.last().map(|entry| entry.source_row_id);
                            format!(
                                "inserting historical salvage batch source rows {:?}..{:?}",
                                first_source_row_id, last_source_row_id
                            )
                        })?;
                    let mut totals = HistoricalBatchImportTotals::default();
                    for outcome in outcomes {
                        if !outcome.inserted_indices.is_empty() {
                            totals.inserted_source_rows += 1;
                            totals.inserted_messages += outcome.inserted_indices.len();
                        }
                    }
                    Ok(totals)
                };
            let totals =
                Self::import_historical_batch_with_retry(batch.as_slice(), &mut insert_batch)?;
            *imported_conversations =
                (*imported_conversations).saturating_add(totals.inserted_source_rows);
            *imported_messages = (*imported_messages).saturating_add(totals.inserted_messages);
            // The checkpoint is written only after the whole batch was inserted.
            if let Some(last_completed_row_id) = batch_last_row_id {
                storage.record_historical_bundle_progress(
                    bundle,
                    salvage_method,
                    last_completed_row_id,
                    *imported_conversations,
                    *imported_messages,
                )?;
            }
            tracing::info!(
                target: "cass::historical_salvage",
                batch_conversations = batch.len(),
                batch_messages = *pending_messages,
                imported_conversations = *imported_conversations,
                imported_messages = *imported_messages,
                first_source_row_id = batch_first_row_id,
                last_source_row_id = batch_last_row_id,
                "historical salvage batch committed"
            );
            if historical_salvage_debug_enabled() {
                eprintln!(
                    "[historical-salvage] committed batch rows {:?}..{:?} imported_conversations={} imported_messages={}",
                    batch_first_row_id,
                    batch_last_row_id,
                    *imported_conversations,
                    *imported_messages
                );
            }
            batch.clear();
            *pending_messages = 0;
            *pending_chars = 0;
            *first_row_id = None;
            *last_row_id = None;
            Ok(())
        };

        for (
            conversation_row_id,
            agent_slug,
            workspace_path,
            external_id,
            title,
            source_path,
            started_at,
            ended_at,
            approx_tokens,
            metadata_json_raw,
            raw_source_id,
            raw_origin_host,
        ) in conv_rows
        {
            // Source id and origin host go through the normalization helpers of the
            // tantivy search module before being stored.
            let source_id = crate::search::tantivy::normalized_index_source_id(
                raw_source_id.as_deref(),
                None,
                raw_origin_host.as_deref(),
            );
            let origin_host =
                crate::search::tantivy::normalized_index_origin_host(raw_origin_host.as_deref());

            let messages: Vec<Message> = source_conn
                .query_map_collect(msg_sql, fparams![conversation_row_id], |msg_row| {
                    let role: String = msg_row.get_typed(1)?;
                    Ok(Message {
                        // Historical message ids are not carried over.
                        id: None,
                        idx: msg_row.get_typed(0)?,
                        role: match role.as_str() {
                            "user" => MessageRole::User,
                            "agent" | "assistant" => MessageRole::Agent,
                            "tool" => MessageRole::Tool,
                            "system" => MessageRole::System,
                            other => MessageRole::Other(other.to_string()),
                        },
                        author: msg_row.get_typed(2)?,
                        created_at: msg_row.get_typed(3)?,
                        content: msg_row.get_typed(4)?,
                        extra_json: parse_historical_json_column(msg_row.get_typed(5)?),
                        snippets: Vec::new(),
                    })
                })
                .context("collecting historical message rows")?;

            // Conversations without messages are not imported.
            if messages.is_empty() {
                continue;
            }

            let conversation_message_count = messages.len();
            let conversation_chars = messages
                .iter()
                .map(message_payload_size_hint)
                .sum::<usize>();

            let conversation = Conversation {
                id: None,
                agent_slug: agent_slug.clone(),
                workspace: workspace_path.map(PathBuf::from),
                external_id,
                title,
                source_path: PathBuf::from(source_path),
                started_at,
                ended_at,
                approx_tokens,
                metadata_json: parse_json_column(metadata_json_raw),
                messages,
                source_id,
                origin_host,
            };

            // Register a placeholder source the first time an unknown source id is
            // seen: the local source for `LOCAL_SOURCE_ID`, otherwise an SSH-kind
            // source labelled with the conversation's origin host.
            if !known_sources.contains(&conversation.source_id) {
                let placeholder = if conversation.source_id == LOCAL_SOURCE_ID {
                    Source::local()
                } else {
                    Source {
                        id: conversation.source_id.clone(),
                        kind: SourceKind::Ssh,
                        host_label: conversation.origin_host.clone(),
                        machine_id: None,
                        platform: None,
                        config_json: None,
                        created_at: None,
                        updated_at: None,
                    }
                };
                self.upsert_source(&placeholder)?;
                known_sources.insert(conversation.source_id.clone());
            }

            // The agent slug doubles as the agent name; kind is always `Cli` here.
            let agent = Agent {
                id: None,
                slug: agent_slug.clone(),
                name: agent_slug,
                version: None,
                kind: AgentKind::Cli,
            };
            let agent_id = if cache_enabled {
                indexing_cache.get_or_insert_agent(self, &agent)?
            } else {
                self.ensure_agent(&agent)?
            };
            let workspace_id = if let Some(workspace) = &conversation.workspace {
                if cache_enabled {
                    Some(indexing_cache.get_or_insert_workspace(self, workspace, None)?)
                } else {
                    Some(self.ensure_workspace(workspace, None)?)
                }
            } else {
                None
            };

            // Flush first when adding this conversation would push a non-empty
            // pending batch past any of the configured limits.
            let exceeds_pending_limits = !pending_batch.is_empty()
                && (pending_batch.len() >= batch_limits.conversations
                    || pending_batch_messages.saturating_add(conversation_message_count)
                        > batch_limits.messages
                    || pending_batch_chars.saturating_add(conversation_chars)
                        > batch_limits.payload_chars);
            if exceeds_pending_limits {
                flush_batch(
                    self,
                    &mut pending_batch,
                    &mut pending_batch_messages,
                    &mut pending_batch_chars,
                    &mut pending_batch_first_row_id,
                    &mut pending_batch_last_row_id,
                    &mut imported_conversations,
                    &mut imported_messages,
                )?;
            }

            if pending_batch_first_row_id.is_none() {
                pending_batch_first_row_id = Some(conversation_row_id);
            }
            pending_batch_last_row_id = Some(conversation_row_id);
            pending_batch_messages =
                pending_batch_messages.saturating_add(conversation_message_count);
            pending_batch_chars = pending_batch_chars.saturating_add(conversation_chars);
            pending_batch.push(HistoricalBatchEntry {
                source_row_id: conversation_row_id,
                agent_id,
                workspace_id,
                conversation,
            });

            // Flush immediately once the batch has reached any limit, which also
            // covers a single conversation that is at or over a limit on its own.
            if pending_batch.len() >= batch_limits.conversations
                || pending_batch_messages >= batch_limits.messages
                || pending_batch_chars >= batch_limits.payload_chars
            {
                flush_batch(
                    self,
                    &mut pending_batch,
                    &mut pending_batch_messages,
                    &mut pending_batch_chars,
                    &mut pending_batch_first_row_id,
                    &mut pending_batch_last_row_id,
                    &mut imported_conversations,
                    &mut imported_messages,
                )?;
            }
        }

        // Flush whatever is left after the last conversation.
        flush_batch(
            self,
            &mut pending_batch,
            &mut pending_batch_messages,
            &mut pending_batch_chars,
            &mut pending_batch_first_row_id,
            &mut pending_batch_last_row_id,
            &mut imported_conversations,
            &mut imported_messages,
        )?;

        if cache_enabled {
            let (hits, misses, hit_rate) = indexing_cache.stats();
            tracing::info!(
                target: "cass::historical_salvage",
                hits,
                misses,
                hit_rate = format!("{:.1}%", hit_rate * 100.0),
                agents = indexing_cache.agent_count(),
                workspaces = indexing_cache.workspace_count(),
                sources = known_sources.len(),
                "historical salvage cache stats"
            );
        }

        Ok((imported_conversations, imported_messages))
    }
8474
8475 pub fn salvage_historical_databases(
8476 &self,
8477 canonical_db_path: &Path,
8478 ) -> Result<HistoricalSalvageOutcome> {
8479 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
8480 let mut outcome = HistoricalSalvageOutcome {
8481 bundles_considered: ordered_bundles.len(),
8482 ..HistoricalSalvageOutcome::default()
8483 };
8484
8485 for bundle in ordered_bundles {
8486 if self.historical_bundle_already_imported(&bundle)? {
8487 self.clear_historical_bundle_progress(&bundle)?;
8488 continue;
8489 }
8490
8491 let source = match open_historical_bundle_for_salvage(&bundle).with_context(|| {
8492 format!(
8493 "opening historical bundle {} for salvage",
8494 bundle.root_path.display()
8495 )
8496 }) {
8497 Ok(source) => source,
8498 Err(err) => {
8499 tracing::warn!(
8500 path = %bundle.root_path.display(),
8501 error = %err,
8502 "skipping unreadable historical cass database bundle during salvage"
8503 );
8504 self.clear_historical_bundle_progress(&bundle)?;
8505 continue;
8506 }
8507 };
8508
8509 if let Some(progress) = self.load_historical_bundle_progress(&bundle)? {
8517 let backup_max_conversation_id: i64 = source
8518 .conn
8519 .query_row_map(
8520 "SELECT COALESCE(MAX(id), 0) FROM conversations",
8521 fparams![],
8522 |row| row.get_typed(0),
8523 )
8524 .unwrap_or(0);
8525 if backup_max_conversation_id > 0
8526 && progress.last_completed_source_row_id >= backup_max_conversation_id
8527 {
8528 self.record_historical_bundle_import(
8529 &bundle,
8530 source.method,
8531 progress.conversations_imported,
8532 progress.messages_imported,
8533 )?;
8534 self.clear_historical_bundle_progress(&bundle)?;
8535 tracing::info!(
8536 path = %bundle.root_path.display(),
8537 last_completed_source_row_id = progress.last_completed_source_row_id,
8538 backup_max_conversation_id,
8539 conversations_imported = progress.conversations_imported,
8540 messages_imported = progress.messages_imported,
8541 "historical bundle already fully imported per checkpoint; marking salvaged and skipping O(n) re-scan"
8542 );
8543 continue;
8544 }
8545 }
8546
8547 self.import_historical_sources(&source.conn)?;
8548 let (imported_conversations, imported_messages) =
8549 self.import_historical_conversations(&bundle, source.method, &source.conn)?;
8550 self.record_historical_bundle_import(
8551 &bundle,
8552 source.method,
8553 imported_conversations,
8554 imported_messages,
8555 )?;
8556 self.clear_historical_bundle_progress(&bundle)?;
8557
8558 outcome.bundles_imported += 1;
8559 outcome.conversations_imported += imported_conversations;
8560 outcome.messages_imported += imported_messages;
8561
8562 tracing::info!(
8563 path = %bundle.root_path.display(),
8564 bytes = bundle.total_bytes,
8565 method = source.method,
8566 imported_conversations,
8567 imported_messages,
8568 "salvaged historical cass database bundle"
8569 );
8570 }
8571
8572 Ok(outcome)
8573 }
8574
8575 pub fn delete_source(&self, id: &str, _cascade: bool) -> Result<bool> {
8577 if id == LOCAL_SOURCE_ID {
8578 anyhow::bail!("cannot delete the local source");
8579 }
8580 let count = self
8581 .conn
8582 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams![id])?;
8583 if count > 0 {
8584 self.invalidate_conversation_source_cache(id);
8585 }
8586 Ok(count > 0)
8587 }
8588
8589 pub fn insert_conversation_tree(
8591 &self,
8592 agent_id: i64,
8593 workspace_id: Option<i64>,
8594 conv: &Conversation,
8595 ) -> Result<InsertOutcome> {
8596 let normalized_conv = normalized_conversation_for_storage(conv);
8597 let conv = normalized_conv.as_ref();
8598 self.ensure_source_for_conversation(conv)?;
8599 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
8600 let defer_analytics_updates = defer_analytics_updates_enabled();
8601 let conversation_key = conversation_merge_key(agent_id, conv);
8602 let mut tx = self.conn.transaction()?;
8603 let existing = franken_find_existing_conversation_with_tail_by_key(
8604 &tx,
8605 &conversation_key,
8606 Some(conv),
8607 )?;
8608 if let Some(existing) = existing {
8609 let outcome = self.franken_append_messages_with_tail_in_tx(
8610 &tx,
8611 agent_id,
8612 existing.id,
8613 conv,
8614 existing.tail_state,
8615 defer_lexical_updates,
8616 defer_analytics_updates,
8617 )?;
8618 tx.commit()?;
8619 return Ok(outcome);
8620 }
8621
8622 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
8623 &tx,
8624 agent_id,
8625 workspace_id,
8626 conv,
8627 &conversation_key,
8628 )? {
8629 ConversationInsertStatus::Inserted(conv_id) => conv_id,
8630 ConversationInsertStatus::Existing(existing_id) => {
8631 let ExistingMessageLookup {
8632 by_idx: mut existing_messages,
8633 replay: mut existing_replay_fingerprints,
8634 } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
8635 let ExistingConversationNewMessages {
8636 messages: new_messages,
8637 new_chars,
8638 idx_collision_count,
8639 first_collision_idx,
8640 } = collect_new_messages_for_existing_conversation(
8641 existing_id,
8642 conv,
8643 &mut existing_messages,
8644 &mut existing_replay_fingerprints,
8645 "skipping replay-equivalent recovered message with shifted idx",
8646 );
8647 let (inserted_last_idx, inserted_last_created_at) =
8648 borrowed_messages_tail_state(&new_messages);
8649 let mut inserted_indices = Vec::new();
8650 let mut fts_entries = Vec::new();
8651 let mut fts_pending_chars = 0usize;
8652 let mut _fts_inserted_total = 0usize;
8653 let inserted_message_ids =
8654 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
8655 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8656 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8657 if !defer_lexical_updates {
8658 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8659 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
8660 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8661 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8662 {
8663 flush_pending_fts_entries(
8664 self,
8665 &tx,
8666 &mut fts_entries,
8667 &mut fts_pending_chars,
8668 &mut _fts_inserted_total,
8669 )?;
8670 }
8671 }
8672 inserted_indices.push(msg.idx);
8673 }
8674
8675 if idx_collision_count > 0 {
8676 tracing::warn!(
8677 conversation_id = existing_id,
8678 collision_count = idx_collision_count,
8679 first_idx = first_collision_idx,
8680 source_path = %conv.source_path.display(),
8681 "message idx collisions encountered while merging recovered conversation; retaining canonical message variants"
8682 );
8683 }
8684
8685 if !defer_lexical_updates {
8686 flush_pending_fts_entries(
8687 self,
8688 &tx,
8689 &mut fts_entries,
8690 &mut fts_pending_chars,
8691 &mut _fts_inserted_total,
8692 )?;
8693 }
8694
8695 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
8696 franken_update_conversation_tail_state(
8697 &tx,
8698 existing_id,
8699 conv_last_ts,
8700 inserted_last_idx,
8701 inserted_last_created_at,
8702 )?;
8703 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
8704 {
8705 franken_update_external_conversation_tail_lookup_key(
8706 &tx,
8707 &lookup_key,
8708 conv_last_ts,
8709 inserted_last_idx,
8710 inserted_last_created_at,
8711 )?;
8712 }
8713
8714 if !defer_analytics_updates && !inserted_indices.is_empty() {
8715 franken_update_daily_stats_in_tx(
8716 self,
8717 &tx,
8718 &conv.agent_slug,
8719 &conv.source_id,
8720 conversation_effective_started_at(conv),
8721 StatsDelta {
8722 session_count_delta: 0,
8723 message_count_delta: inserted_indices.len() as i64,
8724 total_chars_delta: new_chars,
8725 },
8726 )?;
8727 }
8728
8729 tx.commit()?;
8730 return Ok(InsertOutcome {
8731 conversation_id: existing_id,
8732 conversation_inserted: false,
8733 inserted_indices,
8734 });
8735 }
8736 };
8737 let mut fts_entries = Vec::new();
8738 let mut fts_pending_chars = 0usize;
8739 let mut _fts_inserted_total = 0usize;
8740 let mut total_chars: i64 = 0;
8741 let mut inserted_indices = Vec::new();
8742 let mut pending_messages = HashMap::new();
8743 let mut pending_replay_fingerprints = HashSet::new();
8744 let mut idx_collision_count = 0usize;
8745 let mut first_collision_idx: Option<i64> = None;
8746 let mut new_messages = Vec::new();
8747 for msg in &conv.messages {
8748 let incoming_fingerprint = message_merge_fingerprint(msg);
8749 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
8750 if existing_fingerprint != &incoming_fingerprint {
8751 idx_collision_count = idx_collision_count.saturating_add(1);
8752 first_collision_idx.get_or_insert(msg.idx);
8753 }
8754 continue;
8755 }
8756 let incoming_replay = message_replay_fingerprint(msg);
8757 if pending_replay_fingerprints.contains(&incoming_replay) {
8758 tracing::debug!(
8759 conversation_id = conv_id,
8760 idx = msg.idx,
8761 source_path = %conv.source_path.display(),
8762 "skipping replay-equivalent duplicate message within new conversation insert"
8763 );
8764 continue;
8765 }
8766 pending_messages.insert(msg.idx, incoming_fingerprint);
8767 pending_replay_fingerprints.insert(incoming_replay);
8768 new_messages.push(msg);
8769 }
8770 let inserted_message_ids = franken_batch_insert_new_messages(&tx, conv_id, &new_messages)?;
8771 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8772 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8773 if !defer_lexical_updates {
8774 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8775 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
8776 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8777 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8778 {
8779 flush_pending_fts_entries(
8780 self,
8781 &tx,
8782 &mut fts_entries,
8783 &mut fts_pending_chars,
8784 &mut _fts_inserted_total,
8785 )?;
8786 }
8787 }
8788 total_chars += msg.content.len() as i64;
8789 inserted_indices.push(msg.idx);
8790 }
8791 if idx_collision_count > 0 {
8792 tracing::warn!(
8793 conversation_id = conv_id,
8794 collision_count = idx_collision_count,
8795 first_idx = first_collision_idx,
8796 source_path = %conv.source_path.display(),
8797 "message idx collisions encountered while inserting a new conversation; retaining the first canonical variant per idx"
8798 );
8799 }
8800 if !defer_lexical_updates {
8801 flush_pending_fts_entries(
8802 self,
8803 &tx,
8804 &mut fts_entries,
8805 &mut fts_pending_chars,
8806 &mut _fts_inserted_total,
8807 )?;
8808 }
8809
8810 if !defer_analytics_updates {
8811 franken_update_daily_stats_in_tx(
8812 self,
8813 &tx,
8814 &conv.agent_slug,
8815 &conv.source_id,
8816 conversation_effective_started_at(conv),
8817 StatsDelta {
8818 session_count_delta: 1,
8819 message_count_delta: inserted_indices.len() as i64,
8820 total_chars_delta: total_chars,
8821 },
8822 )?;
8823 }
8824
8825 tx.commit()?;
8826 Ok(InsertOutcome {
8827 conversation_id: conv_id,
8828 conversation_inserted: true,
8829 inserted_indices,
8830 })
8831 }
8832
/// Test-only variant of the new-conversation insert path that records how
/// long each phase takes into `profile`.
///
/// Phases timed: source ensure, transaction open, existing-conversation
/// lookup, conversation row insert, message insert, snippet insert, FTS
/// entry construction, FTS flushes, analytics update and commit. Counters
/// (`invocations`, `messages`, `inserted_messages`) and `total_duration` are
/// only updated when the whole insert succeeds.
///
/// # Errors
/// Fails if the conversation already exists (either at lookup time or when
/// the row insert reports an existing row), or on any storage error.
#[cfg(test)]
fn insert_conversation_tree_with_profile(
    &self,
    agent_id: i64,
    workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let overall_timer = Instant::now();
    let normalized = normalized_conversation_for_storage(conv);
    let conv = normalized.as_ref();

    let timer = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += timer.elapsed();

    let skip_lexical = defer_storage_lexical_updates_enabled();
    let skip_analytics = defer_analytics_updates_enabled();
    let merge_key = conversation_merge_key(agent_id, conv);

    let timer = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += timer.elapsed();

    let timer = Instant::now();
    let existing = franken_find_existing_conversation_by_key(&tx, &merge_key, Some(conv))?;
    profile.existing_lookup_duration += timer.elapsed();
    if let Some(existing_id) = existing {
        bail!("profile helper expects new conversation path, found existing id {existing_id}");
    }

    let timer = Instant::now();
    let insert_status = franken_insert_conversation_or_get_existing_after_miss(
        &tx,
        agent_id,
        workspace_id,
        conv,
        &merge_key,
    )?;
    let conv_id = match insert_status {
        ConversationInsertStatus::Existing(existing_id) => {
            bail!(
                "profile helper expected inserted conversation row, reused existing id {existing_id}"
            );
        }
        ConversationInsertStatus::Inserted(new_id) => new_id,
    };
    profile.conversation_row_duration += timer.elapsed();

    let mut fts_entries = Vec::new();
    let mut fts_pending_chars = 0usize;
    let mut fts_flushed_total = 0usize;
    let mut total_chars: i64 = 0;
    let mut inserted_indices = Vec::new();
    let mut seen_by_idx = HashMap::new();
    let mut seen_replays = HashSet::new();
    let mut collisions = 0usize;
    let mut first_collision: Option<i64> = None;
    let mut fresh_messages = Vec::new();

    // De-duplicate within the incoming conversation: first variant per idx
    // wins, and replay-equivalent repeats are dropped.
    for msg in &conv.messages {
        let merge_fingerprint = message_merge_fingerprint(msg);
        if let Some(seen) = seen_by_idx.get(&msg.idx) {
            let differs = seen != &merge_fingerprint;
            if differs {
                collisions = collisions.saturating_add(1);
                first_collision.get_or_insert(msg.idx);
            }
            continue;
        }

        let replay_fingerprint = message_replay_fingerprint(msg);
        if !seen_replays.insert(replay_fingerprint) {
            tracing::debug!(
                conversation_id = conv_id,
                idx = msg.idx,
                source_path = %conv.source_path.display(),
                "skipping replay-equivalent duplicate message within profiled new conversation insert"
            );
            continue;
        }

        seen_by_idx.insert(msg.idx, merge_fingerprint);
        fresh_messages.push(msg);
    }

    let timer = Instant::now();
    let message_ids = franken_batch_insert_new_messages_with_profile(
        &tx,
        conv_id,
        &fresh_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += timer.elapsed();

    for (msg_id, msg) in message_ids.into_iter().zip(fresh_messages) {
        let timer = Instant::now();
        franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
        profile.snippet_insert_duration += timer.elapsed();

        if !skip_lexical {
            let timer = Instant::now();
            fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
            fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
            profile.fts_entry_duration += timer.elapsed();

            let batch_full = fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS;
            if batch_full {
                let timer = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_entries,
                    &mut fts_pending_chars,
                    &mut fts_flushed_total,
                )?;
                profile.fts_flush_duration += timer.elapsed();
            }
        }

        total_chars += msg.content.len() as i64;
        inserted_indices.push(msg.idx);
    }

    if collisions > 0 {
        tracing::warn!(
            conversation_id = conv_id,
            collision_count = collisions,
            first_idx = first_collision,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling a new conversation insert; retaining the first canonical variant per idx"
        );
    }

    if !skip_lexical {
        let timer = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_entries,
            &mut fts_pending_chars,
            &mut fts_flushed_total,
        )?;
        profile.fts_flush_duration += timer.elapsed();
    }

    if !skip_analytics {
        let timer = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 1,
                message_count_delta: inserted_indices.len() as i64,
                total_chars_delta: total_chars,
            },
        )?;
        profile.analytics_duration += timer.elapsed();
    }

    let timer = Instant::now();
    tx.commit()?;
    profile.commit_duration += timer.elapsed();
    profile.invocations += 1;
    profile.messages += conv.messages.len();
    profile.inserted_messages += inserted_indices.len();
    profile.total_duration += overall_timer.elapsed();

    Ok(InsertOutcome {
        conversation_id: conv_id,
        conversation_inserted: true,
        inserted_indices,
    })
}
9012
/// Test-only variant of the append-to-existing-conversation path that
/// records per-phase timings into `profile`.
///
/// The conversation must already be stored. New messages are chosen either
/// from the append-only tail plan (when the stored tail state allows one) or
/// by comparing against the stored messages. After inserting messages,
/// snippets and (unless deferred) FTS entries, the conversation tail state
/// and the external tail lookup are updated, daily stats are adjusted
/// (unless deferred, and only when something was inserted), and the
/// transaction is committed.
///
/// `_workspace_id` is not read by this helper.
///
/// # Errors
/// Fails if no existing conversation matches, or on any storage error.
#[cfg(test)]
fn append_existing_conversation_with_profile(
    &self,
    agent_id: i64,
    _workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let overall_timer = Instant::now();
    let normalized = normalized_conversation_for_storage(conv);
    let conv = normalized.as_ref();

    let timer = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += timer.elapsed();

    let skip_lexical = defer_storage_lexical_updates_enabled();
    let skip_analytics = defer_analytics_updates_enabled();
    let conversation_key = conversation_merge_key(agent_id, conv);

    let timer = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += timer.elapsed();

    let timer = Instant::now();
    let lookup_result = franken_find_existing_conversation_with_tail_by_key(
        &tx,
        &conversation_key,
        Some(conv),
    )?;
    profile.existing_lookup_duration += timer.elapsed();
    let Some(existing) = lookup_result else {
        bail!("append profile helper expects existing conversation for {conversation_key:?}");
    };
    let existing_id = existing.id;

    let timer = Instant::now();
    let stored_tail = existing.tail_state;
    let stored_ended_at = stored_tail.and_then(|state| state.ended_at);
    let tail_plan = stored_tail.as_ref().and_then(|state| {
        collect_append_only_tail_messages(
            conv,
            state.last_message_idx,
            state.last_message_created_at,
        )
    });
    let used_append_tail_plan = tail_plan.is_some();
    profile.existing_idx_lookup_duration += timer.elapsed();

    let timer = Instant::now();
    let ExistingConversationNewMessages {
        messages: fresh_messages,
        new_chars: added_chars,
        idx_collision_count: collisions,
        first_collision_idx: first_collision,
    } = match tail_plan {
        Some(plan) => plan,
        None => {
            // No usable tail plan: compare against what is already stored.
            let mut lookup = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
            collect_new_messages_for_existing_conversation(
                existing_id,
                conv,
                &mut lookup.by_idx,
                &mut lookup.replay,
                "skipping replay-equivalent profiled append message with shifted idx",
            )
        }
    };
    profile.dedupe_filter_duration += timer.elapsed();

    let mut inserted_indices = Vec::new();
    let mut fts_entries = Vec::new();
    let mut fts_pending_chars = 0usize;
    let mut fts_flushed_total = 0usize;
    let (tail_idx, tail_created_at) = borrowed_messages_tail_state(&fresh_messages);

    let timer = Instant::now();
    let message_ids = franken_append_insert_new_messages_with_profile(
        &tx,
        existing_id,
        &fresh_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += timer.elapsed();

    for (msg_id, msg) in message_ids.into_iter().zip(fresh_messages) {
        let timer = Instant::now();
        franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
        profile.snippet_insert_duration += timer.elapsed();

        if !skip_lexical {
            let timer = Instant::now();
            fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
            fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
            profile.fts_entry_duration += timer.elapsed();

            let batch_full = fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS;
            if batch_full {
                let timer = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_entries,
                    &mut fts_pending_chars,
                    &mut fts_flushed_total,
                )?;
                profile.fts_flush_duration += timer.elapsed();
            }
        }

        inserted_indices.push(msg.idx);
    }

    if collisions > 0 {
        tracing::warn!(
            conversation_id = existing_id,
            collision_count = collisions,
            first_idx = first_collision,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling append merge; retaining canonical message variants"
        );
    }

    if !skip_lexical {
        let timer = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_entries,
            &mut fts_pending_chars,
            &mut fts_flushed_total,
        )?;
        profile.fts_flush_duration += timer.elapsed();
    }

    let timer = Instant::now();
    let exact_append_tail_set = match (used_append_tail_plan, tail_idx, tail_created_at) {
        // Tail plan used and something with a timestamp was appended.
        (true, Some(last_idx), Some(last_ts)) => {
            if stored_ended_at.is_none_or(|ended_at| ended_at <= last_ts) {
                franken_set_conversation_tail_state_after_append(
                    &tx,
                    existing_id,
                    last_ts,
                    last_idx,
                    last_ts,
                )?;
                true
            } else {
                franken_update_conversation_tail_state(
                    &tx,
                    existing_id,
                    Some(last_ts),
                    tail_idx,
                    tail_created_at,
                )?;
                false
            }
        }
        // Tail plan used but no complete tail to record: leave the row alone.
        (true, _, _) => false,
        (false, _, _) => {
            let newest_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
            franken_update_conversation_tail_state(
                &tx,
                existing_id,
                newest_ts,
                tail_idx,
                tail_created_at,
            )?;
            false
        }
    };
    franken_update_external_conversation_tail_after_append(
        &tx,
        agent_id,
        conv,
        used_append_tail_plan,
        exact_append_tail_set,
        tail_idx,
        tail_created_at,
    )?;
    profile.conversation_row_duration += timer.elapsed();

    let record_stats = !skip_analytics && !inserted_indices.is_empty();
    if record_stats {
        let timer = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 0,
                message_count_delta: inserted_indices.len() as i64,
                total_chars_delta: added_chars,
            },
        )?;
        profile.analytics_duration += timer.elapsed();
    }

    let timer = Instant::now();
    tx.commit()?;
    profile.commit_duration += timer.elapsed();
    profile.invocations += 1;
    profile.messages += conv.messages.len();
    profile.inserted_messages += inserted_indices.len();
    profile.total_duration += overall_timer.elapsed();

    Ok(InsertOutcome {
        conversation_id: existing_id,
        conversation_inserted: false,
        inserted_indices,
    })
}
9228
9229 #[allow(clippy::too_many_arguments)]
9231 fn franken_append_messages_with_tail_in_tx(
9232 &self,
9233 tx: &FrankenTransaction<'_>,
9234 agent_id: i64,
9235 conversation_id: i64,
9236 conv: &Conversation,
9237 append_tail_state: Option<ExistingConversationTailState>,
9238 defer_lexical_updates: bool,
9239 defer_analytics_updates: bool,
9240 ) -> Result<InsertOutcome> {
9241 let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
9242 let append_plan = append_tail_state.as_ref().and_then(|state| {
9243 collect_append_only_tail_messages(
9244 conv,
9245 state.last_message_idx,
9246 state.last_message_created_at,
9247 )
9248 });
9249 let used_append_tail_plan = append_plan.is_some();
9250 let ExistingConversationNewMessages {
9251 messages: new_messages,
9252 new_chars,
9253 idx_collision_count,
9254 first_collision_idx,
9255 } = if let Some(append_plan) = append_plan {
9256 append_plan
9257 } else {
9258 let ExistingMessageLookup {
9259 by_idx: mut existing_messages,
9260 replay: mut existing_replay_fingerprints,
9261 } = franken_existing_message_lookup(tx, conversation_id, &conv.messages)?;
9262 collect_new_messages_for_existing_conversation(
9263 conversation_id,
9264 conv,
9265 &mut existing_messages,
9266 &mut existing_replay_fingerprints,
9267 "skipping replay-equivalent recovered message with shifted idx",
9268 )
9269 };
9270
9271 let mut inserted_indices = Vec::new();
9272 let mut fts_entries = Vec::new();
9273 let mut fts_pending_chars = 0usize;
9274 let mut _fts_inserted_total = 0usize;
9275 let (inserted_last_idx, inserted_last_created_at) =
9276 borrowed_messages_tail_state(&new_messages);
9277 let inserted_message_ids =
9278 franken_append_insert_new_messages(tx, conversation_id, &new_messages)?;
9279 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9280 franken_insert_snippets(tx, msg_id, &msg.snippets)?;
9281 if !defer_lexical_updates {
9282 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9283 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9284 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9285 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9286 {
9287 flush_pending_fts_entries(
9288 self,
9289 tx,
9290 &mut fts_entries,
9291 &mut fts_pending_chars,
9292 &mut _fts_inserted_total,
9293 )?;
9294 }
9295 }
9296 inserted_indices.push(msg.idx);
9297 }
9298
9299 if idx_collision_count > 0 {
9300 tracing::warn!(
9301 conversation_id,
9302 collision_count = idx_collision_count,
9303 first_idx = first_collision_idx,
9304 source_path = %conv.source_path.display(),
9305 "message idx collisions encountered while appending to an existing conversation; retaining canonical message variants"
9306 );
9307 }
9308
9309 if !defer_lexical_updates {
9310 flush_pending_fts_entries(
9311 self,
9312 tx,
9313 &mut fts_entries,
9314 &mut fts_pending_chars,
9315 &mut _fts_inserted_total,
9316 )?;
9317 }
9318
9319 let mut exact_append_tail_set = false;
9320 if used_append_tail_plan {
9321 if let (Some(last_message_idx), Some(last_message_created_at)) =
9322 (inserted_last_idx, inserted_last_created_at)
9323 {
9324 if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
9325 franken_set_conversation_tail_state_after_append(
9326 tx,
9327 conversation_id,
9328 last_message_created_at,
9329 last_message_idx,
9330 last_message_created_at,
9331 )?;
9332 exact_append_tail_set = true;
9333 } else {
9334 franken_update_conversation_tail_state(
9335 tx,
9336 conversation_id,
9337 Some(last_message_created_at),
9338 inserted_last_idx,
9339 inserted_last_created_at,
9340 )?;
9341 }
9342 }
9343 } else {
9344 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
9345 franken_update_conversation_tail_state(
9346 tx,
9347 conversation_id,
9348 conv_last_ts,
9349 inserted_last_idx,
9350 inserted_last_created_at,
9351 )?;
9352 }
9353 franken_update_external_conversation_tail_after_append(
9354 tx,
9355 agent_id,
9356 conv,
9357 used_append_tail_plan,
9358 exact_append_tail_set,
9359 inserted_last_idx,
9360 inserted_last_created_at,
9361 )?;
9362
9363 if !defer_analytics_updates && !inserted_indices.is_empty() {
9364 let message_count = inserted_indices.len() as i64;
9365 franken_update_daily_stats_in_tx(
9366 self,
9367 tx,
9368 &conv.agent_slug,
9369 &conv.source_id,
9370 conversation_effective_started_at(conv),
9371 StatsDelta {
9372 session_count_delta: 0,
9373 message_count_delta: message_count,
9374 total_chars_delta: new_chars,
9375 },
9376 )?;
9377 }
9378
9379 Ok(InsertOutcome {
9380 conversation_id,
9381 conversation_inserted: false,
9382 inserted_indices,
9383 })
9384 }
9385
9386 pub fn rebuild_fts(&self) -> Result<()> {
9388 self.rebuild_fts_via_frankensqlite().map(|_| ())
9389 }
9390
9391 pub(crate) fn ensure_search_fallback_fts_consistency(&self) -> Result<FtsConsistencyRepair> {
9396 self.ensure_fts_consistency_via_frankensqlite()
9397 }
9398
9399 pub(crate) fn fallback_fts_is_known_healthy_for_archive_fingerprint(
9400 &self,
9401 archive_fingerprint: &str,
9402 ) -> Result<bool> {
9403 Ok(
9404 self.read_fts_franken_rebuild_generation()? == Some(FTS_FRANKEN_REBUILD_GENERATION)
9405 && self
9406 .read_fts_franken_rebuild_archive_fingerprint()?
9407 .as_deref()
9408 == Some(archive_fingerprint),
9409 )
9410 }
9411
9412 pub(crate) fn record_search_fallback_fts_archive_fingerprint(
9413 &self,
9414 archive_fingerprint: &str,
9415 ) -> Result<()> {
9416 self.conn
9417 .execute_compat(
9418 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9419 fparams![
9420 FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY,
9421 archive_fingerprint.to_string()
9422 ],
9423 )
9424 .with_context(|| "recording frankensqlite FTS archive fingerprint")?;
9425 Ok(())
9426 }
9427
9428 pub(crate) fn daily_stats_is_known_healthy_for_archive_fingerprint(
9429 &self,
9430 archive_fingerprint: &str,
9431 ) -> Result<bool> {
9432 Ok(
9433 self.read_daily_stats_health_generation()? == Some(DAILY_STATS_HEALTH_GENERATION)
9434 && self.read_daily_stats_archive_fingerprint()?.as_deref()
9435 == Some(archive_fingerprint),
9436 )
9437 }
9438
9439 pub(crate) fn record_daily_stats_archive_fingerprint(
9440 &self,
9441 archive_fingerprint: &str,
9442 ) -> Result<()> {
9443 self.conn
9444 .execute_compat(
9445 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9446 fparams![
9447 DAILY_STATS_HEALTH_GENERATION_META_KEY,
9448 DAILY_STATS_HEALTH_GENERATION.to_string()
9449 ],
9450 )
9451 .with_context(|| "recording daily_stats health generation")?;
9452 self.conn
9453 .execute_compat(
9454 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9455 fparams![DAILY_STATS_HEALTH_META_KEY, archive_fingerprint.to_string()],
9456 )
9457 .with_context(|| "recording daily_stats archive fingerprint")?;
9458 Ok(())
9459 }
9460
9461 fn read_fts_franken_rebuild_generation(&self) -> Result<Option<i64>> {
9462 let value: Option<String> = self
9463 .conn
9464 .query_row_map(
9465 "SELECT value FROM meta WHERE key = ?1",
9466 fparams![FTS_FRANKEN_REBUILD_META_KEY],
9467 |row| row.get_typed(0),
9468 )
9469 .optional()?;
9470 Ok(value.and_then(|v| v.parse::<i64>().ok()))
9471 }
9472
9473 fn read_fts_franken_rebuild_archive_fingerprint(&self) -> Result<Option<String>> {
9474 Ok(self
9475 .conn
9476 .query_row_map(
9477 "SELECT value FROM meta WHERE key = ?1",
9478 fparams![FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY],
9479 |row| row.get_typed(0),
9480 )
9481 .optional()?)
9482 }
9483
9484 fn read_daily_stats_health_generation(&self) -> Result<Option<i64>> {
9485 let value: Option<String> = self
9486 .conn
9487 .query_row_map(
9488 "SELECT value FROM meta WHERE key = ?1",
9489 fparams![DAILY_STATS_HEALTH_GENERATION_META_KEY],
9490 |row| row.get_typed(0),
9491 )
9492 .optional()?;
9493 Ok(value.and_then(|value| value.parse::<i64>().ok()))
9494 }
9495
9496 fn read_daily_stats_archive_fingerprint(&self) -> Result<Option<String>> {
9497 Ok(self
9498 .conn
9499 .query_row_map(
9500 "SELECT value FROM meta WHERE key = ?1",
9501 fparams![DAILY_STATS_HEALTH_META_KEY],
9502 |row| row.get_typed(0),
9503 )
9504 .optional()?)
9505 }
9506
9507 fn record_fts_franken_rebuild_generation(&self) -> Result<()> {
9508 self.conn
9509 .execute_compat(
9510 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9511 fparams![
9512 FTS_FRANKEN_REBUILD_META_KEY,
9513 FTS_FRANKEN_REBUILD_GENERATION.to_string()
9514 ],
9515 )
9516 .with_context(|| "recording frankensqlite FTS rebuild generation")?;
9517 Ok(())
9518 }
9519
9520 fn ensure_fts_consistency_via_frankensqlite(&self) -> Result<FtsConsistencyRepair> {
9521 if self.read_fts_franken_rebuild_generation()? != Some(FTS_FRANKEN_REBUILD_GENERATION) {
9522 let fts_already_healthy = (|| -> Result<bool> {
9527 let fts_exists: i64 = self.conn.query_row_map(
9528 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9529 fparams![],
9530 |row| row.get_typed(0),
9531 )?;
9532 if fts_exists != 1 {
9533 return Ok(false);
9534 }
9535 let total: i64 = self.conn.query_row_map(
9536 "SELECT COUNT(*) FROM messages",
9537 fparams![],
9538 |row| row.get_typed(0),
9539 )?;
9540 if total == 0 {
9541 return Ok(false);
9542 }
9543 let indexed: i64 = self.conn.query_row_map(
9544 "SELECT COUNT(*) FROM fts_messages",
9545 fparams![],
9546 |row| row.get_typed(0),
9547 )?;
9548 Ok(indexed > 0 && indexed * 100 >= total * 90)
9550 })()
9551 .unwrap_or(false);
9552
9553 if fts_already_healthy {
9554 tracing::info!(
9555 target: "cass::fts_rebuild",
9556 "FTS already populated and consistent; setting generation marker without rebuild"
9557 );
9558 self.record_fts_franken_rebuild_generation()?;
9559 self.set_fts_messages_present_cache(true);
9560 } else {
9561 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9562 self.record_fts_franken_rebuild_generation()?;
9563 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9564 }
9565 }
9566
9567 let inspection = (|| -> Result<(i64, bool)> {
9568 let fts_schema_rows = self.conn.query_row_map(
9569 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9570 fparams![],
9571 |row| row.get_typed::<i64>(0),
9572 )?;
9573 let fts_queryable = fts_schema_rows == 1
9574 && self
9575 .conn
9576 .query("SELECT rowid FROM fts_messages LIMIT 1")
9577 .is_ok();
9578 Ok((fts_schema_rows, fts_queryable))
9579 })();
9580
9581 let (fts_schema_rows, fts_queryable) = match inspection {
9582 Ok(result) => result,
9583 Err(err) => {
9584 tracing::warn!(
9585 error = %err,
9586 "frankensqlite FTS consistency probe failed; rebuilding authoritative FTS"
9587 );
9588 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9589 self.record_fts_franken_rebuild_generation()?;
9590 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9591 }
9592 };
9593
9594 if fts_schema_rows != 1 || !fts_queryable {
9595 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9596 self.record_fts_franken_rebuild_generation()?;
9597 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9598 }
9599
9600 let total_messages =
9601 self.conn
9602 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
9603 row.get_typed::<i64>(0)
9604 })?;
9605 let indexed_messages =
9606 self.conn
9607 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9608 row.get_typed::<i64>(0)
9609 })?;
9610
9611 if indexed_messages == total_messages {
9612 self.set_fts_messages_present_cache(true);
9613 return Ok(FtsConsistencyRepair::AlreadyHealthy {
9614 rows: usize::try_from(total_messages.max(0)).unwrap_or(usize::MAX),
9615 });
9616 }
9617
9618 if indexed_messages > total_messages {
9619 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9620 self.record_fts_franken_rebuild_generation()?;
9621 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9622 }
9623
9624 let inserted_rows = self
9625 .stream_fts_rows_via_frankensqlite(true)
9626 .with_context(|| "incrementally repairing missing FTS rows via frankensqlite")?;
9627 let repaired_rows =
9628 self.conn
9629 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9630 row.get_typed::<i64>(0)
9631 })?;
9632 if repaired_rows == total_messages {
9633 self.set_fts_messages_present_cache(true);
9634 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9635 inserted_rows,
9636 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9637 });
9638 }
9639
9640 if inserted_rows == 0 {
9648 tracing::debug!(
9649 target: "cass::fts_rebuild",
9650 indexed_messages = repaired_rows,
9651 total_messages,
9652 un_indexable_gap = total_messages.saturating_sub(repaired_rows),
9653 "FTS catch-up inserted 0 rows; remaining gap is un-indexable (likely orphaned messages with dangling conversation_id)"
9654 );
9655 self.set_fts_messages_present_cache(true);
9656 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9657 inserted_rows: 0,
9658 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9659 });
9660 }
9661
9662 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9665 self.record_fts_franken_rebuild_generation()?;
9666 Ok(FtsConsistencyRepair::Rebuilt { inserted_rows })
9667 }
9668
9669 pub(crate) fn rebuild_fts_via_frankensqlite(&self) -> Result<usize> {
9670 self.invalidate_fts_messages_present_cache();
9671 self.conn
9672 .execute("DROP TABLE IF EXISTS fts_messages;")
9673 .with_context(|| "dropping derived fts_messages before frankensqlite rebuild")?;
9674 self.conn
9675 .execute_compat(FTS5_REGISTER_SQL, fparams![])
9676 .with_context(|| "creating derived fts_messages via frankensqlite rebuild")?;
9677 self.set_fts_messages_present_cache(true);
9678
9679 self.stream_fts_rows_via_frankensqlite(false)
9680 }
9681
9682 fn stream_fts_rows_via_frankensqlite(&self, missing_only: bool) -> Result<usize> {
9683 let batch_size = fts_rebuild_batch_size().max(1);
9684 let batch_limit = i64::try_from(batch_size).unwrap_or(i64::MAX);
9685 let mut total_inserted: usize = 0;
9686 let mut total_skipped_orphans: usize = 0;
9687 let mut total_skipped_existing: usize = 0;
9688 let mut last_rowid: i64 = 0;
9689 let conversation_by_id = self.load_fts_conversation_projection_map()?;
9690 let agent_slug_by_id = self.load_fts_agent_slug_map()?;
9691 let workspace_path_by_id = self.load_fts_workspace_path_map()?;
9692 let existing_fts_rowids = if missing_only {
9693 Some(self.load_fts_message_rowid_set()?)
9694 } else {
9695 None
9696 };
9697 let mut entries = Vec::new();
9698 let mut pending_chars = 0usize;
9699
9700 loop {
9701 let rows = self.fetch_fts_rebuild_message_rows(last_rowid, batch_limit)?;
9702 let fetched_count = rows.len();
9703 if fetched_count == 0 {
9704 break;
9705 }
9706
9707 let inserted_before_batch = total_inserted;
9708 let skipped_before_batch = total_skipped_orphans;
9709 let existing_before_batch = total_skipped_existing;
9710
9711 for row in rows {
9712 last_rowid = row.rowid;
9713 if existing_fts_rowids
9714 .as_ref()
9715 .is_some_and(|rowids| rowids.contains(&row.message_id))
9716 {
9717 total_skipped_existing = total_skipped_existing.saturating_add(1);
9718 continue;
9719 }
9720 let Some(conversation) = conversation_by_id.get(&row.conversation_id) else {
9721 total_skipped_orphans = total_skipped_orphans.saturating_add(1);
9722 continue;
9723 };
9724 let agent = conversation
9725 .agent_id
9726 .and_then(|agent_id| agent_slug_by_id.get(&agent_id))
9727 .filter(|slug| !slug.is_empty())
9728 .cloned()
9729 .unwrap_or_else(|| "unknown".to_string());
9730 let workspace = conversation
9731 .workspace_id
9732 .and_then(|workspace_id| workspace_path_by_id.get(&workspace_id))
9733 .cloned()
9734 .unwrap_or_default();
9735 pending_chars = pending_chars.saturating_add(row.content.len());
9736 entries.push(FtsEntry {
9737 content: row.content,
9738 title: conversation.title.clone(),
9739 agent,
9740 workspace,
9741 source_path: conversation.source_path.clone(),
9742 created_at: row.created_at,
9743 message_id: row.message_id,
9744 });
9745 if entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9746 || pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9747 {
9748 total_inserted = total_inserted.saturating_add(
9749 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
9750 );
9751 entries.clear();
9752 pending_chars = 0;
9753 }
9754 }
9755
9756 if !entries.is_empty() {
9757 total_inserted = total_inserted.saturating_add(
9758 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
9759 );
9760 entries.clear();
9761 pending_chars = 0;
9762 }
9763
9764 tracing::debug!(
9765 target: "cass::fts_rebuild",
9766 batch_rows = fetched_count,
9767 batch_inserted = total_inserted.saturating_sub(inserted_before_batch),
9768 batch_skipped_orphans = total_skipped_orphans.saturating_sub(skipped_before_batch),
9769 batch_skipped_existing = total_skipped_existing.saturating_sub(existing_before_batch),
9770 total_inserted,
9771 total_skipped_orphans,
9772 total_skipped_existing,
9773 last_rowid,
9774 missing_only,
9775 "FTS streaming maintenance batch complete"
9776 );
9777
9778 if fetched_count < batch_size {
9779 break;
9780 }
9781 }
9782
9783 Ok(total_inserted)
9784 }
9785
9786 fn fetch_fts_rebuild_message_rows(
9787 &self,
9788 last_rowid: i64,
9789 batch_limit: i64,
9790 ) -> Result<Vec<FtsRebuildMessageRow>> {
9791 self.conn
9792 .query_map_collect(
9793 "SELECT m.rowid, m.id, m.conversation_id, m.content, m.created_at
9794 FROM messages m
9795 WHERE m.rowid > ?1
9796 ORDER BY m.rowid
9797 LIMIT ?2",
9798 fparams![last_rowid, batch_limit],
9799 |row| {
9800 Ok(FtsRebuildMessageRow {
9801 rowid: row.get_typed(0)?,
9802 message_id: row.get_typed(1)?,
9803 conversation_id: row.get_typed(2)?,
9804 content: row.get_typed::<Option<String>>(3)?.unwrap_or_default(),
9805 created_at: row.get_typed(4)?,
9806 })
9807 },
9808 )
9809 .with_context(|| format!("fetching FTS maintenance messages after rowid {last_rowid}"))
9810 }
9811
9812 fn load_fts_message_rowid_set(&self) -> Result<HashSet<i64>> {
9813 let rows: Vec<i64> = self
9814 .conn
9815 .query_map_collect("SELECT rowid FROM fts_messages", fparams![], |row| {
9816 row.get_typed(0)
9817 })
9818 .with_context(|| "loading existing FTS message rowids")?;
9819 Ok(rows.into_iter().collect())
9820 }
9821
9822 fn load_fts_conversation_projection_map(
9823 &self,
9824 ) -> Result<HashMap<i64, FtsConversationProjection>> {
9825 let rows: Vec<(i64, FtsConversationProjection)> = self
9826 .conn
9827 .query_map_collect(
9828 "SELECT id, title, agent_id, workspace_id, source_path
9829 FROM conversations",
9830 fparams![],
9831 |row| {
9832 Ok((
9833 row.get_typed(0)?,
9834 FtsConversationProjection {
9835 title: row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
9836 agent_id: row.get_typed(2)?,
9837 workspace_id: row.get_typed(3)?,
9838 source_path: row.get_typed::<Option<String>>(4)?.unwrap_or_default(),
9839 },
9840 ))
9841 },
9842 )
9843 .with_context(|| "loading FTS conversation projection map")?;
9844 Ok(rows.into_iter().collect())
9845 }
9846
9847 fn load_fts_agent_slug_map(&self) -> Result<HashMap<i64, String>> {
9848 let rows: Vec<(i64, String)> = self
9849 .conn
9850 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
9851 Ok((
9852 row.get_typed(0)?,
9853 row.get_typed::<Option<String>>(1)?
9854 .unwrap_or_else(|| "unknown".to_string()),
9855 ))
9856 })
9857 .with_context(|| "loading FTS agent slug map")?;
9858 Ok(rows.into_iter().collect())
9859 }
9860
9861 fn load_fts_workspace_path_map(&self) -> Result<HashMap<i64, String>> {
9862 let rows: Vec<(i64, String)> = self
9863 .conn
9864 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
9865 Ok((
9866 row.get_typed(0)?,
9867 row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
9868 ))
9869 })
9870 .with_context(|| "loading FTS workspace path map")?;
9871 Ok(rows.into_iter().collect())
9872 }
9873
9874 pub fn fetch_messages_for_embedding(&self) -> Result<Vec<MessageForEmbedding>> {
9876 self.conn
9881 .query_map_collect(
9882 "SELECT m.id, m.created_at, COALESCE(c.agent_id, 0), c.workspace_id, c.source_id, m.role, m.content
9883 FROM messages m
9884 JOIN conversations c ON m.conversation_id = c.id
9885 ORDER BY m.id",
9886 fparams![],
9887 |row| {
9888 let source_id: String = row.get_typed::<Option<String>>(4)?
9889 .unwrap_or_else(|| "local".to_string());
9890 Ok(MessageForEmbedding {
9891 message_id: row.get_typed(0)?,
9892 created_at: row.get_typed(1)?,
9893 agent_id: row.get_typed(2)?,
9894 workspace_id: row.get_typed(3)?,
9895 source_id_hash: crc32fast::hash(source_id.as_bytes()),
9896 role: row.get_typed(5)?,
9897 content: row.get_typed(6)?,
9898 })
9899 },
9900 )
9901 .with_context(|| "fetching messages for embedding")
9902 }
9903
9904 pub fn get_last_embedded_message_id(&self) -> Result<Option<i64>> {
9906 let result: Result<String, _> = self.conn.query_row_map(
9907 "SELECT value FROM meta WHERE key = 'last_embedded_message_id'",
9908 fparams![],
9909 |row| row.get_typed(0),
9910 );
9911 match result.optional() {
9912 Ok(Some(s)) => Ok(s.parse().ok()),
9913 Ok(None) => Ok(None),
9914 Err(e) => Err(e.into()),
9915 }
9916 }
9917
9918 pub fn set_last_embedded_message_id(&self, id: i64) -> Result<()> {
9920 self.conn.execute_compat(
9921 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_embedded_message_id', ?1)",
9922 fparams![id.to_string()],
9923 )?;
9924 Ok(())
9925 }
9926
9927 pub fn get_embedding_jobs(&self, db_path: &str) -> Result<Vec<EmbeddingJobRow>> {
9929 self.conn
9930 .query_map_collect(
9931 "SELECT id, db_path, model_id, status, total_docs, completed_docs, error_message, created_at, started_at, completed_at
9932 FROM embedding_jobs WHERE db_path = ?1 ORDER BY id DESC",
9933 fparams![db_path],
9934 |row| {
9935 Ok(EmbeddingJobRow {
9936 id: row.get_typed(0)?,
9937 db_path: row.get_typed(1)?,
9938 model_id: row.get_typed(2)?,
9939 status: row.get_typed(3)?,
9940 total_docs: row.get_typed(4)?,
9941 completed_docs: row.get_typed(5)?,
9942 error_message: row.get_typed(6)?,
9943 created_at: row.get_typed(7)?,
9944 started_at: row.get_typed(8)?,
9945 completed_at: row.get_typed(9)?,
9946 })
9947 },
9948 )
9949 .with_context(|| format!("fetching embedding jobs for {db_path}"))
9950 }
9951
9952 pub fn upsert_embedding_job(
9954 &self,
9955 db_path: &str,
9956 model_id: &str,
9957 total_docs: i64,
9958 ) -> Result<i64> {
9959 let updated = self.conn.execute_compat(
9960 "UPDATE embedding_jobs
9961 SET total_docs = ?3
9962 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
9963 fparams![db_path, model_id, total_docs],
9964 )?;
9965 if updated == 0 {
9966 let insert_result = self.conn.execute_compat(
9967 "INSERT INTO embedding_jobs(db_path, model_id, total_docs) VALUES(?1,?2,?3)",
9968 fparams![db_path, model_id, total_docs],
9969 );
9970 if let Err(err) = insert_result {
9971 if !matches!(err, frankensqlite::FrankenError::UniqueViolation { .. }) {
9972 return Err(err.into());
9973 }
9974 self.conn.execute_compat(
9975 "UPDATE embedding_jobs
9976 SET total_docs = ?3
9977 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
9978 fparams![db_path, model_id, total_docs],
9979 )?;
9980 }
9981 }
9982 self.conn
9983 .query_row_map(
9984 "SELECT id FROM embedding_jobs
9985 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')
9986 ORDER BY id DESC
9987 LIMIT 1",
9988 fparams![db_path, model_id],
9989 |row| row.get_typed(0),
9990 )
9991 .with_context(|| "resolving embedding job id after upsert")
9992 }
9993
9994 pub fn start_embedding_job(&self, job_id: i64) -> Result<()> {
9996 self.conn.execute_compat(
9997 "UPDATE embedding_jobs SET status = 'running', started_at = datetime('now') WHERE id = ?1",
9998 fparams![job_id],
9999 )?;
10000 Ok(())
10001 }
10002
10003 pub fn complete_embedding_job(&self, job_id: i64) -> Result<()> {
10005 self.conn.execute_compat(
10006 "UPDATE embedding_jobs SET status = 'completed', completed_at = datetime('now') WHERE id = ?1",
10007 fparams![job_id],
10008 )?;
10009 Ok(())
10010 }
10011
10012 pub fn fail_embedding_job(&self, job_id: i64, error: &str) -> Result<()> {
10014 self.conn.execute_compat(
10015 "UPDATE embedding_jobs SET status = 'failed', error_message = ?2, completed_at = datetime('now') WHERE id = ?1",
10016 fparams![job_id, error],
10017 )?;
10018 Ok(())
10019 }
10020
10021 pub fn cancel_embedding_jobs(&self, db_path: &str, model_id: Option<&str>) -> Result<usize> {
10023 if let Some(mid) = model_id {
10024 Ok(self.conn.execute_compat(
10025 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10026 fparams![db_path, mid],
10027 )?)
10028 } else {
10029 Ok(self.conn.execute_compat(
10030 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND status IN ('pending', 'running')",
10031 fparams![db_path],
10032 )?)
10033 }
10034 }
10035
10036 pub fn update_job_progress(&self, job_id: i64, completed_docs: i64) -> Result<()> {
10038 self.conn.execute_compat(
10039 "UPDATE embedding_jobs SET completed_docs = ?2 WHERE id = ?1",
10040 fparams![job_id, completed_docs],
10041 )?;
10042 Ok(())
10043 }
10044
10045 pub fn count_sessions_in_range(
10054 &self,
10055 start_ts_ms: Option<i64>,
10056 end_ts_ms: Option<i64>,
10057 agent_slug: Option<&str>,
10058 source_id: Option<&str>,
10059 ) -> Result<(i64, bool)> {
10060 let agent = agent_slug.unwrap_or("all");
10061 let source = source_id.unwrap_or("all");
10062
10063 let stats_count: i64 = self
10065 .conn
10066 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10067 row.get_typed(0)
10068 })
10069 .unwrap_or(0);
10070
10071 if stats_count == 0 {
10072 return self.count_sessions_direct(start_ts_ms, end_ts_ms, agent_slug, source_id);
10073 }
10074
10075 let start_day = start_ts_ms.map(Self::day_id_from_millis);
10077 let end_day = end_ts_ms.map(Self::day_id_from_millis);
10078
10079 let count: i64 = match (start_day, end_day) {
10080 (Some(start), Some(end)) => self.conn.query_row_map(
10081 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10082 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4",
10083 fparams![start, end, agent, source],
10084 |row| row.get_typed(0),
10085 )?,
10086 (Some(start), None) => self.conn.query_row_map(
10087 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10088 WHERE day_id >= ?1 AND agent_slug = ?2 AND source_id = ?3",
10089 fparams![start, agent, source],
10090 |row| row.get_typed(0),
10091 )?,
10092 (None, Some(end)) => self.conn.query_row_map(
10093 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10094 WHERE day_id <= ?1 AND agent_slug = ?2 AND source_id = ?3",
10095 fparams![end, agent, source],
10096 |row| row.get_typed(0),
10097 )?,
10098 (None, None) => self.conn.query_row_map(
10099 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10100 WHERE agent_slug = ?1 AND source_id = ?2",
10101 fparams![agent, source],
10102 |row| row.get_typed(0),
10103 )?,
10104 };
10105
10106 Ok((count, true))
10107 }
10108
10109 fn count_sessions_direct(
10111 &self,
10112 start_ts_ms: Option<i64>,
10113 end_ts_ms: Option<i64>,
10114 agent_slug: Option<&str>,
10115 source_id: Option<&str>,
10116 ) -> Result<(i64, bool)> {
10117 let mut sql = "SELECT COUNT(*) FROM conversations c WHERE 1=1".to_string();
10124 let mut param_values: Vec<ParamValue> = Vec::new();
10125 let mut idx = 1;
10126
10127 if let Some(start) = start_ts_ms {
10128 sql.push_str(&format!(" AND c.started_at >= ?{idx}"));
10129 param_values.push(ParamValue::from(start));
10130 idx += 1;
10131 }
10132 if let Some(end) = end_ts_ms {
10133 sql.push_str(&format!(" AND c.started_at <= ?{idx}"));
10134 param_values.push(ParamValue::from(end));
10135 idx += 1;
10136 }
10137 if let Some(agent) = agent_slug
10138 && agent != "all"
10139 {
10140 sql.push_str(&format!(
10141 " AND EXISTS (SELECT 1 FROM agents a WHERE a.id = c.agent_id AND a.slug = ?{idx})"
10142 ));
10143 param_values.push(ParamValue::from(agent));
10144 idx += 1;
10145 }
10146 if let Some(source) = source_id
10147 && source != "all"
10148 {
10149 sql.push_str(&format!(" AND c.source_id = ?{idx}"));
10150 param_values.push(ParamValue::from(source));
10151 let _ = idx; }
10153
10154 let count: i64 = self
10155 .conn
10156 .query_row_map(&sql, ¶m_values, |row| row.get_typed(0))?;
10157 Ok((count, false))
10158 }
10159
10160 pub fn get_daily_histogram(
10162 &self,
10163 start_ts_ms: i64,
10164 end_ts_ms: i64,
10165 agent_slug: Option<&str>,
10166 source_id: Option<&str>,
10167 ) -> Result<Vec<DailyCount>> {
10168 let start_day = Self::day_id_from_millis(start_ts_ms);
10169 let end_day = Self::day_id_from_millis(end_ts_ms);
10170 let agent = agent_slug.unwrap_or("all");
10171 let source = source_id.unwrap_or("all");
10172
10173 let rows = self.conn.query_map_collect(
10174 "SELECT day_id, session_count, message_count, total_chars
10175 FROM daily_stats
10176 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4
10177 ORDER BY day_id",
10178 fparams![start_day, end_day, agent, source],
10179 |row| {
10180 Ok(DailyCount {
10181 day_id: row.get_typed(0)?,
10182 sessions: row.get_typed(1)?,
10183 messages: row.get_typed(2)?,
10184 chars: row.get_typed(3)?,
10185 })
10186 },
10187 )?;
10188
10189 Ok(rows)
10190 }
10191
10192 pub fn daily_stats_health(&self) -> Result<DailyStatsHealth> {
10194 let row_count: i64 =
10195 self.conn
10196 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10197 row.get_typed(0)
10198 })?;
10199
10200 let oldest_update: Option<i64> = self.conn.query_row_map(
10201 "SELECT MIN(last_updated) FROM daily_stats",
10202 fparams![],
10203 |row| row.get_typed(0),
10204 )?;
10205
10206 let conversation_count: i64 =
10207 self.conn
10208 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
10209 row.get_typed(0)
10210 })?;
10211
10212 let materialized_total: i64 = self.conn.query_row_map(
10213 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10214 WHERE agent_slug = 'all' AND source_id = 'all'",
10215 fparams![],
10216 |row| row.get_typed(0),
10217 )?;
10218
10219 Ok(DailyStatsHealth {
10220 populated: row_count > 0,
10221 row_count,
10222 oldest_update_ms: oldest_update,
10223 conversation_count,
10224 materialized_total,
10225 drift: (conversation_count - materialized_total).abs(),
10226 })
10227 }
10228
10229 pub fn insert_conversations_batched(
10233 &self,
10234 conversations: &[(i64, Option<i64>, &Conversation)],
10235 ) -> Result<Vec<InsertOutcome>> {
10236 if conversations.is_empty() {
10237 return Ok(Vec::new());
10238 }
10239
10240 self.ensure_sources_for_batch(conversations)?;
10241
10242 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
10243 let defer_analytics_updates = defer_analytics_updates_enabled();
10244
10245 let pricing_table = PricingTable::franken_load(&self.conn).unwrap_or_else(|e| {
10246 tracing::warn!(target: "cass::analytics::pricing", error = %e, "failed to load pricing table");
10247 PricingTable { entries: Vec::new() }
10248 });
10249 let mut pricing_diag = PricingDiagnostics::default();
10250
10251 let mut tx = self.conn.transaction()?;
10252
10253 ensure_agents_in_tx(&tx, conversations)?;
10260 ensure_workspaces_in_tx(&tx, conversations)?;
10261 ensure_sources_in_tx(&tx, conversations)?;
10262
10263 let mut outcomes = Vec::with_capacity(conversations.len());
10264 let mut fts_entries = Vec::new();
10265 let mut fts_pending_chars = 0usize;
10266 let mut fts_inserted_total = 0usize;
10267 let mut fts_count_total = 0usize;
10268 let mut stats = StatsAggregator::new();
10269 let mut token_stats = TokenStatsAggregator::new();
10270 let mut token_entries: Vec<TokenUsageEntry> = Vec::new();
10271 let mut metrics_entries: Vec<MessageMetricsEntry> = Vec::new();
10272 let mut rollup_agg = AnalyticsRollupAggregator::new();
10273 let mut conv_ids_to_summarize: Vec<i64> = Vec::new();
10274 let mut pending_conversation_ids: HashMap<PendingConversationKey, i64> = HashMap::new();
10275 let mut pending_message_fingerprints: HashMap<i64, HashMap<i64, MessageMergeFingerprint>> =
10276 HashMap::new();
10277 let mut pending_message_replay_fingerprints: HashMap<
10278 i64,
10279 HashSet<MessageReplayFingerprint>,
10280 > = HashMap::new();
10281
10282 for &(agent_id, workspace_id, raw_conv) in conversations {
10283 let normalized_conv = normalized_conversation_for_storage(raw_conv);
10284 let conv = normalized_conv.as_ref();
10285 let mut total_chars: i64 = 0;
10286 let mut inserted_indices = Vec::with_capacity(conv.messages.len());
10287 let mut inserted_messages: Vec<(i64, &Message)> =
10288 Vec::with_capacity(conv.messages.len());
10289 let mut session_count_delta = 1_i64;
10290 let conversation_key = conversation_merge_key(agent_id, conv);
10291
10292 let existing_conv_id = if let Some(existing_id) =
10293 pending_conversation_ids.get(&conversation_key)
10294 {
10295 Some(*existing_id)
10296 } else {
10297 let existing_id =
10298 franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
10299 if let Some(existing_id) = existing_id {
10300 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10301 }
10302 existing_id
10303 };
10304
10305 let conv_id = if let Some(existing_id) = existing_conv_id {
10306 session_count_delta = 0;
10307 let ExistingMessageLookup {
10308 by_idx: mut existing_messages,
10309 replay: mut existing_replay_fingerprints,
10310 } = franken_existing_message_lookup_with_pending(
10311 &tx,
10312 existing_id,
10313 &conv.messages,
10314 &mut pending_message_fingerprints,
10315 &mut pending_message_replay_fingerprints,
10316 )?;
10317 let ExistingConversationNewMessages {
10318 messages: new_messages,
10319 new_chars,
10320 idx_collision_count,
10321 first_collision_idx,
10322 } = collect_new_messages_for_existing_conversation(
10323 existing_id,
10324 conv,
10325 &mut existing_messages,
10326 &mut existing_replay_fingerprints,
10327 "skipping replay-equivalent recovered message with shifted idx during batched merge",
10328 );
10329 let (inserted_last_idx, inserted_last_created_at) =
10330 borrowed_messages_tail_state(&new_messages);
10331 let inserted_message_ids =
10332 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10333 total_chars += new_chars;
10334 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10335 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10336 if !defer_lexical_updates {
10337 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10338 fts_count_total += 1;
10339 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
10340 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10341 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10342 {
10343 flush_pending_fts_entries(
10344 self,
10345 &tx,
10346 &mut fts_entries,
10347 &mut fts_pending_chars,
10348 &mut fts_inserted_total,
10349 )?;
10350 }
10351 }
10352 inserted_indices.push(msg.idx);
10353 inserted_messages.push((msg_id, msg));
10354 }
10355
10356 if idx_collision_count > 0 {
10357 tracing::warn!(
10358 conversation_id = existing_id,
10359 collision_count = idx_collision_count,
10360 first_idx = first_collision_idx,
10361 source_path = %conv.source_path.display(),
10362 "message idx collisions encountered during batched conversation merge; retaining canonical message variants"
10363 );
10364 }
10365
10366 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10367 franken_update_conversation_tail_state(
10368 &tx,
10369 existing_id,
10370 conv_last_ts,
10371 inserted_last_idx,
10372 inserted_last_created_at,
10373 )?;
10374 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
10375 {
10376 franken_update_external_conversation_tail_lookup_key(
10377 &tx,
10378 &lookup_key,
10379 conv_last_ts,
10380 inserted_last_idx,
10381 inserted_last_created_at,
10382 )?;
10383 }
10384
10385 pending_message_fingerprints.insert(existing_id, existing_messages);
10386 pending_message_replay_fingerprints
10387 .insert(existing_id, existing_replay_fingerprints);
10388
10389 existing_id
10390 } else {
10391 match franken_insert_conversation_or_get_existing(
10392 &tx,
10393 agent_id,
10394 workspace_id,
10395 conv,
10396 )? {
10397 ConversationInsertStatus::Inserted(new_conv_id) => {
10398 pending_conversation_ids.insert(conversation_key.clone(), new_conv_id);
10399 let pending_messages =
10400 pending_message_fingerprints.entry(new_conv_id).or_default();
10401 let pending_replay_fingerprints = pending_message_replay_fingerprints
10402 .entry(new_conv_id)
10403 .or_default();
10404 let mut new_messages = Vec::new();
10405 for msg in &conv.messages {
10406 let incoming_replay = message_replay_fingerprint(msg);
10407 if pending_messages.contains_key(&msg.idx)
10408 || pending_replay_fingerprints.contains(&incoming_replay)
10409 {
10410 continue;
10411 }
10412 pending_messages.insert(msg.idx, message_merge_fingerprint(msg));
10413 pending_replay_fingerprints.insert(incoming_replay);
10414 new_messages.push(msg);
10415 }
10416 let inserted_message_ids =
10417 franken_batch_insert_new_messages(&tx, new_conv_id, &new_messages)?;
10418 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10419 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10420 if !defer_lexical_updates {
10421 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10422 fts_count_total += 1;
10423 fts_pending_chars =
10424 fts_pending_chars.saturating_add(msg.content.len());
10425 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10426 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10427 {
10428 flush_pending_fts_entries(
10429 self,
10430 &tx,
10431 &mut fts_entries,
10432 &mut fts_pending_chars,
10433 &mut fts_inserted_total,
10434 )?;
10435 }
10436 }
10437 total_chars += msg.content.len() as i64;
10438 inserted_indices.push(msg.idx);
10439 inserted_messages.push((msg_id, msg));
10440 }
10441 new_conv_id
10442 }
10443 ConversationInsertStatus::Existing(existing_id) => {
10444 session_count_delta = 0;
10445 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10446 let ExistingMessageLookup {
10447 by_idx: mut existing_messages,
10448 replay: mut existing_replay_fingerprints,
10449 } = franken_existing_message_lookup_with_pending(
10450 &tx,
10451 existing_id,
10452 &conv.messages,
10453 &mut pending_message_fingerprints,
10454 &mut pending_message_replay_fingerprints,
10455 )?;
10456 let ExistingConversationNewMessages {
10457 messages: new_messages,
10458 new_chars,
10459 idx_collision_count,
10460 first_collision_idx,
10461 } = collect_new_messages_for_existing_conversation(
10462 existing_id,
10463 conv,
10464 &mut existing_messages,
10465 &mut existing_replay_fingerprints,
10466 "skipping replay-equivalent recovered message with shifted idx after duplicate conversation recovery",
10467 );
10468 let (inserted_last_idx, inserted_last_created_at) =
10469 borrowed_messages_tail_state(&new_messages);
10470 let inserted_message_ids =
10471 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10472 total_chars += new_chars;
10473 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10474 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10475 if !defer_lexical_updates {
10476 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10477 fts_count_total += 1;
10478 fts_pending_chars =
10479 fts_pending_chars.saturating_add(msg.content.len());
10480 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10481 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10482 {
10483 flush_pending_fts_entries(
10484 self,
10485 &tx,
10486 &mut fts_entries,
10487 &mut fts_pending_chars,
10488 &mut fts_inserted_total,
10489 )?;
10490 }
10491 }
10492 inserted_indices.push(msg.idx);
10493 inserted_messages.push((msg_id, msg));
10494 }
10495
10496 if idx_collision_count > 0 {
10497 tracing::warn!(
10498 conversation_id = existing_id,
10499 collision_count = idx_collision_count,
10500 first_idx = first_collision_idx,
10501 source_path = %conv.source_path.display(),
10502 "message idx collisions encountered after duplicate conversation recovery; retaining canonical message variants"
10503 );
10504 }
10505
10506 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10507 franken_update_conversation_tail_state(
10508 &tx,
10509 existing_id,
10510 conv_last_ts,
10511 inserted_last_idx,
10512 inserted_last_created_at,
10513 )?;
10514 if let Some(lookup_key) =
10515 conversation_external_lookup_key_for_conv(agent_id, conv)
10516 {
10517 franken_update_external_conversation_tail_lookup_key(
10518 &tx,
10519 &lookup_key,
10520 conv_last_ts,
10521 inserted_last_idx,
10522 inserted_last_created_at,
10523 )?;
10524 }
10525
10526 pending_message_fingerprints.insert(existing_id, existing_messages);
10527 pending_message_replay_fingerprints
10528 .insert(existing_id, existing_replay_fingerprints);
10529
10530 existing_id
10531 }
10532 }
10533 };
10534
10535 if !defer_analytics_updates {
10536 let delta = StatsDelta {
10537 session_count_delta,
10538 message_count_delta: inserted_messages.len() as i64,
10539 total_chars_delta: total_chars,
10540 };
10541
10542 let effective_started_at = conversation_effective_started_at(conv);
10543 let day_id = effective_started_at
10544 .map(FrankenStorage::day_id_from_millis)
10545 .unwrap_or(0);
10546 stats.record_delta(
10547 &conv.agent_slug,
10548 &conv.source_id,
10549 day_id,
10550 delta.session_count_delta,
10551 delta.message_count_delta,
10552 delta.total_chars_delta,
10553 );
10554
10555 let conv_day_id = day_id;
10556 let mut session_model_family = String::from("unknown");
10557 let mut has_any_tokens = false;
10558
10559 for &(message_id, msg) in &inserted_messages {
10560 let role_s = role_str(&msg.role);
10561 let usage = if historical_raw_json(&msg.extra_json).is_some() {
10562 crate::connectors::extract_tokens_for_agent(
10563 &conv.agent_slug,
10564 &serde_json::Value::Null,
10565 &msg.content,
10566 &role_s,
10567 )
10568 } else {
10569 crate::connectors::extract_tokens_for_agent(
10570 &conv.agent_slug,
10571 &msg.extra_json,
10572 &msg.content,
10573 &role_s,
10574 )
10575 };
10576
10577 let msg_ts = msg
10578 .created_at
10579 .or(conversation_effective_started_at(conv))
10580 .unwrap_or(0);
10581 let msg_day_id = if msg_ts > 0 {
10582 FrankenStorage::day_id_from_millis(msg_ts)
10583 } else {
10584 conv_day_id
10585 };
10586
10587 let model_info = usage
10588 .model_name
10589 .as_deref()
10590 .map(crate::connectors::normalize_model);
10591
10592 let model_family = model_info
10593 .as_ref()
10594 .map(|i| i.family.clone())
10595 .unwrap_or_else(|| "unknown".into());
10596 let model_tier = model_info
10597 .as_ref()
10598 .map(|i| i.tier.clone())
10599 .unwrap_or_else(|| "unknown".into());
10600 let provider = usage
10601 .provider
10602 .clone()
10603 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
10604 .unwrap_or_else(|| "unknown".into());
10605
10606 if model_family != "unknown" {
10607 session_model_family = model_family.clone();
10608 }
10609
10610 let estimated_cost = pricing_table.compute_cost(
10611 usage.model_name.as_deref(),
10612 msg_day_id,
10613 usage.input_tokens,
10614 usage.output_tokens,
10615 usage.cache_read_tokens,
10616 usage.cache_creation_tokens,
10617 );
10618 if estimated_cost.is_some() {
10619 pricing_diag.record_priced();
10620 } else if usage.has_token_data() {
10621 pricing_diag.record_unpriced(usage.model_name.as_deref());
10622 }
10623
10624 token_stats.record(
10625 &conv.agent_slug,
10626 &conv.source_id,
10627 msg_day_id,
10628 &model_family,
10629 &role_s,
10630 &usage,
10631 msg.content.len() as i64,
10632 estimated_cost.unwrap_or(0.0),
10633 );
10634
10635 if usage.has_token_data() {
10636 has_any_tokens = true;
10637 }
10638
10639 let content_chars = msg.content.len() as i64;
10640 let content_tokens_est = content_chars / 4;
10641 let msg_hour_id = FrankenStorage::hour_id_from_millis(msg_ts);
10642 let has_plan = has_plan_for_role(&role_s, &msg.content);
10643
10644 token_entries.push(TokenUsageEntry {
10645 message_id,
10646 conversation_id: conv_id,
10647 agent_id,
10648 workspace_id,
10649 source_id: conv.source_id.clone(),
10650 timestamp_ms: msg_ts,
10651 day_id: msg_day_id,
10652 model_name: usage.model_name.clone(),
10653 model_family: Some(model_family.clone()),
10654 model_tier: Some(model_tier.clone()),
10655 service_tier: usage.service_tier.clone(),
10656 provider: Some(provider.clone()),
10657 input_tokens: usage.input_tokens,
10658 output_tokens: usage.output_tokens,
10659 cache_read_tokens: usage.cache_read_tokens,
10660 cache_creation_tokens: usage.cache_creation_tokens,
10661 thinking_tokens: usage.thinking_tokens,
10662 total_tokens: usage.total_tokens(),
10663 estimated_cost_usd: estimated_cost,
10664 role: role_s.to_string(),
10665 content_chars,
10666 has_tool_calls: usage.has_tool_calls,
10667 tool_call_count: usage.tool_call_count,
10668 data_source: usage.data_source.as_str().to_string(),
10669 });
10670
10671 let mm = MessageMetricsEntry {
10672 message_id,
10673 created_at_ms: msg_ts,
10674 hour_id: msg_hour_id,
10675 day_id: msg_day_id,
10676 agent_slug: conv.agent_slug.clone(),
10677 workspace_id: workspace_id.unwrap_or(0),
10678 source_id: conv.source_id.clone(),
10679 role: role_s.to_string(),
10680 content_chars,
10681 content_tokens_est,
10682 model_name: usage.model_name.clone(),
10683 model_family: model_family.clone(),
10684 model_tier: model_tier.clone(),
10685 provider,
10686 api_input_tokens: usage.input_tokens,
10687 api_output_tokens: usage.output_tokens,
10688 api_cache_read_tokens: usage.cache_read_tokens,
10689 api_cache_creation_tokens: usage.cache_creation_tokens,
10690 api_thinking_tokens: usage.thinking_tokens,
10691 api_service_tier: usage.service_tier.clone(),
10692 api_data_source: usage.data_source.as_str().to_string(),
10693 tool_call_count: usage.tool_call_count as i64,
10694 has_tool_calls: usage.has_tool_calls,
10695 has_plan,
10696 };
10697 rollup_agg.record(&mm);
10698 metrics_entries.push(mm);
10699 }
10700
10701 if session_count_delta > 0 {
10702 token_stats.record_session(
10703 &conv.agent_slug,
10704 &conv.source_id,
10705 conv_day_id,
10706 &session_model_family,
10707 );
10708 }
10709
10710 if has_any_tokens {
10711 conv_ids_to_summarize.push(conv_id);
10712 }
10713 }
10714
10715 outcomes.push(InsertOutcome {
10716 conversation_id: conv_id,
10717 conversation_inserted: session_count_delta > 0,
10718 inserted_indices,
10719 });
10720 }
10721
10722 if !defer_lexical_updates {
10724 flush_pending_fts_entries(
10725 self,
10726 &tx,
10727 &mut fts_entries,
10728 &mut fts_pending_chars,
10729 &mut fts_inserted_total,
10730 )?;
10731 }
10732 if !defer_lexical_updates && fts_count_total > 0 {
10733 tracing::debug!(
10734 target: "cass::perf::fts5",
10735 total = fts_count_total,
10736 inserted = fts_inserted_total,
10737 conversations = conversations.len(),
10738 "franken_batch_fts_insert_complete"
10739 );
10740 }
10741
10742 if !defer_analytics_updates && !stats.is_empty() {
10744 let entries = stats.expand();
10745 let affected = franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
10746 tracing::debug!(
10747 target: "cass::perf::daily_stats",
10748 raw = stats.raw_entry_count(),
10749 expanded = entries.len(),
10750 affected = affected,
10751 "franken_batched_stats_update_complete"
10752 );
10753 }
10754
10755 if !defer_analytics_updates && !token_entries.is_empty() {
10757 let token_count = token_entries.len();
10758 let inserted = franken_insert_token_usage_batched_in_tx(&tx, &token_entries)?;
10759 tracing::debug!(
10760 target: "cass::perf::token_usage",
10761 total = token_count,
10762 inserted = inserted,
10763 "franken_batch_token_usage_insert_complete"
10764 );
10765 }
10766
10767 if !defer_analytics_updates && !token_stats.is_empty() {
10769 let entries = token_stats.expand();
10770 let affected = franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
10771 tracing::debug!(
10772 target: "cass::perf::token_daily_stats",
10773 raw = token_stats.raw_entry_count(),
10774 expanded = entries.len(),
10775 affected = affected,
10776 "franken_batched_token_stats_update_complete"
10777 );
10778 }
10779
10780 if !defer_analytics_updates && !metrics_entries.is_empty() {
10782 let mm_count = metrics_entries.len();
10783 let inserted = franken_insert_message_metrics_batched_in_tx(&tx, &metrics_entries)?;
10784 tracing::debug!(
10785 target: "cass::perf::message_metrics",
10786 total = mm_count,
10787 inserted = inserted,
10788 "franken_batch_message_metrics_insert_complete"
10789 );
10790 }
10791
10792 if !defer_analytics_updates && !rollup_agg.is_empty() {
10794 let (hourly, daily, models_daily) =
10795 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
10796 tracing::debug!(
10797 target: "cass::perf::usage_rollups",
10798 hourly_buckets = rollup_agg.hourly_entry_count(),
10799 daily_buckets = rollup_agg.daily_entry_count(),
10800 models_daily_buckets = rollup_agg.models_daily_entry_count(),
10801 hourly_affected = hourly,
10802 daily_affected = daily,
10803 models_daily_affected = models_daily,
10804 "franken_batched_usage_rollups_complete"
10805 );
10806 }
10807
10808 if !defer_analytics_updates {
10810 for conv_id in &conv_ids_to_summarize {
10811 franken_update_conversation_token_summaries_in_tx(&tx, *conv_id)?;
10812 }
10813 }
10814
10815 tx.commit()?;
10816
10817 pricing_diag.log_summary();
10818
10819 Ok(outcomes)
10820 }
10821}
10822
10823fn normalized_storage_source_parts(
10824 source_id: Option<&str>,
10825 origin_kind: Option<&str>,
10826 origin_host: Option<&str>,
10827) -> (String, SourceKind, Option<String>) {
10828 let host_label = crate::search::tantivy::normalized_index_origin_host(origin_host);
10829 let source_id = crate::search::tantivy::normalized_index_source_id(
10830 source_id,
10831 origin_kind,
10832 host_label.as_deref(),
10833 );
10834
10835 if source_id == LOCAL_SOURCE_ID {
10836 (source_id, SourceKind::Local, None)
10837 } else {
10838 (source_id, SourceKind::Ssh, host_label)
10839 }
10840}
10841
10842fn normalized_source_for_conversation(conv: &Conversation) -> Source {
10843 let (id, kind, host_label) = normalized_storage_source_parts(
10844 Some(conv.source_id.as_str()),
10845 None,
10846 conv.origin_host.as_deref(),
10847 );
10848 Source {
10849 id,
10850 kind,
10851 host_label,
10852 machine_id: None,
10853 platform: None,
10854 config_json: None,
10855 created_at: None,
10856 updated_at: None,
10857 }
10858}
10859
10860fn is_bootstrap_local_source(source: &Source) -> bool {
10861 source.id == LOCAL_SOURCE_ID
10862 && matches!(source.kind, SourceKind::Local)
10863 && source.host_label.is_none()
10864 && source.machine_id.is_none()
10865 && source.platform.is_none()
10866 && source.config_json.is_none()
10867 && source.created_at.is_none()
10868 && source.updated_at.is_none()
10869}
10870
10871fn normalized_conversation_for_storage<'a>(conv: &'a Conversation) -> Cow<'a, Conversation> {
10872 let normalized_source = normalized_source_for_conversation(conv);
10873 if normalized_source.id == conv.source_id && normalized_source.host_label == conv.origin_host {
10874 Cow::Borrowed(conv)
10875 } else {
10876 let mut normalized = conv.clone();
10877 normalized.source_id = normalized_source.id;
10878 normalized.origin_host = normalized_source.host_label;
10879 Cow::Owned(normalized)
10880 }
10881}
10882
10883impl FrankenStorage {
10884 fn ensure_source_for_conversation(&self, conv: &Conversation) -> Result<()> {
10885 let source = normalized_source_for_conversation(conv);
10886 if is_bootstrap_local_source(&source) {
10887 return Ok(());
10890 }
10891 let cache_key = EnsuredConversationSourceKey::from_source(&source);
10892 if self.conversation_source_already_ensured(&cache_key) {
10893 return Ok(());
10894 }
10895 self.upsert_source(&source)?;
10896 self.mark_conversation_source_ensured(cache_key);
10897 Ok(())
10898 }
10899
10900 fn ensure_sources_for_batch(
10901 &self,
10902 conversations: &[(i64, Option<i64>, &Conversation)],
10903 ) -> Result<()> {
10904 let mut seen = HashSet::with_capacity(conversations.len());
10905 for &(_, _, conv) in conversations {
10906 let source = normalized_source_for_conversation(conv);
10907 if seen.insert(source.id.clone()) {
10908 if is_bootstrap_local_source(&source) {
10909 continue;
10910 }
10911 self.upsert_source(&source)?;
10912 self.mark_conversation_source_ensured(EnsuredConversationSourceKey::from_source(
10913 &source,
10914 ));
10915 }
10916 }
10917 Ok(())
10918 }
10919}
10920
10921fn franken_last_rowid(tx: &FrankenTransaction<'_>) -> Result<i64> {
10927 tx.last_insert_rowid()
10928 .ok()
10929 .filter(|&id| id > 0)
10930 .with_context(|| "last_insert_rowid() returned NULL or 0 after INSERT")
10931}
10932
10933fn ensure_agents_in_tx(
10939 tx: &FrankenTransaction<'_>,
10940 conversations: &[(i64, Option<i64>, &Conversation)],
10941) -> Result<()> {
10942 let mut seen = HashSet::new();
10943 let now = FrankenStorage::now_millis();
10944 for &(agent_id, _, conv) in conversations {
10945 if !seen.insert(agent_id) {
10946 continue;
10947 }
10948 let exists: i64 = tx.query_row_map(
10949 "SELECT COUNT(*) FROM agents WHERE id = ?1",
10950 fparams![agent_id],
10951 |row| row.get_typed(0),
10952 )?;
10953 if exists == 0 {
10954 tracing::debug!(
10955 target: "cass::fk_guard",
10956 agent_id,
10957 slug = %conv.agent_slug,
10958 "inserting agent row inside transaction to satisfy FK constraint"
10959 );
10960 tx.execute_compat(
10964 "INSERT OR IGNORE INTO agents(id, slug, name, kind, created_at, updated_at)
10965 VALUES(?1, ?2, ?3, 'cli', ?4, ?5)",
10966 fparams![
10967 agent_id,
10968 conv.agent_slug.as_str(),
10969 conv.agent_slug.as_str(),
10970 now,
10971 now
10972 ],
10973 )?;
10974 }
10975 }
10976 Ok(())
10977}
10978
10979fn ensure_workspaces_in_tx(
10982 tx: &FrankenTransaction<'_>,
10983 conversations: &[(i64, Option<i64>, &Conversation)],
10984) -> Result<()> {
10985 let mut seen = HashSet::new();
10986 for &(_, workspace_id, conv) in conversations {
10987 let ws_id = match workspace_id {
10988 Some(id) => id,
10989 None => continue,
10990 };
10991 if !seen.insert(ws_id) {
10992 continue;
10993 }
10994 let exists: i64 = tx.query_row_map(
10995 "SELECT COUNT(*) FROM workspaces WHERE id = ?1",
10996 fparams![ws_id],
10997 |row| row.get_typed(0),
10998 )?;
10999 if exists == 0 {
11000 let path_str = conv
11001 .workspace
11002 .as_ref()
11003 .map(|p| p.to_string_lossy().to_string())
11004 .unwrap_or_default();
11005 tracing::debug!(
11006 target: "cass::fk_guard",
11007 workspace_id = ws_id,
11008 path = %path_str,
11009 "inserting workspace row inside transaction to satisfy FK constraint"
11010 );
11011 tx.execute_compat(
11012 "INSERT OR IGNORE INTO workspaces(id, path) VALUES(?1, ?2)",
11013 fparams![ws_id, path_str.as_str()],
11014 )?;
11015 }
11016 }
11017 Ok(())
11018}
11019
11020fn ensure_sources_in_tx(
11024 tx: &FrankenTransaction<'_>,
11025 conversations: &[(i64, Option<i64>, &Conversation)],
11026) -> Result<()> {
11027 let mut seen = HashSet::new();
11028 for &(_, _, conv) in conversations {
11029 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
11030 Some(conv.source_id.as_str()),
11031 None,
11032 conv.origin_host.as_deref(),
11033 );
11034 if !seen.insert(source_id.clone()) {
11035 continue;
11036 }
11037 let exists: i64 = tx.query_row_map(
11038 "SELECT COUNT(*) FROM sources WHERE id = ?1",
11039 fparams![source_id.as_str()],
11040 |row| row.get_typed(0),
11041 )?;
11042 if exists == 0 {
11043 let kind_str = source_kind.to_string();
11044 let now = FrankenStorage::now_millis();
11045 tracing::debug!(
11046 target: "cass::fk_guard",
11047 source_id = %source_id,
11048 kind = kind_str.as_str(),
11049 "inserting source row inside transaction to satisfy FK constraint"
11050 );
11051 tx.execute_compat(
11052 "INSERT OR IGNORE INTO sources(id, kind, host_label, created_at, updated_at)
11053 VALUES(?1, ?2, ?3, ?4, ?5)",
11054 fparams![
11055 source_id.as_str(),
11056 kind_str.as_str(),
11057 host_label.as_deref(),
11058 now,
11059 now
11060 ],
11061 )?;
11062 }
11063 }
11064 Ok(())
11065}
11066
11067fn env_flag_enabled(name: &str) -> bool {
11068 dotenvy::var(name)
11069 .map(|v| !(v == "0" || v.eq_ignore_ascii_case("false")))
11070 .unwrap_or(false)
11071}
11072
11073fn defer_storage_lexical_updates_enabled() -> bool {
11074 env_flag_enabled("CASS_DEFER_LEXICAL_UPDATES")
11075}
11076
11077fn defer_analytics_updates_enabled() -> bool {
11078 env_flag_enabled("CASS_DEFER_ANALYTICS_UPDATES")
11079}
11080
/// Outcome of trying to insert a conversation row.
///
/// The derives are purely additive: they let the status be logged with `{:?}`,
/// compared in assertions, and copied freely (it only wraps a row id).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ConversationInsertStatus {
    /// A new conversation row was created; carries the new conversation id.
    Inserted(i64),
    /// A matching conversation already existed; carries the existing conversation id.
    Existing(i64),
}
11085
11086fn franken_find_external_conversation_tail_lookup(
11087 tx: &FrankenTransaction<'_>,
11088 lookup_key: &str,
11089) -> Result<Option<ExistingConversationWithTail>> {
11090 let params = [SqliteValue::from(lookup_key)];
11091 let row = tx
11092 .query_row_with_params(
11093 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
11094 FROM conversation_external_tail_lookup
11095 WHERE lookup_key = ?1",
11096 ¶ms,
11097 )
11098 .optional()?;
11099 let Some(row) = row else {
11100 return Ok(None);
11101 };
11102 let id = row.get_typed(0)?;
11103 let ended_at = row.get_typed(1)?;
11104 let last_message_idx = row.get_typed(2)?;
11105 let last_message_created_at = row.get_typed(3)?;
11106 Ok(Some(ExistingConversationWithTail {
11107 id,
11108 tail_state: existing_conversation_tail_state_from_cached(
11109 last_message_idx,
11110 last_message_created_at,
11111 ended_at,
11112 ),
11113 }))
11114}
11115
11116fn franken_find_external_conversation_lookup(
11117 tx: &FrankenTransaction<'_>,
11118 lookup_key: &str,
11119) -> Result<Option<i64>> {
11120 Ok(franken_find_external_conversation_tail_lookup(tx, lookup_key)?.map(|existing| existing.id))
11121}
11122
11123fn franken_insert_external_conversation_tail_lookup_key(
11124 tx: &FrankenTransaction<'_>,
11125 lookup_key: &str,
11126 conversation_id: i64,
11127 ended_at: Option<i64>,
11128 last_message_idx: Option<i64>,
11129 last_message_created_at: Option<i64>,
11130) -> Result<()> {
11131 let params = [
11132 SqliteValue::from(lookup_key),
11133 SqliteValue::from(conversation_id),
11134 SqliteValue::from(ended_at),
11135 SqliteValue::from(last_message_idx),
11136 SqliteValue::from(last_message_created_at),
11137 ];
11138 tx.execute_with_params(
11139 "INSERT OR REPLACE INTO conversation_external_tail_lookup(
11140 lookup_key, conversation_id, ended_at, last_message_idx, last_message_created_at
11141 ) VALUES(?1, ?2, ?3, ?4, ?5)",
11142 ¶ms,
11143 )?;
11144 Ok(())
11145}
11146
11147fn franken_insert_external_conversation_tail_lookup(
11148 tx: &FrankenTransaction<'_>,
11149 source_id: &str,
11150 agent_id: i64,
11151 external_id: &str,
11152 existing: ExistingConversationWithTail,
11153) -> Result<()> {
11154 let lookup_key = conversation_external_lookup_key(source_id, agent_id, external_id);
11155 let ended_at = existing.tail_state.and_then(|state| state.ended_at);
11156 let last_message_idx = existing.tail_state.map(|state| state.last_message_idx);
11157 let last_message_created_at = existing
11158 .tail_state
11159 .map(|state| state.last_message_created_at);
11160 franken_insert_external_conversation_tail_lookup_key(
11161 tx,
11162 &lookup_key,
11163 existing.id,
11164 ended_at,
11165 last_message_idx,
11166 last_message_created_at,
11167 )
11168}
11169
11170fn franken_update_external_conversation_tail_lookup_key(
11171 tx: &FrankenTransaction<'_>,
11172 lookup_key: &str,
11173 ended_at_candidate: Option<i64>,
11174 last_message_idx_candidate: Option<i64>,
11175 last_message_created_at_candidate: Option<i64>,
11176) -> Result<()> {
11177 if ended_at_candidate.is_none()
11178 && last_message_idx_candidate.is_none()
11179 && last_message_created_at_candidate.is_none()
11180 {
11181 return Ok(());
11182 }
11183 tx.execute_compat(
11184 "UPDATE conversation_external_tail_lookup
11185 SET ended_at = CASE
11186 WHEN ?1 IS NULL THEN ended_at
11187 ELSE MAX(IFNULL(ended_at, 0), ?1)
11188 END,
11189 last_message_idx = CASE
11190 WHEN ?2 IS NULL THEN last_message_idx
11191 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
11192 ELSE last_message_idx
11193 END,
11194 last_message_created_at = CASE
11195 WHEN ?3 IS NULL THEN last_message_created_at
11196 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
11197 ELSE last_message_created_at
11198 END
11199 WHERE lookup_key = ?4",
11200 fparams![
11201 ended_at_candidate,
11202 last_message_idx_candidate,
11203 last_message_created_at_candidate,
11204 lookup_key
11205 ],
11206 )?;
11207 Ok(())
11208}
11209
11210fn franken_set_external_conversation_tail_lookup_after_append(
11211 tx: &FrankenTransaction<'_>,
11212 lookup_key: &str,
11213 ended_at: i64,
11214 last_message_idx: i64,
11215 last_message_created_at: i64,
11216) -> Result<()> {
11217 tx.execute_compat(
11218 "UPDATE conversation_external_tail_lookup
11219 SET ended_at = ?1,
11220 last_message_idx = ?2,
11221 last_message_created_at = ?3
11222 WHERE lookup_key = ?4",
11223 fparams![
11224 ended_at,
11225 last_message_idx,
11226 last_message_created_at,
11227 lookup_key
11228 ],
11229 )?;
11230 Ok(())
11231}
11232
11233fn franken_update_external_conversation_tail_after_append(
11234 tx: &FrankenTransaction<'_>,
11235 agent_id: i64,
11236 conv: &Conversation,
11237 used_append_tail_plan: bool,
11238 exact_append_set: bool,
11239 inserted_last_idx: Option<i64>,
11240 inserted_last_created_at: Option<i64>,
11241) -> Result<()> {
11242 let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv) else {
11243 return Ok(());
11244 };
11245
11246 if exact_append_set
11247 && let (Some(last_message_idx), Some(last_message_created_at)) =
11248 (inserted_last_idx, inserted_last_created_at)
11249 {
11250 return franken_set_external_conversation_tail_lookup_after_append(
11251 tx,
11252 &lookup_key,
11253 last_message_created_at,
11254 last_message_idx,
11255 last_message_created_at,
11256 );
11257 }
11258
11259 let ended_at_candidate = if used_append_tail_plan {
11260 inserted_last_created_at
11261 } else {
11262 conv.messages.iter().filter_map(|m| m.created_at).max()
11263 };
11264 franken_update_external_conversation_tail_lookup_key(
11265 tx,
11266 &lookup_key,
11267 ended_at_candidate,
11268 inserted_last_idx,
11269 inserted_last_created_at,
11270 )
11271}
11272
11273fn franken_find_existing_conversation_by_key(
11274 tx: &FrankenTransaction<'_>,
11275 key: &PendingConversationKey,
11276 conv: Option<&Conversation>,
11277) -> Result<Option<i64>> {
11278 franken_find_existing_conversation_by_key_impl(tx, key, conv, false)
11279}
11280
11281fn franken_find_existing_conversation_by_key_after_conflict(
11282 tx: &FrankenTransaction<'_>,
11283 key: &PendingConversationKey,
11284 conv: Option<&Conversation>,
11285) -> Result<Option<i64>> {
11286 franken_find_existing_conversation_by_key_impl(tx, key, conv, true)
11287}
11288
11289fn franken_find_existing_conversation_by_key_impl(
11290 tx: &FrankenTransaction<'_>,
11291 key: &PendingConversationKey,
11292 conv: Option<&Conversation>,
11293 allow_legacy_external_scan: bool,
11294) -> Result<Option<i64>> {
11295 match key {
11296 PendingConversationKey::External {
11297 source_id,
11298 agent_id,
11299 external_id,
11300 } => {
11301 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
11302 if let Some(existing_id) = franken_find_external_conversation_lookup(tx, &lookup_key)? {
11303 return Ok(Some(existing_id));
11304 }
11305 if !allow_legacy_external_scan {
11306 return Ok(None);
11307 }
11308
11309 let existing_id = tx
11310 .query_row_map(
11311 "SELECT id
11312 FROM conversations
11313 WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
11314 fparams![source_id.as_str(), *agent_id, external_id.as_str()],
11315 |row| row.get_typed(0),
11316 )
11317 .optional()?;
11318 if let Some(existing_id) = existing_id {
11319 let tail_state = franken_existing_conversation_append_tail_state(tx, existing_id)?;
11320 franken_insert_external_conversation_tail_lookup_key(
11321 tx,
11322 &lookup_key,
11323 existing_id,
11324 tail_state.and_then(|state| state.ended_at),
11325 tail_state.map(|state| state.last_message_idx),
11326 tail_state.map(|state| state.last_message_created_at),
11327 )?;
11328 Ok(Some(existing_id))
11329 } else {
11330 Ok(None)
11331 }
11332 }
11333 PendingConversationKey::SourcePath {
11334 source_id,
11335 agent_id,
11336 source_path,
11337 started_at,
11338 } => {
11339 let exact_match = tx
11340 .query_row_map(
11341 "SELECT c.id
11342 FROM conversations c
11343 WHERE c.source_id = ?1
11344 AND c.agent_id = ?2
11345 AND c.source_path = ?3
11346 AND ((
11347 COALESCE(
11348 c.started_at,
11349 (SELECT MIN(created_at)
11350 FROM messages
11351 WHERE conversation_id = c.id
11352 AND created_at IS NOT NULL)
11353 ) IS NULL
11354 AND ?4 IS NULL
11355 ) OR COALESCE(
11356 c.started_at,
11357 (SELECT MIN(created_at)
11358 FROM messages
11359 WHERE conversation_id = c.id
11360 AND created_at IS NOT NULL)
11361 ) = ?4)
11362 ORDER BY c.id
11363 LIMIT 1",
11364 fparams![
11365 source_id.as_str(),
11366 *agent_id,
11367 source_path.as_str(),
11368 *started_at
11369 ],
11370 |row| row.get_typed(0),
11371 )
11372 .optional()?;
11373 if exact_match.is_some() {
11374 return Ok(exact_match);
11375 }
11376
11377 let Some(conv) = conv else {
11378 return Ok(None);
11379 };
11380 let incoming_fingerprints = conversation_message_fingerprints(conv);
11381 if incoming_fingerprints.is_empty() {
11382 return Ok(None);
11383 }
11384 let incoming_replay_fingerprints = conversation_message_replay_fingerprints(conv);
11385
11386 let candidates: Vec<(i64, Option<i64>)> = tx.query_map_collect(
11387 "SELECT
11388 c.id,
11389 COALESCE(
11390 c.started_at,
11391 (SELECT MIN(created_at)
11392 FROM messages
11393 WHERE conversation_id = c.id
11394 AND created_at IS NOT NULL)
11395 ) AS effective_started_at
11396 FROM conversations c
11397 WHERE c.source_id = ?1
11398 AND c.agent_id = ?2
11399 AND c.source_path = ?3
11400 ORDER BY c.id",
11401 fparams![source_id.as_str(), *agent_id, source_path.as_str()],
11402 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
11403 )?;
11404
11405 let mut best_candidate: Option<(i64, ConversationMergeEvidence)> = None;
11406 for (candidate_id, candidate_started_at) in candidates {
11407 let existing_fingerprints =
11408 franken_existing_message_fingerprints(tx, candidate_id)?;
11409 let existing_replay_fingerprints =
11410 replay_fingerprints_from_merge_set(&existing_fingerprints);
11411 let Some(evidence) = conversation_merge_evidence(
11412 &incoming_fingerprints,
11413 &incoming_replay_fingerprints,
11414 &existing_fingerprints,
11415 &existing_replay_fingerprints,
11416 *started_at,
11417 candidate_started_at,
11418 ) else {
11419 continue;
11420 };
11421
11422 let candidate_key = (
11423 evidence.exact_overlap,
11424 evidence.replay_overlap,
11425 evidence.started_close,
11426 evidence.smaller_replay_set,
11427 std::cmp::Reverse(evidence.start_distance_ms),
11428 );
11429 let should_replace = best_candidate
11430 .as_ref()
11431 .map(|(_, best_evidence)| {
11432 candidate_key
11433 > (
11434 best_evidence.exact_overlap,
11435 best_evidence.replay_overlap,
11436 best_evidence.started_close,
11437 best_evidence.smaller_replay_set,
11438 std::cmp::Reverse(best_evidence.start_distance_ms),
11439 )
11440 })
11441 .unwrap_or(true);
11442
11443 if should_replace {
11444 best_candidate = Some((candidate_id, evidence));
11445 }
11446 }
11447
11448 Ok(best_candidate.map(|(candidate_id, _)| candidate_id))
11449 }
11450 }
11451}
11452
11453fn franken_insert_conversation_or_get_existing(
11454 tx: &FrankenTransaction<'_>,
11455 agent_id: i64,
11456 workspace_id: Option<i64>,
11457 conv: &Conversation,
11458) -> Result<ConversationInsertStatus> {
11459 let conversation_key = conversation_merge_key(agent_id, conv);
11460 if let Some(existing_id) =
11461 franken_find_existing_conversation_by_key(tx, &conversation_key, Some(conv))?
11462 {
11463 return Ok(ConversationInsertStatus::Existing(existing_id));
11464 }
11465
11466 franken_insert_conversation_or_get_existing_after_miss(
11467 tx,
11468 agent_id,
11469 workspace_id,
11470 conv,
11471 &conversation_key,
11472 )
11473}
11474
11475fn franken_insert_conversation_or_get_existing_after_miss(
11476 tx: &FrankenTransaction<'_>,
11477 agent_id: i64,
11478 workspace_id: Option<i64>,
11479 conv: &Conversation,
11480 conversation_key: &PendingConversationKey,
11481) -> Result<ConversationInsertStatus> {
11482 match franken_insert_conversation(tx, agent_id, workspace_id, conv) {
11483 Ok(Some(conv_id)) => Ok(ConversationInsertStatus::Inserted(conv_id)),
11484 Ok(None) => {
11485 let existing_id =
11488 franken_find_existing_conversation_by_key_after_conflict(
11489 tx,
11490 conversation_key,
11491 Some(conv),
11492 )?
11493 .with_context(|| {
11494 format!(
11495 "conversation INSERT produced a duplicate conflict but existing row was not found for source_id={} agent_id={} external_id={:?} source_path={}",
11496 conv.source_id,
11497 agent_id,
11498 conv.external_id,
11499 conv.source_path.display()
11500 )
11501 })?;
11502 tracing::warn!(
11503 source_id = %conv.source_id,
11504 agent_id,
11505 external_id = ?conv.external_id,
11506 existing_id,
11507 source_path = %conv.source_path.display(),
11508 "conversation INSERT: duplicate gracefully recovered, reusing existing row"
11509 );
11510 Ok(ConversationInsertStatus::Existing(existing_id))
11511 }
11512 Err(error) => {
11513 tracing::error!(
11514 source_id = %conv.source_id,
11515 agent_id,
11516 external_id = ?conv.external_id,
11517 error = %error,
11518 source_path = %conv.source_path.display(),
11519 "franken_insert_conversation failed"
11520 );
11521 Err(error)
11522 }
11523 }
11524}
11525
11526fn franken_insert_conversation(
11532 tx: &FrankenTransaction<'_>,
11533 agent_id: i64,
11534 workspace_id: Option<i64>,
11535 conv: &Conversation,
11536) -> Result<Option<i64>> {
11537 let (metadata_json_str, metadata_bin) = franken_metadata_insert_payload(&conv.metadata_json)?;
11538 let (last_message_idx, last_message_created_at) = conversation_tail_state(conv);
11539 let metadata_bin_bytes = metadata_bin.as_deref();
11540
11541 match tx.execute_compat(
11542 "INSERT INTO conversations(
11543 agent_id, workspace_id, source_id, external_id, title, source_path,
11544 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin,
11545 last_message_idx, last_message_created_at
11546 ) VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14)",
11547 fparams![
11548 agent_id,
11549 workspace_id,
11550 conv.source_id.as_str(),
11551 conv.external_id.as_deref(),
11552 conv.title.as_deref(),
11553 path_to_string(&conv.source_path),
11554 conv.started_at,
11555 conv.ended_at,
11556 conv.approx_tokens,
11557 metadata_json_str.as_deref(),
11558 conv.origin_host.as_deref(),
11559 metadata_bin_bytes,
11560 last_message_idx,
11561 last_message_created_at
11562 ],
11563 ) {
11564 Ok(_) => {
11565 let conv_id = franken_last_rowid(tx)?;
11566 franken_insert_conversation_tail_state(
11567 tx,
11568 conv_id,
11569 conv.ended_at,
11570 last_message_idx,
11571 last_message_created_at,
11572 )?;
11573 if let Some(external_id) = conv.external_id.as_deref() {
11574 franken_insert_external_conversation_tail_lookup(
11575 tx,
11576 conv.source_id.as_str(),
11577 agent_id,
11578 external_id,
11579 ExistingConversationWithTail {
11580 id: conv_id,
11581 tail_state: existing_conversation_tail_state_from_cached(
11582 last_message_idx,
11583 last_message_created_at,
11584 conv.ended_at,
11585 ),
11586 },
11587 )?;
11588 }
11589 Ok(Some(conv_id))
11590 }
11591 Err(frankensqlite::FrankenError::UniqueViolation { .. }) => {
11592 tracing::debug!(
11593 source_id = %conv.source_id,
11594 agent_id,
11595 external_id = ?conv.external_id,
11596 source_path = %conv.source_path.display(),
11597 "conversation INSERT: duplicate provenance conflict"
11598 );
11599 Ok(None)
11600 }
11601 Err(error) => Err(error.into()),
11602 }
11603}
11604
11605type MetadataInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11606
11607fn franken_metadata_insert_payload(value: &serde_json::Value) -> Result<MetadataInsertPayload<'_>> {
11608 if let Some(raw) = historical_raw_json(value) {
11609 Ok((Some(Cow::Borrowed(raw)), None))
11610 } else if value.is_null() {
11611 Ok((Some(Cow::Borrowed("null")), None))
11612 } else if value.as_object().is_some_and(|object| object.is_empty()) {
11613 Ok((None, None))
11614 } else if let Some(metadata_bin) = serialize_json_to_msgpack(value) {
11615 Ok((None, Some(metadata_bin)))
11616 } else {
11617 Ok((Some(Cow::Owned(serde_json::to_string(value)?)), None))
11618 }
11619}
11620
11621fn franken_insert_new_message(
11622 tx: &FrankenTransaction<'_>,
11623 conversation_id: i64,
11624 msg: &Message,
11625) -> Result<i64> {
11626 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
11627 let extra_bin_bytes = extra_bin.as_deref();
11628
11629 tx.execute_compat(
11630 "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
11631 VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
11632 fparams![
11633 conversation_id,
11634 msg.idx,
11635 role_as_str(&msg.role),
11636 msg.author.as_deref(),
11637 msg.created_at,
11638 msg.content.as_str(),
11639 extra_json_str.as_deref(),
11640 extra_bin_bytes
11641 ],
11642 )?;
11643 franken_last_rowid(tx)
11644}
11645
11646type MessageInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11647
11648fn franken_message_insert_payload(msg: &Message) -> Result<MessageInsertPayload<'_>> {
11649 if let Some(raw) = historical_raw_json(&msg.extra_json) {
11650 Ok((Some(Cow::Borrowed(raw)), None))
11651 } else if msg.extra_json.is_null() {
11652 Ok((None, None))
11653 } else {
11654 let extra_bin = serialize_json_to_msgpack(&msg.extra_json);
11655 if extra_bin.is_some() {
11656 Ok((None, extra_bin))
11657 } else {
11658 Ok((
11659 Some(Cow::Owned(serde_json::to_string(&msg.extra_json)?)),
11660 None,
11661 ))
11662 }
11663 }
11664}
11665
/// Chunk size passed by `franken_batch_insert_new_messages` to the batched insert.
const MESSAGE_INSERT_BATCH_SIZE: usize = 100;

/// Chunk size passed by `franken_append_insert_new_messages` to the batched insert.
const APPEND_MESSAGE_INSERT_BATCH_SIZE: usize = 50;

/// Returns the multi-row `INSERT INTO messages` statement for `row_count` rows.
///
/// Statements for every row count from 1 up to the larger of the two batch-size
/// constants are built once on first use and cached for the process lifetime.
/// Each row contributes eight consecutively numbered placeholders. Index 0
/// holds an empty string so the table can be indexed directly by row count.
///
/// # Panics
/// Panics if `row_count` exceeds the largest cached batch size.
fn message_insert_batch_sql(row_count: usize) -> &'static str {
    static SQL_BY_ROW_COUNT: std::sync::OnceLock<Vec<String>> = std::sync::OnceLock::new();

    let largest_batch = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
    let statements = SQL_BY_ROW_COUNT.get_or_init(|| {
        let mut statements = Vec::with_capacity(largest_batch + 1);
        statements.push(String::new());

        // The placeholder list for N rows is the list for N-1 rows plus one more
        // tuple, so grow it incrementally and snapshot a statement at each step.
        let mut placeholders = String::new();
        for row in 0..largest_batch {
            if row > 0 {
                placeholders.push(',');
            }
            let first_param = row * 8 + 1;
            let numbered: Vec<String> = (first_param..first_param + 8)
                .map(|param| format!("?{param}"))
                .collect();
            placeholders.push('(');
            placeholders.push_str(&numbered.join(","));
            placeholders.push(')');

            statements.push(format!(
                "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin) VALUES {placeholders}"
            ));
        }
        statements
    });

    statements
        .get(row_count)
        .expect("message insert batch size must be covered by the cached SQL table")
        .as_str()
}
11716
11717fn franken_batch_insert_new_messages(
11718 tx: &FrankenTransaction<'_>,
11719 conversation_id: i64,
11720 messages: &[&Message],
11721) -> Result<Vec<i64>> {
11722 franken_batch_insert_new_messages_with_batch_size(
11723 tx,
11724 conversation_id,
11725 messages,
11726 MESSAGE_INSERT_BATCH_SIZE,
11727 )
11728}
11729
11730fn franken_append_insert_new_messages(
11731 tx: &FrankenTransaction<'_>,
11732 conversation_id: i64,
11733 messages: &[&Message],
11734) -> Result<Vec<i64>> {
11735 franken_batch_insert_new_messages_with_batch_size(
11736 tx,
11737 conversation_id,
11738 messages,
11739 APPEND_MESSAGE_INSERT_BATCH_SIZE,
11740 )
11741}
11742
11743fn franken_batch_insert_new_messages_with_batch_size(
11744 tx: &FrankenTransaction<'_>,
11745 conversation_id: i64,
11746 messages: &[&Message],
11747 batch_size: usize,
11748) -> Result<Vec<i64>> {
11749 let batch_size = batch_size.max(1);
11750 let mut inserted_ids = Vec::with_capacity(messages.len());
11751 for chunk in messages.chunks(batch_size) {
11752 if chunk.len() == 1 {
11753 inserted_ids.push(franken_insert_new_message(tx, conversation_id, chunk[0])?);
11754 continue;
11755 }
11756 let sql = message_insert_batch_sql(chunk.len());
11757
11758 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
11759 for msg in chunk {
11760 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
11761 param_values.push(SqliteValue::from(conversation_id));
11762 param_values.push(SqliteValue::from(msg.idx));
11763 param_values.push(SqliteValue::from(role_as_str(&msg.role)));
11764 param_values.push(SqliteValue::from(msg.author.as_deref()));
11765 param_values.push(SqliteValue::from(msg.created_at));
11766 param_values.push(SqliteValue::from(msg.content.as_str()));
11767 param_values.push(SqliteValue::from(extra_json_str.as_deref()));
11768 param_values.push(SqliteValue::from(extra_bin.as_deref()));
11769 }
11770
11771 tx.execute_with_params(sql, ¶m_values)?;
11772
11773 let last_id = franken_last_rowid(tx)?;
11774 let first_id = last_id
11775 .checked_sub((chunk.len() - 1) as i64)
11776 .with_context(|| {
11777 format!(
11778 "inferring rowid range for {}-row message batch ending at {last_id}",
11779 chunk.len()
11780 )
11781 })?;
11782 inserted_ids.extend((0..chunk.len()).map(|offset| first_id + offset as i64));
11783 }
11784
11785 Ok(inserted_ids)
11786}
11787
/// Test-only variant of `franken_insert_new_message` that records timings.
///
/// Counts one single-row call and one row in `profile`, then adds the elapsed
/// time of payload construction, statement execution, and the rowid lookup to
/// the matching duration fields. Returns the inserted rowid.
///
/// # Errors
/// Returns payload serialization errors and any database error.
#[cfg(test)]
fn franken_insert_new_message_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    msg: &Message,
    profile: &mut MessageInsertSubstageProfile,
) -> Result<i64> {
    profile.single_row_calls += 1;
    profile.batch_rows += 1;

    let payload_timer = Instant::now();
    let (extra_json_text, extra_blob) = franken_message_insert_payload(msg)?;
    profile.payload_duration += payload_timer.elapsed();
    let extra_blob_bytes = extra_blob.as_deref();

    let execute_timer = Instant::now();
    tx.execute_compat(
        "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
        fparams![
            conversation_id,
            msg.idx,
            role_as_str(&msg.role),
            msg.author.as_deref(),
            msg.created_at,
            msg.content.as_str(),
            extra_json_text.as_deref(),
            extra_blob_bytes
        ],
    )?;
    profile.execute_duration += execute_timer.elapsed();

    let rowid_timer = Instant::now();
    let inserted_rowid = franken_last_rowid(tx)?;
    profile.rowid_duration += rowid_timer.elapsed();

    Ok(inserted_rowid)
}
11825
/// Test-only profiled insert using chunks of `MESSAGE_INSERT_BATCH_SIZE`.
///
/// Returns the rowids of the inserted messages in input order; timings and
/// counters are accumulated into `profile`.
///
/// # Errors
/// Propagates any error from the profiled batched insert helper.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
11841
/// Test-only profiled insert using chunks of `APPEND_MESSAGE_INSERT_BATCH_SIZE`.
///
/// Returns the rowids of the inserted messages in input order; timings and
/// counters are accumulated into `profile`.
///
/// # Errors
/// Propagates any error from the profiled batched insert helper.
#[cfg(test)]
fn franken_append_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = APPEND_MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
11857
/// Test-only profiled twin of `franken_batch_insert_new_messages_with_batch_size`.
///
/// Messages are split into chunks of at most `batch_size` rows. A one-row chunk
/// goes through `franken_insert_new_message_with_profile`; larger chunks use a
/// cached multi-row statement. For each batch, `profile` receives the call and
/// row counts plus the elapsed time of SQL lookup, payload construction,
/// parameter building, execution, and rowid inference.
///
/// `batch_size` is clamped to `1..=max(MESSAGE_INSERT_BATCH_SIZE,
/// APPEND_MESSAGE_INSERT_BATCH_SIZE)`. Statements are only cached up to that
/// size, so an oversized request is served with smaller chunks instead of
/// panicking inside `message_insert_batch_sql`.
///
/// # Errors
/// Returns payload serialization errors, database errors, and an error if the
/// rowid range of a batch cannot be derived from the last inserted rowid.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile_batch_size(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
    batch_size: usize,
) -> Result<Vec<i64>> {
    let max_cached_rows = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
    let batch_size = batch_size.clamp(1, max_cached_rows);
    let mut inserted_ids = Vec::with_capacity(messages.len());

    for chunk in messages.chunks(batch_size) {
        if let [only] = chunk {
            inserted_ids.push(franken_insert_new_message_with_profile(
                tx,
                conversation_id,
                only,
                profile,
            )?);
            continue;
        }

        profile.batch_calls += 1;
        profile.batch_rows += chunk.len();

        let sql_build_start = Instant::now();
        let sql = message_insert_batch_sql(chunk.len());
        profile.sql_build_duration += sql_build_start.elapsed();

        let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
        for msg in chunk {
            let payload_start = Instant::now();
            let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
            profile.payload_duration += payload_start.elapsed();

            let param_build_start = Instant::now();
            param_values.push(SqliteValue::from(conversation_id));
            param_values.push(SqliteValue::from(msg.idx));
            param_values.push(SqliteValue::from(role_as_str(&msg.role)));
            param_values.push(SqliteValue::from(msg.author.as_deref()));
            param_values.push(SqliteValue::from(msg.created_at));
            param_values.push(SqliteValue::from(msg.content.as_str()));
            param_values.push(SqliteValue::from(extra_json_str.as_deref()));
            param_values.push(SqliteValue::from(extra_bin.as_deref()));
            profile.param_build_duration += param_build_start.elapsed();
        }

        let execute_start = Instant::now();
        tx.execute_with_params(sql, &param_values)?;
        profile.execute_duration += execute_start.elapsed();

        // Only the last rowid is reported; the rest are inferred by assuming the
        // rows of one multi-row INSERT received consecutive rowids.
        let rowid_start = Instant::now();
        let last_id = franken_last_rowid(tx)?;
        let first_id = last_id
            .checked_sub((chunk.len() - 1) as i64)
            .with_context(|| {
                format!(
                    "inferring rowid range for {}-row message batch ending at {last_id}",
                    chunk.len()
                )
            })?;
        inserted_ids.extend(first_id..=last_id);
        profile.rowid_duration += rowid_start.elapsed();
    }

    Ok(inserted_ids)
}
11924
11925fn franken_insert_snippets(
11927 tx: &FrankenTransaction<'_>,
11928 message_id: i64,
11929 snippets: &[Snippet],
11930) -> Result<()> {
11931 for snip in snippets {
11932 let file_path_str = snip.file_path.as_ref().map(path_to_string);
11933 tx.execute_compat(
11934 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text)
11935 VALUES(?1,?2,?3,?4,?5,?6)",
11936 fparams![
11937 message_id,
11938 file_path_str.as_deref(),
11939 snip.start_line,
11940 snip.end_line,
11941 snip.language.as_deref(),
11942 snip.snippet_text.as_deref()
11943 ],
11944 )?;
11945 }
11946 Ok(())
11947}
11948
11949fn franken_existing_message_fingerprints(
11950 tx: &FrankenTransaction<'_>,
11951 conversation_id: i64,
11952) -> Result<HashSet<MessageMergeFingerprint>> {
11953 let rows = tx.query_params(
11954 "SELECT idx, role, author, created_at, content
11955 FROM messages
11956 WHERE conversation_id = ?1",
11957 fparams![conversation_id],
11958 )?;
11959 let mut fingerprints = HashSet::with_capacity(rows.len());
11960 for row in rows {
11961 let role: String = row.get_typed(1)?;
11962 let content: String = row.get_typed(4)?;
11963 fingerprints.insert(MessageMergeFingerprint {
11964 idx: row.get_typed(0)?,
11965 created_at: row.get_typed(3)?,
11966 role: role_from_str(&role),
11967 author: row.get_typed(2)?,
11968 content_hash: *blake3::hash(content.as_bytes()).as_bytes(),
11969 });
11970 }
11971 Ok(fingerprints)
11972}
11973
/// Fingerprints of messages already stored for one conversation, as returned by
/// `franken_existing_message_lookup`.
struct ExistingMessageLookup {
    /// Merge fingerprints of stored messages, keyed by message `idx`.
    by_idx: HashMap<i64, MessageMergeFingerprint>,
    /// Idx-independent fingerprints (created_at, role, author, content hash)
    /// of stored messages.
    replay: HashSet<MessageReplayFingerprint>,
}
11978
11979fn franken_existing_message_lookup(
11980 tx: &FrankenTransaction<'_>,
11981 conversation_id: i64,
11982 incoming_messages: &[Message],
11983) -> Result<ExistingMessageLookup> {
11984 if incoming_messages.is_empty() {
11985 return Ok(ExistingMessageLookup {
11986 by_idx: HashMap::new(),
11987 replay: HashSet::new(),
11988 });
11989 }
11990
11991 let min_idx = incoming_messages
11992 .iter()
11993 .map(|msg| msg.idx)
11994 .min()
11995 .unwrap_or(0);
11996 let max_idx = incoming_messages
11997 .iter()
11998 .map(|msg| msg.idx)
11999 .max()
12000 .unwrap_or(min_idx);
12001 let requires_full_scan = incoming_messages.iter().any(|msg| msg.created_at.is_none());
12002 let created_bounds = incoming_messages
12003 .iter()
12004 .filter_map(|msg| msg.created_at)
12005 .fold(None, |bounds: Option<(i64, i64)>, created_at| {
12006 Some(match bounds {
12007 Some((min_created_at, max_created_at)) => (
12008 min_created_at.min(created_at),
12009 max_created_at.max(created_at),
12010 ),
12011 None => (created_at, created_at),
12012 })
12013 });
12014
12015 let mut indexed_by_idx = HashMap::with_capacity(incoming_messages.len());
12016 let mut indexed_replay = HashSet::with_capacity(incoming_messages.len());
12017 let mut exact_idx_match = true;
12018 for msg in incoming_messages {
12019 record_message_lookup_exact_idx_probe();
12020 let Some((role, author, created_at, content)) = tx
12021 .query_row_map(
12022 "SELECT role, author, created_at, content
12023 FROM messages INDEXED BY sqlite_autoindex_messages_1
12024 WHERE conversation_id = ?1 AND idx = ?2
12025 LIMIT 1",
12026 fparams![conversation_id, msg.idx],
12027 |row| {
12028 Ok((
12029 row.get_typed::<String>(0)?,
12030 row.get_typed::<Option<String>>(1)?,
12031 row.get_typed::<Option<i64>>(2)?,
12032 row.get_typed::<String>(3)?,
12033 ))
12034 },
12035 )
12036 .optional()?
12037 else {
12038 exact_idx_match = false;
12039 break;
12040 };
12041 let role = role_from_str(&role);
12042 let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
12043 let fingerprint = MessageMergeFingerprint {
12044 idx: msg.idx,
12045 created_at,
12046 role: role.clone(),
12047 author: author.clone(),
12048 content_hash,
12049 };
12050 if fingerprint != message_merge_fingerprint(msg) {
12051 exact_idx_match = false;
12052 break;
12053 }
12054 indexed_by_idx.insert(msg.idx, fingerprint);
12055 indexed_replay.insert(MessageReplayFingerprint {
12056 created_at,
12057 role,
12058 author,
12059 content_hash,
12060 });
12061 }
12062
12063 if exact_idx_match {
12064 return Ok(ExistingMessageLookup {
12065 by_idx: indexed_by_idx,
12066 replay: indexed_replay,
12067 });
12068 }
12069
12070 let (rows, replay_full_scan) = if requires_full_scan {
12071 let rows = tx.query_params(
12072 "SELECT idx, role, author, created_at, content
12073 FROM messages INDEXED BY sqlite_autoindex_messages_1
12074 WHERE conversation_id = ?1",
12075 fparams![conversation_id],
12076 )?;
12077 record_message_lookup_full_scan_query(rows.len());
12078 (rows, true)
12079 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12080 let mut rows = tx.query_params(
12081 "SELECT idx, role, author, created_at, content
12082 FROM messages INDEXED BY sqlite_autoindex_messages_1
12083 WHERE conversation_id = ?1
12084 AND idx >= ?2
12085 AND idx <= ?3",
12086 fparams![conversation_id, min_idx, max_idx],
12087 )?;
12088 rows.extend(tx.query_params(
12089 "SELECT idx, role, author, created_at, content
12090 FROM messages INDEXED BY sqlite_autoindex_messages_1
12091 WHERE conversation_id = ?1
12092 AND created_at IS NOT NULL
12093 AND created_at >= ?2
12094 AND created_at <= ?3",
12095 fparams![conversation_id, min_created_at, max_created_at],
12096 )?);
12097 record_message_lookup_bounded_queries(2, rows.len());
12098 (rows, false)
12099 } else {
12100 let rows = tx.query_params(
12101 "SELECT idx, role, author, created_at, content
12102 FROM messages INDEXED BY sqlite_autoindex_messages_1
12103 WHERE conversation_id = ?1",
12104 fparams![conversation_id],
12105 )?;
12106 record_message_lookup_full_scan_query(rows.len());
12107 (rows, true)
12108 };
12109
12110 let mut by_idx = HashMap::with_capacity(rows.len());
12111 let mut replay = HashSet::with_capacity(rows.len());
12112 for row in rows {
12113 let idx: i64 = row.get_typed(0)?;
12114 let role: String = row.get_typed(1)?;
12115 let author: Option<String> = row.get_typed(2)?;
12116 let created_at: Option<i64> = row.get_typed(3)?;
12117 let content: String = row.get_typed(4)?;
12118 let role = role_from_str(&role);
12119 let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
12120
12121 if idx >= min_idx && idx <= max_idx {
12122 by_idx.insert(
12123 idx,
12124 MessageMergeFingerprint {
12125 idx,
12126 created_at,
12127 role: role.clone(),
12128 author: author.clone(),
12129 content_hash,
12130 },
12131 );
12132 }
12133
12134 let replay_matches = if replay_full_scan {
12135 true
12136 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12137 created_at.is_some_and(|ts| ts >= min_created_at && ts <= max_created_at)
12138 } else {
12139 true
12140 };
12141 if replay_matches {
12142 replay.insert(MessageReplayFingerprint {
12143 created_at,
12144 role,
12145 author,
12146 content_hash,
12147 });
12148 }
12149 }
12150
12151 Ok(ExistingMessageLookup { by_idx, replay })
12152}
12153
12154fn franken_existing_message_lookup_with_pending(
12155 tx: &FrankenTransaction<'_>,
12156 conversation_id: i64,
12157 incoming_messages: &[Message],
12158 pending_message_fingerprints: &mut HashMap<i64, HashMap<i64, MessageMergeFingerprint>>,
12159 pending_message_replay_fingerprints: &mut HashMap<i64, HashSet<MessageReplayFingerprint>>,
12160) -> Result<ExistingMessageLookup> {
12161 if let (Some(by_idx), Some(replay)) = (
12162 pending_message_fingerprints.get(&conversation_id),
12163 pending_message_replay_fingerprints.get(&conversation_id),
12164 ) {
12165 if incoming_messages.iter().all(|msg| {
12166 by_idx.contains_key(&msg.idx) || replay.contains(&message_replay_fingerprint(msg))
12167 }) {
12168 return Ok(ExistingMessageLookup {
12169 by_idx: by_idx.clone(),
12170 replay: replay.clone(),
12171 });
12172 }
12173
12174 let fresh = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12175 let mut merged_by_idx = by_idx.clone();
12176 let mut merged_replay = replay.clone();
12177 merged_by_idx.extend(fresh.by_idx);
12178 merged_replay.extend(fresh.replay);
12179 pending_message_fingerprints.insert(conversation_id, merged_by_idx.clone());
12180 pending_message_replay_fingerprints.insert(conversation_id, merged_replay.clone());
12181 return Ok(ExistingMessageLookup {
12182 by_idx: merged_by_idx,
12183 replay: merged_replay,
12184 });
12185 }
12186
12187 let lookup = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12188 pending_message_fingerprints.insert(conversation_id, lookup.by_idx.clone());
12189 pending_message_replay_fingerprints.insert(conversation_id, lookup.replay.clone());
12190 Ok(lookup)
12191}
12192
12193fn franken_batch_insert_fts(tx: &FrankenTransaction<'_>, entries: &[FtsEntry]) -> Result<usize> {
12195 if entries.is_empty() {
12196 return Ok(0);
12197 }
12198
12199 let mut inserted = 0;
12200
12201 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12202 let placeholders: String = chunk
12203 .iter()
12204 .enumerate()
12205 .map(|(i, _)| {
12206 let base = i * 7 + 1; format!(
12208 "(?{},?{},?{},?{},?{},?{},?{})",
12209 base,
12210 base + 1,
12211 base + 2,
12212 base + 3,
12213 base + 4,
12214 base + 5,
12215 base + 6
12216 )
12217 })
12218 .collect::<Vec<_>>()
12219 .join(",");
12220
12221 let sql = format!(
12222 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12223 );
12224
12225 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12226 for entry in chunk {
12227 param_values.push(SqliteValue::from(entry.message_id));
12228 param_values.push(SqliteValue::from(entry.content.as_str()));
12229 param_values.push(SqliteValue::from(entry.title.as_str()));
12230 param_values.push(SqliteValue::from(entry.agent.as_str()));
12231 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12232 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12233 param_values.push(SqliteValue::from(entry.created_at));
12234 }
12235
12236 match tx.execute_with_params(&sql, ¶m_values) {
12237 Ok(_) => {
12238 inserted += chunk.len();
12239 }
12240 Err(err) => {
12241 tracing::warn!(
12242 error = %err,
12243 chunk_docs = chunk.len(),
12244 "frankensqlite FTS batch insert failed; skipping db-resident FTS maintenance because Tantivy is authoritative"
12245 );
12246 return Ok(inserted);
12247 }
12248 }
12249 }
12250
12251 Ok(inserted)
12252}
12253
12254fn franken_batch_insert_fts_on_connection(
12255 conn: &FrankenConnection,
12256 entries: &[FtsEntry],
12257) -> Result<usize> {
12258 if entries.is_empty() {
12259 return Ok(0);
12260 }
12261
12262 let mut inserted = 0;
12263
12264 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12265 let placeholders: String = chunk
12266 .iter()
12267 .enumerate()
12268 .map(|(i, _)| {
12269 let base = i * 7 + 1;
12270 format!(
12271 "(?{},?{},?{},?{},?{},?{},?{})",
12272 base,
12273 base + 1,
12274 base + 2,
12275 base + 3,
12276 base + 4,
12277 base + 5,
12278 base + 6
12279 )
12280 })
12281 .collect::<Vec<_>>()
12282 .join(",");
12283
12284 let sql = format!(
12285 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12286 );
12287
12288 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12289 for entry in chunk {
12290 param_values.push(SqliteValue::from(entry.message_id));
12291 param_values.push(SqliteValue::from(entry.content.as_str()));
12292 param_values.push(SqliteValue::from(entry.title.as_str()));
12293 param_values.push(SqliteValue::from(entry.agent.as_str()));
12294 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12295 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12296 param_values.push(SqliteValue::from(entry.created_at));
12297 }
12298
12299 conn.execute_with_params(&sql, ¶m_values)
12300 .with_context(|| {
12301 format!(
12302 "inserting {} rows into fts_messages during streaming FTS maintenance",
12303 chunk.len()
12304 )
12305 })?;
12306 inserted += chunk.len();
12307 }
12308
12309 Ok(inserted)
12310}
12311
12312fn franken_update_daily_stats_in_tx(
12314 storage: &FrankenStorage,
12315 tx: &FrankenTransaction<'_>,
12316 agent_slug: &str,
12317 source_id: &str,
12318 started_at: Option<i64>,
12319 delta: StatsDelta,
12320) -> Result<()> {
12321 let day_id = started_at
12322 .map(FrankenStorage::day_id_from_millis)
12323 .unwrap_or(0);
12324 let now = FrankenStorage::now_millis();
12325
12326 let targets = [
12327 DailyStatsTarget {
12328 day_id,
12329 agent_slug,
12330 source_id,
12331 },
12332 DailyStatsTarget {
12333 day_id,
12334 agent_slug: "all",
12335 source_id,
12336 },
12337 DailyStatsTarget {
12338 day_id,
12339 agent_slug,
12340 source_id: "all",
12341 },
12342 DailyStatsTarget {
12343 day_id,
12344 agent_slug: "all",
12345 source_id: "all",
12346 },
12347 ];
12348
12349 if agent_slug != "all"
12350 && source_id != "all"
12351 && franken_update_ensured_daily_stats_targets_in_tx(storage, tx, &targets, now, delta)?
12352 {
12353 return Ok(());
12354 }
12355
12356 for target in targets {
12357 franken_apply_daily_stats_delta_in_tx(storage, tx, target, now, delta)?;
12358 }
12359
12360 Ok(())
12361}
12362
/// Identifies one `daily_stats` row by its (day, agent slug, source id) key.
#[derive(Clone, Copy)]
struct DailyStatsTarget<'a> {
    /// Day identifier of the row.
    day_id: i64,
    /// Agent slug of the row; `"all"` is used for the cross-agent rollup.
    agent_slug: &'a str,
    /// Source id of the row; `"all"` is used for the cross-source rollup.
    source_id: &'a str,
}
12369
12370fn franken_update_ensured_daily_stats_targets_in_tx(
12371 storage: &FrankenStorage,
12372 tx: &FrankenTransaction<'_>,
12373 targets: &[DailyStatsTarget<'_>; 4],
12374 now: i64,
12375 delta: StatsDelta,
12376) -> Result<bool> {
12377 let cache_keys = targets.map(|target| {
12378 EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id)
12379 });
12380 if !storage.daily_stats_keys_already_ensured(&cache_keys) {
12381 return Ok(false);
12382 }
12383
12384 let primary = targets[0];
12385 let rows_changed = tx.execute_compat(
12386 "UPDATE daily_stats
12387 SET session_count = session_count + ?4,
12388 message_count = message_count + ?5,
12389 total_chars = total_chars + ?6,
12390 last_updated = ?7
12391 WHERE day_id = ?1
12392 AND ((agent_slug = ?2 AND source_id = ?3)
12393 OR (agent_slug = 'all' AND source_id = ?3)
12394 OR (agent_slug = ?2 AND source_id = 'all')
12395 OR (agent_slug = 'all' AND source_id = 'all'))",
12396 fparams![
12397 primary.day_id,
12398 primary.agent_slug,
12399 primary.source_id,
12400 delta.session_count_delta,
12401 delta.message_count_delta,
12402 delta.total_chars_delta,
12403 now
12404 ],
12405 )?;
12406 if rows_changed == targets.len() {
12407 return Ok(true);
12408 }
12409
12410 for (target, cache_key) in targets.iter().copied().zip(cache_keys) {
12411 let exists = tx
12412 .query_row_map(
12413 "SELECT 1 FROM daily_stats
12414 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3
12415 LIMIT 1",
12416 fparams![target.day_id, target.agent_slug, target.source_id],
12417 |row| row.get_typed::<i64>(0),
12418 )
12419 .optional()?
12420 .is_some();
12421 if exists {
12422 continue;
12423 }
12424
12425 tx.execute_compat(
12426 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12427 VALUES(?1,?2,?3,?4,?5,?6,?7)",
12428 fparams![
12429 target.day_id,
12430 target.agent_slug,
12431 target.source_id,
12432 delta.session_count_delta,
12433 delta.message_count_delta,
12434 delta.total_chars_delta,
12435 now
12436 ],
12437 )?;
12438 storage.mark_daily_stats_key_ensured(cache_key);
12439 }
12440
12441 Ok(true)
12442}
12443
12444fn franken_apply_daily_stats_delta_in_tx(
12445 storage: &FrankenStorage,
12446 tx: &FrankenTransaction<'_>,
12447 target: DailyStatsTarget<'_>,
12448 now: i64,
12449 delta: StatsDelta,
12450) -> Result<()> {
12451 let cache_key = EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id);
12452 if storage.daily_stats_key_already_ensured(&cache_key) {
12453 let rows_changed = tx.execute_compat(
12454 "UPDATE daily_stats
12455 SET session_count = session_count + ?4,
12456 message_count = message_count + ?5,
12457 total_chars = total_chars + ?6,
12458 last_updated = ?7
12459 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3",
12460 fparams![
12461 target.day_id,
12462 target.agent_slug,
12463 target.source_id,
12464 delta.session_count_delta,
12465 delta.message_count_delta,
12466 delta.total_chars_delta,
12467 now
12468 ],
12469 )?;
12470 if rows_changed > 0 {
12471 return Ok(());
12472 }
12473 }
12474
12475 tx.execute_compat(
12476 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12477 VALUES(?1,?2,?3,?4,?5,?6,?7)
12478 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12479 session_count = session_count + excluded.session_count,
12480 message_count = message_count + excluded.message_count,
12481 total_chars = total_chars + excluded.total_chars,
12482 last_updated = excluded.last_updated",
12483 fparams![
12484 target.day_id,
12485 target.agent_slug,
12486 target.source_id,
12487 delta.session_count_delta,
12488 delta.message_count_delta,
12489 delta.total_chars_delta,
12490 now
12491 ],
12492 )?;
12493 storage.mark_daily_stats_key_ensured(cache_key);
12494 Ok(())
12495}
12496
12497fn franken_update_daily_stats_batched_in_tx(
12503 tx: &FrankenTransaction<'_>,
12504 entries: &[(i64, String, String, StatsDelta)],
12505) -> Result<usize> {
12506 if entries.is_empty() {
12507 return Ok(0);
12508 }
12509
12510 let now = FrankenStorage::now_millis();
12511 let mut total_affected = 0;
12512
12513 for (day_id, agent, source, delta) in entries {
12518 total_affected += tx.execute_compat(
12519 "INSERT INTO daily_stats (day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12520 VALUES(?1,?2,?3,?4,?5,?6,?7)
12521 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12522 session_count = session_count + excluded.session_count,
12523 message_count = message_count + excluded.message_count,
12524 total_chars = total_chars + excluded.total_chars,
12525 last_updated = excluded.last_updated",
12526 fparams![
12527 *day_id,
12528 agent.as_str(),
12529 source.as_str(),
12530 delta.session_count_delta,
12531 delta.message_count_delta,
12532 delta.total_chars_delta,
12533 now
12534 ],
12535 )?;
12536 }
12537
12538 Ok(total_affected)
12539}
12540
12541fn franken_insert_token_usage_batched_in_tx(
12547 tx: &FrankenTransaction<'_>,
12548 entries: &[TokenUsageEntry],
12549) -> Result<usize> {
12550 if entries.is_empty() {
12551 return Ok(0);
12552 }
12553
12554 let mut total_inserted = 0;
12555
12556 for e in entries {
12557 let params_vec: Vec<ParamValue> = vec![
12558 ParamValue::from(e.message_id),
12559 ParamValue::from(e.conversation_id),
12560 ParamValue::from(e.agent_id),
12561 ParamValue::from(e.workspace_id),
12562 ParamValue::from(e.source_id.clone()),
12563 ParamValue::from(e.timestamp_ms),
12564 ParamValue::from(e.day_id),
12565 ParamValue::from(e.model_name.clone()),
12566 ParamValue::from(e.model_family.clone()),
12567 ParamValue::from(e.model_tier.clone()),
12568 ParamValue::from(e.service_tier.clone()),
12569 ParamValue::from(e.provider.clone()),
12570 ParamValue::from(e.input_tokens),
12571 ParamValue::from(e.output_tokens),
12572 ParamValue::from(e.cache_read_tokens),
12573 ParamValue::from(e.cache_creation_tokens),
12574 ParamValue::from(e.thinking_tokens),
12575 ParamValue::from(e.total_tokens),
12576 ParamValue::from(e.estimated_cost_usd),
12577 ParamValue::from(e.role.clone()),
12578 ParamValue::from(e.content_chars),
12579 ParamValue::from(e.has_tool_calls as i64),
12580 ParamValue::from(e.tool_call_count as i64),
12581 ParamValue::from(e.data_source.clone()),
12582 ];
12583
12584 let values = param_slice_to_values(¶ms_vec);
12585 total_inserted += tx.execute_with_params(
12586 "INSERT OR IGNORE INTO token_usage (
12587 message_id, conversation_id, agent_id, workspace_id, source_id,
12588 timestamp_ms, day_id,
12589 model_name, model_family, model_tier, service_tier, provider,
12590 input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
12591 thinking_tokens, total_tokens, estimated_cost_usd,
12592 role, content_chars, has_tool_calls, tool_call_count, data_source
12593 )
12594 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12595 &values,
12596 )?;
12597 }
12598
12599 Ok(total_inserted)
12600}
12601
12602fn franken_update_token_daily_stats_batched_in_tx(
12604 tx: &FrankenTransaction<'_>,
12605 entries: &[(i64, String, String, String, TokenStatsDelta)],
12606) -> Result<usize> {
12607 if entries.is_empty() {
12608 return Ok(0);
12609 }
12610
12611 let now = FrankenStorage::now_millis();
12612 let mut total_affected = 0;
12613
12614 for (day_id, agent, source, model, delta) in entries {
12615 total_affected += tx.execute_compat(
12616 "INSERT INTO token_daily_stats (
12617 day_id, agent_slug, source_id, model_family,
12618 api_call_count, user_message_count, assistant_message_count, tool_message_count,
12619 total_input_tokens, total_output_tokens, total_cache_read_tokens,
12620 total_cache_creation_tokens, total_thinking_tokens, grand_total_tokens,
12621 total_content_chars, total_tool_calls, estimated_cost_usd, session_count,
12622 last_updated
12623 )
12624 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19)
12625 ON CONFLICT(day_id, agent_slug, source_id, model_family) DO UPDATE SET
12626 api_call_count = api_call_count + excluded.api_call_count,
12627 user_message_count = user_message_count + excluded.user_message_count,
12628 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12629 tool_message_count = tool_message_count + excluded.tool_message_count,
12630 total_input_tokens = total_input_tokens + excluded.total_input_tokens,
12631 total_output_tokens = total_output_tokens + excluded.total_output_tokens,
12632 total_cache_read_tokens = total_cache_read_tokens + excluded.total_cache_read_tokens,
12633 total_cache_creation_tokens = total_cache_creation_tokens + excluded.total_cache_creation_tokens,
12634 total_thinking_tokens = total_thinking_tokens + excluded.total_thinking_tokens,
12635 grand_total_tokens = grand_total_tokens + excluded.grand_total_tokens,
12636 total_content_chars = total_content_chars + excluded.total_content_chars,
12637 total_tool_calls = total_tool_calls + excluded.total_tool_calls,
12638 estimated_cost_usd = estimated_cost_usd + excluded.estimated_cost_usd,
12639 session_count = session_count + excluded.session_count,
12640 last_updated = excluded.last_updated",
12641 fparams![
12642 *day_id,
12643 agent.as_str(),
12644 source.as_str(),
12645 model.as_str(),
12646 delta.api_call_count,
12647 delta.user_message_count,
12648 delta.assistant_message_count,
12649 delta.tool_message_count,
12650 delta.total_input_tokens,
12651 delta.total_output_tokens,
12652 delta.total_cache_read_tokens,
12653 delta.total_cache_creation_tokens,
12654 delta.total_thinking_tokens,
12655 delta.grand_total_tokens,
12656 delta.total_content_chars,
12657 delta.total_tool_calls,
12658 delta.estimated_cost_usd,
12659 delta.session_count,
12660 now
12661 ],
12662 )?;
12663 }
12664
12665 Ok(total_affected)
12666}
12667
12668fn franken_insert_message_metrics_batched_in_tx(
12674 tx: &FrankenTransaction<'_>,
12675 entries: &[MessageMetricsEntry],
12676) -> Result<usize> {
12677 if entries.is_empty() {
12678 return Ok(0);
12679 }
12680
12681 let mut total_inserted = 0;
12682
12683 for e in entries {
12684 let params_vec: Vec<ParamValue> = vec![
12685 ParamValue::from(e.message_id),
12686 ParamValue::from(e.created_at_ms),
12687 ParamValue::from(e.hour_id),
12688 ParamValue::from(e.day_id),
12689 ParamValue::from(e.agent_slug.clone()),
12690 ParamValue::from(e.workspace_id),
12691 ParamValue::from(e.source_id.clone()),
12692 ParamValue::from(e.role.clone()),
12693 ParamValue::from(e.content_chars),
12694 ParamValue::from(e.content_tokens_est),
12695 ParamValue::from(e.model_name.clone()),
12696 ParamValue::from(e.model_family.clone()),
12697 ParamValue::from(e.model_tier.clone()),
12698 ParamValue::from(e.provider.clone()),
12699 ParamValue::from(e.api_input_tokens),
12700 ParamValue::from(e.api_output_tokens),
12701 ParamValue::from(e.api_cache_read_tokens),
12702 ParamValue::from(e.api_cache_creation_tokens),
12703 ParamValue::from(e.api_thinking_tokens),
12704 ParamValue::from(e.api_service_tier.clone()),
12705 ParamValue::from(e.api_data_source.clone()),
12706 ParamValue::from(e.tool_call_count),
12707 ParamValue::from(e.has_tool_calls as i64),
12708 ParamValue::from(e.has_plan as i64),
12709 ];
12710
12711 let values = param_slice_to_values(¶ms_vec);
12712 total_inserted += tx.execute_with_params(
12713 "INSERT OR IGNORE INTO message_metrics (
12714 message_id, created_at_ms, hour_id, day_id,
12715 agent_slug, workspace_id, source_id, role,
12716 content_chars, content_tokens_est,
12717 model_name, model_family, model_tier, provider,
12718 api_input_tokens, api_output_tokens, api_cache_read_tokens,
12719 api_cache_creation_tokens, api_thinking_tokens,
12720 api_service_tier, api_data_source,
12721 tool_call_count, has_tool_calls, has_plan
12722 )
12723 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12724 &values,
12725 )?;
12726 }
12727
12728 Ok(total_inserted)
12729}
12730
12731fn franken_flush_rollup_table(
12733 tx: &FrankenTransaction<'_>,
12734 table: &str,
12735 bucket_col: &str,
12736 deltas: &HashMap<(i64, String, i64, String), UsageRollupDelta>,
12737 now: i64,
12738) -> Result<usize> {
12739 if deltas.is_empty() {
12740 return Ok(0);
12741 }
12742
12743 let mut total_affected = 0;
12744
12745 for ((bucket_id, agent, workspace_id, source), d) in deltas {
12746 let sql = format!(
12747 "INSERT INTO {table} (
12748 {bucket_col}, agent_slug, workspace_id, source_id,
12749 message_count, user_message_count, assistant_message_count,
12750 tool_call_count, plan_message_count, plan_content_tokens_est_total,
12751 plan_api_tokens_total, api_coverage_message_count,
12752 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
12753 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
12754 api_cache_read_tokens_total, api_cache_creation_tokens_total,
12755 api_thinking_tokens_total, last_updated
12756 )
12757 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
12758 ON CONFLICT({bucket_col}, agent_slug, workspace_id, source_id) DO UPDATE SET
12759 message_count = message_count + excluded.message_count,
12760 user_message_count = user_message_count + excluded.user_message_count,
12761 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12762 tool_call_count = tool_call_count + excluded.tool_call_count,
12763 plan_message_count = plan_message_count + excluded.plan_message_count,
12764 plan_content_tokens_est_total = plan_content_tokens_est_total + excluded.plan_content_tokens_est_total,
12765 plan_api_tokens_total = plan_api_tokens_total + excluded.plan_api_tokens_total,
12766 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
12767 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
12768 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
12769 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
12770 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
12771 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
12772 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
12773 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
12774 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
12775 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
12776 last_updated = excluded.last_updated"
12777 );
12778
12779 total_affected += tx.execute_compat(
12780 &sql,
12781 fparams![
12782 *bucket_id,
12783 agent.as_str(),
12784 *workspace_id,
12785 source.as_str(),
12786 d.message_count,
12787 d.user_message_count,
12788 d.assistant_message_count,
12789 d.tool_call_count,
12790 d.plan_message_count,
12791 d.plan_content_tokens_est_total,
12792 d.plan_api_tokens_total,
12793 d.api_coverage_message_count,
12794 d.content_tokens_est_total,
12795 d.content_tokens_est_user,
12796 d.content_tokens_est_assistant,
12797 d.api_tokens_total,
12798 d.api_input_tokens_total,
12799 d.api_output_tokens_total,
12800 d.api_cache_read_tokens_total,
12801 d.api_cache_creation_tokens_total,
12802 d.api_thinking_tokens_total,
12803 now
12804 ],
12805 )?;
12806 }
12807
12808 Ok(total_affected)
12809}
12810
12811fn franken_flush_model_daily_rollup_table(
12813 tx: &FrankenTransaction<'_>,
12814 deltas: &HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
12815 now: i64,
12816) -> Result<usize> {
12817 if deltas.is_empty() {
12818 return Ok(0);
12819 }
12820
12821 let mut total_affected = 0;
12822
12823 for ((day_id, agent, workspace_id, source, model_family, model_tier), d) in deltas {
12824 total_affected += tx.execute_compat(
12825 "INSERT INTO usage_models_daily (
12826 day_id, agent_slug, workspace_id, source_id, model_family, model_tier,
12827 message_count, user_message_count, assistant_message_count,
12828 tool_call_count, plan_message_count, api_coverage_message_count,
12829 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
12830 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
12831 api_cache_read_tokens_total, api_cache_creation_tokens_total,
12832 api_thinking_tokens_total, last_updated
12833 )
12834 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
12835 ON CONFLICT(day_id, agent_slug, workspace_id, source_id, model_family, model_tier) DO UPDATE SET
12836 message_count = message_count + excluded.message_count,
12837 user_message_count = user_message_count + excluded.user_message_count,
12838 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12839 tool_call_count = tool_call_count + excluded.tool_call_count,
12840 plan_message_count = plan_message_count + excluded.plan_message_count,
12841 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
12842 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
12843 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
12844 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
12845 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
12846 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
12847 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
12848 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
12849 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
12850 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
12851 last_updated = excluded.last_updated",
12852 fparams![
12853 *day_id,
12854 agent.as_str(),
12855 *workspace_id,
12856 source.as_str(),
12857 model_family.as_str(),
12858 model_tier.as_str(),
12859 d.message_count,
12860 d.user_message_count,
12861 d.assistant_message_count,
12862 d.tool_call_count,
12863 d.plan_message_count,
12864 d.api_coverage_message_count,
12865 d.content_tokens_est_total,
12866 d.content_tokens_est_user,
12867 d.content_tokens_est_assistant,
12868 d.api_tokens_total,
12869 d.api_input_tokens_total,
12870 d.api_output_tokens_total,
12871 d.api_cache_read_tokens_total,
12872 d.api_cache_creation_tokens_total,
12873 d.api_thinking_tokens_total,
12874 now
12875 ],
12876 )?;
12877 }
12878
12879 Ok(total_affected)
12880}
12881
12882fn franken_flush_analytics_rollups_in_tx(
12884 tx: &FrankenTransaction<'_>,
12885 agg: &AnalyticsRollupAggregator,
12886) -> Result<(usize, usize, usize)> {
12887 let now = FrankenStorage::now_millis();
12888
12889 let hourly_affected =
12890 franken_flush_rollup_table(tx, "usage_hourly", "hour_id", &agg.hourly, now)?;
12891 let daily_affected = franken_flush_rollup_table(tx, "usage_daily", "day_id", &agg.daily, now)?;
12892 let models_daily_affected = franken_flush_model_daily_rollup_table(tx, &agg.models_daily, now)?;
12893
12894 Ok((hourly_affected, daily_affected, models_daily_affected))
12895}
12896
12897fn franken_update_conversation_token_summaries_in_tx(
12899 tx: &FrankenTransaction<'_>,
12900 conversation_id: i64,
12901) -> Result<()> {
12902 tx.execute_compat(
12903 "UPDATE conversations SET
12904 total_input_tokens = (SELECT SUM(input_tokens) FROM token_usage WHERE conversation_id = ?1),
12905 total_output_tokens = (SELECT SUM(output_tokens) FROM token_usage WHERE conversation_id = ?1),
12906 total_cache_read_tokens = (SELECT SUM(cache_read_tokens) FROM token_usage WHERE conversation_id = ?1),
12907 total_cache_creation_tokens = (SELECT SUM(cache_creation_tokens) FROM token_usage WHERE conversation_id = ?1),
12908 grand_total_tokens = (SELECT SUM(total_tokens) FROM token_usage WHERE conversation_id = ?1),
12909 estimated_cost_usd = (SELECT SUM(estimated_cost_usd) FROM token_usage WHERE conversation_id = ?1),
12910 primary_model = (SELECT model_name FROM token_usage WHERE conversation_id = ?1
12911 AND model_name IS NOT NULL
12912 GROUP BY model_name ORDER BY COUNT(*) DESC LIMIT 1),
12913 api_call_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12914 AND data_source = 'api'),
12915 tool_call_count = (SELECT SUM(tool_call_count) FROM token_usage WHERE conversation_id = ?1),
12916 user_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12917 AND role = 'user'),
12918 assistant_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12919 AND role IN ('assistant', 'agent'))
12920 WHERE id = ?1",
12921 fparams![conversation_id],
12922 )?;
12923 Ok(())
12924}
12925
12926impl FrankenStorage {
12927 pub fn rebuild_token_daily_stats(&self) -> Result<usize> {
12929 const CONVERSATION_BATCH_SIZE: usize = 1_000;
12930 const TOKEN_USAGE_BATCH_SIZE: usize = 10_000;
12931
12932 let total_usage_rows: i64 =
12933 self.conn
12934 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
12935 row.get_typed(0)
12936 })?;
12937 tracing::info!(
12938 target: "cass::analytics",
12939 total_usage_rows,
12940 "token_daily_stats_rebuild_start"
12941 );
12942
12943 let mut tx = self.conn.transaction()?;
12944 tx.execute("DELETE FROM token_daily_stats")?;
12945
12946 let mut last_conversation_id = 0_i64;
12947 let mut rows_created = 0_usize;
12948
12949 loop {
12950 let conversation_rows = tx.query_map_collect(
12951 "SELECT c.id, c.started_at, c.source_id,
12952 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown')
12953 FROM conversations c
12954 WHERE c.id > ?1
12955 ORDER BY c.id
12956 LIMIT ?2",
12957 fparams![last_conversation_id, CONVERSATION_BATCH_SIZE as i64],
12958 |row| {
12959 Ok((
12960 row.get_typed::<i64>(0)?,
12961 row.get_typed::<Option<i64>>(1)?,
12962 row.get_typed::<String>(2)?,
12963 row.get_typed::<String>(3)?,
12964 ))
12965 },
12966 )?;
12967 if conversation_rows.is_empty() {
12968 break;
12969 }
12970
12971 let mut aggregate = TokenStatsAggregator::new();
12972
12973 for (conversation_id, started_at, source_id, agent_slug) in conversation_rows {
12974 last_conversation_id = conversation_id;
12975 let conversation_day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
12976 let mut last_token_usage_id = 0_i64;
12977 let mut session_model_family = String::from("unknown");
12978
12979 loop {
12980 let usage_rows = tx.query_map_collect(
12981 "SELECT id, day_id, role,
12982 COALESCE(model_family, 'unknown'),
12983 input_tokens, output_tokens, cache_read_tokens,
12984 cache_creation_tokens, thinking_tokens,
12985 has_tool_calls, tool_call_count,
12986 content_chars, estimated_cost_usd
12987 FROM token_usage
12988 WHERE conversation_id = ?1
12989 AND id > ?2
12990 ORDER BY id
12991 LIMIT ?3",
12992 fparams![
12993 conversation_id,
12994 last_token_usage_id,
12995 TOKEN_USAGE_BATCH_SIZE as i64
12996 ],
12997 |row| {
12998 Ok((
12999 row.get_typed::<i64>(0)?,
13000 row.get_typed::<i64>(1)?,
13001 row.get_typed::<String>(2)?,
13002 row.get_typed::<String>(3)?,
13003 row.get_typed::<Option<i64>>(4)?,
13004 row.get_typed::<Option<i64>>(5)?,
13005 row.get_typed::<Option<i64>>(6)?,
13006 row.get_typed::<Option<i64>>(7)?,
13007 row.get_typed::<Option<i64>>(8)?,
13008 row.get_typed::<i64>(9)?,
13009 row.get_typed::<i64>(10)?,
13010 row.get_typed::<i64>(11)?,
13011 row.get_typed::<Option<f64>>(12)?,
13012 ))
13013 },
13014 )?;
13015 if usage_rows.is_empty() {
13016 break;
13017 }
13018
13019 for (
13020 token_usage_id,
13021 day_id,
13022 role,
13023 model_family,
13024 input_tokens,
13025 output_tokens,
13026 cache_read_tokens,
13027 cache_creation_tokens,
13028 thinking_tokens,
13029 has_tool_calls,
13030 tool_call_count,
13031 content_chars,
13032 estimated_cost_usd,
13033 ) in usage_rows
13034 {
13035 last_token_usage_id = token_usage_id;
13036 if model_family != "unknown" {
13037 session_model_family = model_family.clone();
13038 }
13039 let usage = crate::connectors::ExtractedTokenUsage {
13040 model_name: None,
13041 provider: None,
13042 input_tokens,
13043 output_tokens,
13044 cache_read_tokens,
13045 cache_creation_tokens,
13046 thinking_tokens,
13047 service_tier: None,
13048 has_tool_calls: has_tool_calls != 0,
13049 tool_call_count: u32::try_from(tool_call_count.max(0)).unwrap_or(0),
13050 data_source: franken_agent_detection::TokenDataSource::Api,
13051 };
13052 aggregate.record(
13053 &agent_slug,
13054 &source_id,
13055 day_id,
13056 &model_family,
13057 &role,
13058 &usage,
13059 content_chars,
13060 estimated_cost_usd.unwrap_or(0.0),
13061 );
13062 }
13063 }
13064
13065 aggregate.record_session(
13066 &agent_slug,
13067 &source_id,
13068 conversation_day_id,
13069 &session_model_family,
13070 );
13071 }
13072
13073 let entries = aggregate.expand();
13074 rows_created = rows_created.saturating_add(entries.len());
13075 franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
13076 }
13077
13078 tx.commit()?;
13079
13080 tracing::info!(
13081 target: "cass::analytics",
13082 rows_created,
13083 "token_daily_stats_rebuild_complete"
13084 );
13085
13086 Ok(rows_created)
13087 }
13088
13089 pub fn rebuild_analytics(&self) -> Result<AnalyticsRebuildResult> {
13092 let start = Instant::now();
13093
13094 let total_messages: i64 =
13095 self.conn
13096 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13097 row.get_typed(0)
13098 })?;
13099 tracing::info!(
13100 target: "cass::analytics",
13101 total_messages,
13102 "analytics_rebuild_start"
13103 );
13104
13105 let mut tx = self.conn.transaction()?;
13106
13107 tx.execute("DELETE FROM message_metrics")?;
13108 tx.execute("DELETE FROM usage_hourly")?;
13109 tx.execute("DELETE FROM usage_daily")?;
13110 tx.execute("DELETE FROM usage_models_daily")?;
13111
13112 const CHUNK_SIZE: i64 = 10_000;
13113 let mut offset: i64 = 0;
13114 let mut total_inserted: usize = 0;
13115 let mut usage_hourly_rows: usize = 0;
13116 let mut usage_daily_rows: usize = 0;
13117 let mut usage_models_daily_rows: usize = 0;
13118
13119 loop {
13120 #[allow(clippy::type_complexity)]
13121 let rows: Vec<(
13122 i64,
13123 String,
13124 String,
13125 Option<serde_json::Value>,
13126 Option<i64>,
13127 Option<i64>,
13128 String,
13129 Option<i64>,
13130 String,
13131 )> = tx.query_map_collect(
13132 "SELECT m.id, m.idx, m.role, m.content, m.extra_json, m.extra_bin,
13138 m.created_at,
13139 c.id AS conv_id, c.started_at AS conv_started_at,
13140 c.source_id, c.workspace_id,
13141 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown') AS agent_slug
13142 FROM messages m
13143 JOIN conversations c ON m.conversation_id = c.id
13144 ORDER BY m.id
13145 LIMIT ?1 OFFSET ?2",
13146 fparams![CHUNK_SIZE, offset],
13147 |row| {
13148 let msg_id: i64 = row.get_typed(0)?;
13149 let role: String = row.get_typed(2)?;
13150 let content: String = row.get_typed(3)?;
13151 let extra_json = row
13152 .get_typed::<Option<String>>(4)?
13153 .and_then(|s| serde_json::from_str(&s).ok())
13154 .or_else(|| {
13155 row.get_typed::<Option<Vec<u8>>>(5)
13156 .ok()
13157 .flatten()
13158 .and_then(|b| rmp_serde::from_slice(&b).ok())
13159 });
13160 let msg_ts: Option<i64> = row.get_typed(6)?;
13161 let conv_started_at: Option<i64> = row.get_typed(8)?;
13162 let source_id: String = row.get_typed(9)?;
13163 let workspace_id: Option<i64> = row.get_typed(10)?;
13164 let agent_slug: String = row.get_typed(11)?;
13165 let effective_ts = msg_ts.or(conv_started_at).unwrap_or(0);
13166
13167 Ok((
13168 msg_id,
13169 role,
13170 content,
13171 extra_json,
13172 Some(effective_ts),
13173 workspace_id,
13174 source_id,
13175 conv_started_at,
13176 agent_slug,
13177 ))
13178 },
13179 )?;
13180
13181 if rows.is_empty() {
13182 break;
13183 }
13184
13185 let chunk_len = rows.len();
13186 let mut entries = Vec::with_capacity(chunk_len);
13187 let mut rollup_agg = AnalyticsRollupAggregator::new();
13188
13189 for (
13190 msg_id,
13191 role,
13192 content,
13193 extra_json,
13194 effective_ts,
13195 workspace_id,
13196 source_id,
13197 _conv_started_at,
13198 agent_slug,
13199 ) in &rows
13200 {
13201 let ts = effective_ts.unwrap_or(0);
13202 let day_id = Self::day_id_from_millis(ts);
13203 let hour_id = Self::hour_id_from_millis(ts);
13204 let content_chars = content.len() as i64;
13205 let content_tokens_est = content_chars / 4;
13206 let extra = extra_json
13207 .as_ref()
13208 .cloned()
13209 .unwrap_or(serde_json::Value::Null);
13210 let usage =
13211 crate::connectors::extract_tokens_for_agent(agent_slug, &extra, content, role);
13212 let model_info = usage
13213 .model_name
13214 .as_deref()
13215 .map(crate::connectors::normalize_model);
13216 let model_family = model_info
13217 .as_ref()
13218 .map(|i| i.family.clone())
13219 .unwrap_or_else(|| "unknown".into());
13220 let model_tier = model_info
13221 .as_ref()
13222 .map(|i| i.tier.clone())
13223 .unwrap_or_else(|| "unknown".into());
13224 let provider = usage
13225 .provider
13226 .clone()
13227 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
13228 .unwrap_or_else(|| "unknown".into());
13229
13230 let entry = MessageMetricsEntry {
13231 message_id: *msg_id,
13232 created_at_ms: ts,
13233 hour_id,
13234 day_id,
13235 agent_slug: agent_slug.clone(),
13236 workspace_id: workspace_id.unwrap_or(0),
13237 source_id: source_id.clone(),
13238 role: role.clone(),
13239 content_chars,
13240 content_tokens_est,
13241 model_name: usage.model_name.clone(),
13242 model_family,
13243 model_tier,
13244 provider,
13245 api_input_tokens: usage.input_tokens,
13246 api_output_tokens: usage.output_tokens,
13247 api_cache_read_tokens: usage.cache_read_tokens,
13248 api_cache_creation_tokens: usage.cache_creation_tokens,
13249 api_thinking_tokens: usage.thinking_tokens,
13250 api_service_tier: usage.service_tier,
13251 api_data_source: usage.data_source.as_str().to_string(),
13252 tool_call_count: usage.tool_call_count as i64,
13253 has_tool_calls: usage.has_tool_calls,
13254 has_plan: has_plan_for_role(role, content),
13255 };
13256 rollup_agg.record(&entry);
13257 entries.push(entry);
13258 }
13259
13260 total_inserted += franken_insert_message_metrics_batched_in_tx(&tx, &entries)?;
13261 let (hourly, daily, models_daily) =
13262 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
13263 usage_hourly_rows += hourly;
13264 usage_daily_rows += daily;
13265 usage_models_daily_rows += models_daily;
13266 offset += chunk_len as i64;
13267
13268 tracing::debug!(
13269 target: "cass::analytics",
13270 offset,
13271 chunk = chunk_len,
13272 inserted = entries.len(),
13273 total = total_inserted,
13274 "analytics_rebuild_chunk"
13275 );
13276
13277 if (chunk_len as i64) < CHUNK_SIZE {
13278 break;
13279 }
13280 }
13281
13282 tx.commit()?;
13283
13284 let elapsed = start.elapsed();
13285 let elapsed_ms = elapsed.as_millis() as u64;
13286 let msgs_per_sec = if elapsed_ms > 0 {
13287 (total_inserted as f64) / (elapsed_ms as f64 / 1000.0)
13288 } else {
13289 0.0
13290 };
13291
13292 tracing::info!(
13293 target: "cass::analytics",
13294 message_metrics_rows = total_inserted,
13295 usage_hourly_rows,
13296 usage_daily_rows,
13297 usage_models_daily_rows,
13298 elapsed_ms,
13299 messages_per_sec = format!("{:.0}", msgs_per_sec),
13300 "analytics_rebuild_complete"
13301 );
13302
13303 Ok(AnalyticsRebuildResult {
13304 message_metrics_rows: total_inserted,
13305 usage_hourly_rows,
13306 usage_daily_rows,
13307 usage_models_daily_rows,
13308 elapsed_ms,
13309 messages_per_sec: msgs_per_sec,
13310 })
13311 }
13312
13313 pub fn rebuild_daily_stats(&self) -> Result<DailyStatsRebuildResult> {
13315 const DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE: usize = 1_000;
13316 const DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE: usize = 10_000;
13317
13318 let mut conversation_batch_size = rebuild_batch_size_env(
13319 "CASS_DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE",
13320 DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE,
13321 );
13322 let mut message_batch_size = rebuild_batch_size_env(
13323 "CASS_DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE",
13324 DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE,
13325 );
13326
13327 let total_messages: i64 =
13328 self.conn
13329 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13330 row.get_typed(0)
13331 })?;
13332 let message_metrics_rows: i64 =
13333 self.conn
13334 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
13335 row.get_typed(0)
13336 })?;
13337 let use_message_metrics = total_messages > 0 && total_messages == message_metrics_rows;
13338
13339 tracing::info!(
13340 target: "cass::perf::daily_stats",
13341 total_messages,
13342 message_metrics_rows,
13343 use_message_metrics,
13344 "daily_stats rebuild selected message source"
13345 );
13346
13347 let mut tx = self.conn.transaction()?;
13348 tx.execute("DELETE FROM daily_stats")?;
13349
13350 let mut last_conversation_id = 0_i64;
13351 let mut conversation_batch_count = 0_usize;
13352 let mut conversations_processed = 0_usize;
13353 let mut messages_processed = 0_usize;
13354 let mut message_batch_count = 0_usize;
13355 let mut raw_entries_flushed = 0_usize;
13356 let mut expanded_entries_flushed = 0_usize;
13357 let message_scan_sql = if use_message_metrics {
13358 "SELECT m.idx, mm.content_chars
13359 FROM messages m
13360 JOIN message_metrics mm ON mm.message_id = m.id
13361 WHERE m.conversation_id = ?1
13362 AND m.idx > ?2
13363 ORDER BY m.conversation_id, m.idx
13364 LIMIT ?3"
13365 } else {
13366 "SELECT m.idx, COALESCE(LENGTH(CAST(m.content AS BLOB)), 0)
13367 FROM messages m
13368 WHERE m.conversation_id = ?1
13369 AND m.idx > ?2
13370 ORDER BY m.conversation_id, m.idx
13371 LIMIT ?3"
13372 };
13373
13374 loop {
13375 let conversation_rows = match self.conn.query_with_params(
13381 "SELECT c.id, c.started_at,
13382 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
13383 c.source_id
13384 FROM conversations c
13385 WHERE c.id > ?1
13386 ORDER BY c.id
13387 LIMIT ?2",
13388 ¶ms_from_iter([
13389 ParamValue::from(last_conversation_id),
13390 ParamValue::from(conversation_batch_size as i64),
13391 ]),
13392 ) {
13393 Ok(rows) => rows,
13394 Err(err) if is_out_of_memory_error(&err) && conversation_batch_size > 1 => {
13395 let previous_batch_size = conversation_batch_size;
13396 conversation_batch_size = (conversation_batch_size / 2).max(1);
13397 tracing::warn!(
13398 previous_batch_size,
13399 conversation_batch_size,
13400 last_conversation_id,
13401 "daily_stats conversation scan ran out of memory; retrying with smaller batch"
13402 );
13403 continue;
13404 }
13405 Err(err) => return Err(err.into()),
13406 };
13407 if conversation_rows.is_empty() {
13408 break;
13409 }
13410
13411 let mut aggregate = StatsAggregator::new();
13412 let mut conversation_batch_meta: Vec<(i64, i64, String, String)> =
13413 Vec::with_capacity(conversation_rows.len());
13414 for row in &conversation_rows {
13415 let conversation_id: i64 = row.get_typed(0)?;
13416 let started_at: Option<i64> = row.get_typed(1)?;
13417 let agent_slug: String = row.get_typed(2)?;
13418 let source_id: String = row.get_typed(3)?;
13419 last_conversation_id = conversation_id;
13420 let day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13421 aggregate.record_delta(&agent_slug, &source_id, day_id, 1, 0, 0);
13422 conversation_batch_meta.push((conversation_id, day_id, agent_slug, source_id));
13423 conversations_processed += 1;
13424 }
13425
13426 conversation_batch_count += 1;
13427 raw_entries_flushed += aggregate.raw_entry_count();
13428 let entries = aggregate.expand();
13429 expanded_entries_flushed += entries.len();
13430 if !entries.is_empty() {
13431 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13432 }
13433 if conversation_batch_count.is_multiple_of(25) {
13434 tracing::info!(
13435 target: "cass::perf::daily_stats",
13436 conversations_processed,
13437 batches = conversation_batch_count,
13438 batch_size = conversation_batch_size,
13439 last_conversation_id,
13440 "daily_stats rebuild conversation scan progress"
13441 );
13442 }
13443 if conversation_batch_meta.is_empty() {
13444 continue;
13445 }
13446
13447 for (conversation_id, day_id, agent_slug, source_id) in conversation_batch_meta {
13448 let mut cursor_message_idx = -1_i64;
13449 loop {
13450 let message_rows = match self.conn.query_with_params(
13451 message_scan_sql,
13452 ¶ms_from_iter([
13453 ParamValue::from(conversation_id),
13454 ParamValue::from(cursor_message_idx),
13455 ParamValue::from(message_batch_size as i64),
13456 ]),
13457 ) {
13458 Ok(rows) => rows,
13459 Err(err) if is_out_of_memory_error(&err) && message_batch_size > 1 => {
13460 let previous_batch_size = message_batch_size;
13461 message_batch_size = (message_batch_size / 2).max(1);
13462 tracing::warn!(
13463 previous_batch_size,
13464 message_batch_size,
13465 conversation_id,
13466 cursor_message_idx,
13467 "daily_stats message scan ran out of memory; retrying with smaller batch"
13468 );
13469 continue;
13470 }
13471 Err(err) => return Err(err.into()),
13472 };
13473 if message_rows.is_empty() {
13474 break;
13475 }
13476
13477 let mut aggregate = StatsAggregator::new();
13478 for row in &message_rows {
13479 let message_idx: i64 = row.get_typed(0)?;
13480 let content_len: i64 = row.get_typed(1)?;
13481 cursor_message_idx = message_idx;
13482 aggregate.record_delta(&agent_slug, &source_id, day_id, 0, 1, content_len);
13483 messages_processed += 1;
13484 }
13485
13486 message_batch_count += 1;
13487 raw_entries_flushed += aggregate.raw_entry_count();
13488 let entries = aggregate.expand();
13489 expanded_entries_flushed += entries.len();
13490 if !entries.is_empty() {
13491 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13492 }
13493 if message_batch_count.is_multiple_of(50) {
13494 tracing::info!(
13495 target: "cass::perf::daily_stats",
13496 messages_processed,
13497 batches = message_batch_count,
13498 batch_size = message_batch_size,
13499 source = if use_message_metrics {
13500 "message_metrics"
13501 } else {
13502 "messages"
13503 },
13504 conversation_id,
13505 cursor_message_idx,
13506 "daily_stats rebuild message scan progress"
13507 );
13508 }
13509 }
13510 }
13511 }
13512
13513 let rows_created: i64 =
13514 tx.query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
13515 row.get_typed(0)
13516 })?;
13517 let total_sessions: i64 = tx.query_row_map(
13518 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
13519 fparams![],
13520 |row| row.get_typed(0),
13521 )?;
13522
13523 tx.commit()?;
13524
13525 tracing::info!(
13526 target: "cass::perf::daily_stats",
13527 rows_created,
13528 total_sessions,
13529 conversations_processed,
13530 conversation_batches = conversation_batch_count,
13531 conversation_batch_size,
13532 message_batches = message_batch_count,
13533 message_batch_size,
13534 messages_processed,
13535 use_message_metrics,
13536 raw_entries_flushed,
13537 expanded_entries_flushed,
13538 "Daily stats rebuilt from conversations"
13539 );
13540
13541 Ok(DailyStatsRebuildResult {
13542 rows_created,
13543 total_sessions,
13544 })
13545 }
13546}
13547
/// In-memory memo of agent and workspace ids, plus hit/miss counters.
///
/// Agent ids are keyed by slug and workspace ids by path. The counters are
/// updated by the lookup methods and reported by `stats`.
#[derive(Default, Debug)]
pub struct IndexingCache {
    /// Agent slug -> id handed back by the storage layer.
    agent_ids: HashMap<String, i64>,
    /// Workspace path -> id handed back by the storage layer.
    workspace_ids: HashMap<PathBuf, i64>,
    /// Lookups answered straight from the maps.
    hits: u64,
    /// Lookups that had to be forwarded to storage.
    misses: u64,
}
13581
13582pub trait IndexingCacheStorage {
13583 fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64>;
13584 fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64>;
13585}
13586
13587impl IndexingCacheStorage for FrankenStorage {
13588 fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64> {
13589 self.ensure_agent(agent)
13590 }
13591
13592 fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
13593 self.ensure_workspace(path, display_name)
13594 }
13595}
13596
13597impl IndexingCache {
13600 pub fn new() -> Self {
13602 Self {
13603 agent_ids: HashMap::new(),
13604 workspace_ids: HashMap::new(),
13605 hits: 0,
13606 misses: 0,
13607 }
13608 }
13609
13610 pub fn is_enabled() -> bool {
13613 dotenvy::var("CASS_SQLITE_CACHE")
13614 .map(|v| v != "0" && v.to_lowercase() != "false")
13615 .unwrap_or(true)
13616 }
13617
13618 pub fn get_or_insert_agent<S>(&mut self, storage: &S, agent: &Agent) -> Result<i64>
13623 where
13624 S: IndexingCacheStorage + ?Sized,
13625 {
13626 if let Some(&cached) = self.agent_ids.get(&agent.slug) {
13627 self.hits += 1;
13628 return Ok(cached);
13629 }
13630
13631 self.misses += 1;
13632 let id = storage.ensure_indexing_agent(agent)?;
13633 self.agent_ids.insert(agent.slug.clone(), id);
13634 Ok(id)
13635 }
13636
13637 pub fn get_or_insert_workspace(
13642 &mut self,
13643 storage: &(impl IndexingCacheStorage + ?Sized),
13644 path: &Path,
13645 display_name: Option<&str>,
13646 ) -> Result<i64> {
13647 if let Some(&cached) = self.workspace_ids.get(path) {
13648 self.hits += 1;
13649 return Ok(cached);
13650 }
13651
13652 self.misses += 1;
13653 let id = storage.ensure_indexing_workspace(path, display_name)?;
13654 self.workspace_ids.insert(path.to_path_buf(), id);
13655 Ok(id)
13656 }
13657
13658 pub fn stats(&self) -> (u64, u64, f64) {
13660 let total = self.hits + self.misses;
13661 let hit_rate = if total > 0 {
13662 self.hits as f64 / total as f64
13663 } else {
13664 0.0
13665 };
13666 (self.hits, self.misses, hit_rate)
13667 }
13668
13669 pub fn clear(&mut self) {
13671 self.agent_ids.clear();
13672 self.workspace_ids.clear();
13673 self.hits = 0;
13674 self.misses = 0;
13675 }
13676
13677 pub fn agent_count(&self) -> usize {
13679 self.agent_ids.len()
13680 }
13681
13682 pub fn workspace_count(&self) -> usize {
13684 self.workspace_ids.len()
13685 }
13686}
13687
/// Additive change to one daily-stats row.
#[derive(Debug, Default, Clone, Copy)]
pub struct StatsDelta {
    /// Change in the number of sessions.
    pub session_count_delta: i64,
    /// Change in the number of messages.
    pub message_count_delta: i64,
    /// Change in the total character count.
    pub total_chars_delta: i64,
}

/// Collects [`StatsDelta`] values keyed by `(day_id, agent_slug, source_id)`.
///
/// Raw deltas are stored per concrete key; [`StatsAggregator::expand`] adds the
/// `"all"` roll-up rows.
#[derive(Default, Debug)]
pub struct StatsAggregator {
    deltas: HashMap<(i64, String, String), StatsDelta>,
}

impl StatsAggregator {
    /// Creates an aggregator with no pending deltas.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one session carrying `message_count` messages and `total_chars` characters.
    pub fn record(
        &mut self,
        agent_slug: &str,
        source_id: &str,
        day_id: i64,
        message_count: i64,
        total_chars: i64,
    ) {
        self.record_delta(agent_slug, source_id, day_id, 1, message_count, total_chars);
    }

    /// Adds the given deltas to the bucket for `(day_id, agent_slug, source_id)`.
    ///
    /// A call whose three deltas are all zero is ignored and creates no bucket.
    pub fn record_delta(
        &mut self,
        agent_slug: &str,
        source_id: &str,
        day_id: i64,
        session_count_delta: i64,
        message_count_delta: i64,
        total_chars_delta: i64,
    ) {
        let nothing_to_add = [session_count_delta, message_count_delta, total_chars_delta]
            .iter()
            .all(|&value| value == 0);
        if nothing_to_add {
            return;
        }

        let bucket = self
            .deltas
            .entry((day_id, agent_slug.to_owned(), source_id.to_owned()))
            .or_default();
        bucket.session_count_delta += session_count_delta;
        bucket.message_count_delta += message_count_delta;
        bucket.total_chars_delta += total_chars_delta;
    }

    /// Produces the rows to write, including the `"all"` roll-ups.
    ///
    /// Each raw bucket contributes to `(agent, source)`, `("all", source)`,
    /// `(agent, "all")` and `("all", "all")`. Combinations that coincide
    /// (because the agent or source is already `"all"`) are counted once.
    /// The result is sorted by day, then agent, then source.
    pub fn expand(&self) -> Vec<(i64, String, String, StatsDelta)> {
        let mut rolled: HashMap<(i64, String, String), StatsDelta> = HashMap::new();

        for ((day_id, agent, source), delta) in &self.deltas {
            let combos = [
                (agent.as_str(), source.as_str()),
                ("all", source.as_str()),
                (agent.as_str(), "all"),
                ("all", "all"),
            ];

            for (pos, &(a, s)) in combos.iter().enumerate() {
                // Skip a combination already handled earlier in this list.
                if combos[..pos].contains(&(a, s)) {
                    continue;
                }
                let slot = rolled
                    .entry((*day_id, a.to_owned(), s.to_owned()))
                    .or_default();
                slot.session_count_delta += delta.session_count_delta;
                slot.message_count_delta += delta.message_count_delta;
                slot.total_chars_delta += delta.total_chars_delta;
            }
        }

        let mut rows: Vec<(i64, String, String, StatsDelta)> = rolled
            .into_iter()
            .map(|((day, agent, source), delta)| (day, agent, source, delta))
            .collect();
        rows.sort_by(|lhs, rhs| (lhs.0, &lhs.1, &lhs.2).cmp(&(rhs.0, &rhs.1, &rhs.2)));
        rows
    }

    /// True when no non-zero delta has been recorded.
    pub fn is_empty(&self) -> bool {
        self.deltas.is_empty()
    }

    /// Number of distinct raw `(day, agent, source)` buckets, before expansion.
    pub fn raw_entry_count(&self) -> usize {
        self.deltas.len()
    }
}
13827
/// Additive token-usage counters for one bucket of `TokenStatsAggregator`.
#[derive(Debug, Default, Clone)]
pub struct TokenStatsDelta {
    /// Number of recorded usage entries.
    pub api_call_count: i64,
    /// Entries whose role was `"user"`.
    pub user_message_count: i64,
    /// Entries whose role was `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    /// Entries whose role was `"tool"`.
    pub tool_message_count: i64,
    /// Sum of input tokens.
    pub total_input_tokens: i64,
    /// Sum of output tokens.
    pub total_output_tokens: i64,
    /// Sum of cache-read tokens.
    pub total_cache_read_tokens: i64,
    /// Sum of cache-creation tokens.
    pub total_cache_creation_tokens: i64,
    /// Sum of thinking tokens.
    pub total_thinking_tokens: i64,
    /// Sum of the per-entry total token counts.
    pub grand_total_tokens: i64,
    /// Sum of content character counts.
    pub total_content_chars: i64,
    /// Sum of tool-call counts.
    pub total_tool_calls: i64,
    /// Sum of estimated costs, in USD.
    pub estimated_cost_usd: f64,
    /// Number of sessions attributed to the bucket.
    pub session_count: i64,
}
13853
13854#[derive(Debug, Default)]
13860pub struct TokenStatsAggregator {
13861 deltas: HashMap<(i64, String, String, String), TokenStatsDelta>,
13863}
13864
13865impl TokenStatsAggregator {
13866 pub fn new() -> Self {
13867 Self {
13868 deltas: HashMap::new(),
13869 }
13870 }
13871
13872 #[allow(clippy::too_many_arguments)]
13874 pub fn record(
13875 &mut self,
13876 agent_slug: &str,
13877 source_id: &str,
13878 day_id: i64,
13879 model_family: &str,
13880 role: &str,
13881 usage: &crate::connectors::ExtractedTokenUsage,
13882 content_chars: i64,
13883 estimated_cost_usd: f64,
13884 ) {
13885 let key = (
13886 day_id,
13887 agent_slug.to_owned(),
13888 source_id.to_owned(),
13889 model_family.to_owned(),
13890 );
13891 let delta = self.deltas.entry(key).or_default();
13892
13893 delta.api_call_count += 1;
13894 match role {
13895 "user" => delta.user_message_count += 1,
13896 "assistant" | "agent" => delta.assistant_message_count += 1,
13897 "tool" => delta.tool_message_count += 1,
13898 _ => {}
13899 }
13900
13901 delta.total_input_tokens += usage.input_tokens.unwrap_or(0);
13902 delta.total_output_tokens += usage.output_tokens.unwrap_or(0);
13903 delta.total_cache_read_tokens += usage.cache_read_tokens.unwrap_or(0);
13904 delta.total_cache_creation_tokens += usage.cache_creation_tokens.unwrap_or(0);
13905 delta.total_thinking_tokens += usage.thinking_tokens.unwrap_or(0);
13906 delta.grand_total_tokens += usage.total_tokens().unwrap_or(0);
13907 delta.total_content_chars += content_chars;
13908 delta.total_tool_calls += usage.tool_call_count as i64;
13909 delta.estimated_cost_usd += estimated_cost_usd;
13910 }
13911
13912 pub fn record_session(
13914 &mut self,
13915 agent_slug: &str,
13916 source_id: &str,
13917 day_id: i64,
13918 model_family: &str,
13919 ) {
13920 let key = (
13921 day_id,
13922 agent_slug.to_owned(),
13923 source_id.to_owned(),
13924 model_family.to_owned(),
13925 );
13926 self.deltas.entry(key).or_default().session_count += 1;
13927 }
13928
13929 pub fn expand(&self) -> Vec<(i64, String, String, String, TokenStatsDelta)> {
13936 let mut expanded: HashMap<(i64, String, String, String), TokenStatsDelta> = HashMap::new();
13937
13938 for ((day_id, agent, source, model), delta) in &self.deltas {
13939 let permutations = [
13940 (agent.as_str(), source.as_str(), model.as_str()),
13941 ("all", source.as_str(), model.as_str()),
13942 (agent.as_str(), "all", model.as_str()),
13943 (agent.as_str(), source.as_str(), "all"),
13944 ("all", "all", "all"),
13945 ];
13946
13947 for idx in 0..permutations.len() {
13948 let (a, s, m) = permutations[idx];
13949 if permutations[..idx].contains(&(a, s, m)) {
13951 continue;
13952 }
13953 let key = (*day_id, a.to_owned(), s.to_owned(), m.to_owned());
13954 let entry = expanded.entry(key).or_default();
13955 entry.api_call_count += delta.api_call_count;
13956 entry.user_message_count += delta.user_message_count;
13957 entry.assistant_message_count += delta.assistant_message_count;
13958 entry.tool_message_count += delta.tool_message_count;
13959 entry.total_input_tokens += delta.total_input_tokens;
13960 entry.total_output_tokens += delta.total_output_tokens;
13961 entry.total_cache_read_tokens += delta.total_cache_read_tokens;
13962 entry.total_cache_creation_tokens += delta.total_cache_creation_tokens;
13963 entry.total_thinking_tokens += delta.total_thinking_tokens;
13964 entry.grand_total_tokens += delta.grand_total_tokens;
13965 entry.total_content_chars += delta.total_content_chars;
13966 entry.total_tool_calls += delta.total_tool_calls;
13967 entry.estimated_cost_usd += delta.estimated_cost_usd;
13968 entry.session_count += delta.session_count;
13969 }
13970 }
13971
13972 let mut out: Vec<(i64, String, String, String, TokenStatsDelta)> = expanded
13973 .into_iter()
13974 .map(|((d, a, s, m), delta)| (d, a, s, m, delta))
13975 .collect();
13976 out.sort_by(|(d1, a1, s1, m1, _), (d2, a2, s2, m2, _)| {
13977 d1.cmp(d2)
13978 .then_with(|| a1.cmp(a2))
13979 .then_with(|| s1.cmp(s2))
13980 .then_with(|| m1.cmp(m2))
13981 });
13982 out
13983 }
13984
13985 pub fn is_empty(&self) -> bool {
13986 self.deltas.is_empty()
13987 }
13988
13989 pub fn raw_entry_count(&self) -> usize {
13990 self.deltas.len()
13991 }
13992}
13993
/// Additive usage counters for one rollup bucket of `AnalyticsRollupAggregator`.
#[derive(Clone, Debug, Default)]
pub struct UsageRollupDelta {
    /// Messages folded into the bucket.
    pub message_count: i64,
    /// Messages whose role was `"user"`.
    pub user_message_count: i64,
    /// Messages whose role was `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    /// Sum of per-message tool-call counts.
    pub tool_call_count: i64,
    /// Messages flagged `has_plan`.
    pub plan_message_count: i64,
    /// Estimated content tokens of plan messages.
    pub plan_content_tokens_est_total: i64,
    /// API token total of plan messages whose data source is `"api"`.
    pub plan_api_tokens_total: i64,
    /// Messages whose data source is `"api"`.
    pub api_coverage_message_count: i64,
    /// Estimated content tokens over all messages.
    pub content_tokens_est_total: i64,
    /// Estimated content tokens of user messages.
    pub content_tokens_est_user: i64,
    /// Estimated content tokens of assistant/agent messages.
    pub content_tokens_est_assistant: i64,
    /// Sum of input+output+cache+thinking tokens for `"api"` messages.
    pub api_tokens_total: i64,
    /// Input tokens of `"api"` messages.
    pub api_input_tokens_total: i64,
    /// Output tokens of `"api"` messages.
    pub api_output_tokens_total: i64,
    /// Cache-read tokens of `"api"` messages.
    pub api_cache_read_tokens_total: i64,
    /// Cache-creation tokens of `"api"` messages.
    pub api_cache_creation_tokens_total: i64,
    /// Thinking tokens of `"api"` messages.
    pub api_thinking_tokens_total: i64,
}

/// Per-message metrics consumed by `AnalyticsRollupAggregator::record`.
#[derive(Debug, Clone)]
pub struct MessageMetricsEntry {
    pub message_id: i64,
    pub created_at_ms: i64,
    /// Bucket id used for the hourly rollup.
    pub hour_id: i64,
    /// Bucket id used for the daily and per-model rollups.
    pub day_id: i64,
    pub agent_slug: String,
    pub workspace_id: i64,
    pub source_id: String,
    /// Compared against `"user"`, `"assistant"` and `"agent"` when rolling up.
    pub role: String,
    pub content_chars: i64,
    pub content_tokens_est: i64,
    pub model_name: Option<String>,
    pub model_family: String,
    pub model_tier: String,
    pub provider: String,
    pub api_input_tokens: Option<i64>,
    pub api_output_tokens: Option<i64>,
    pub api_cache_read_tokens: Option<i64>,
    pub api_cache_creation_tokens: Option<i64>,
    pub api_thinking_tokens: Option<i64>,
    pub api_service_tier: Option<String>,
    /// API token figures are rolled up only when this equals `"api"`.
    pub api_data_source: String,
    pub tool_call_count: i64,
    pub has_tool_calls: bool,
    pub has_plan: bool,
}

/// Builds hourly, daily and per-model daily usage rollups from message metrics.
///
/// Hourly and daily buckets are keyed by `(bucket_id, agent_slug, workspace_id,
/// source_id)`; per-model buckets add `(model_family, model_tier)` to the daily key.
#[derive(Debug, Default)]
pub struct AnalyticsRollupAggregator {
    hourly: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    daily: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    models_daily: HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
}

impl AnalyticsRollupAggregator {
    /// Creates an aggregator with all three rollups empty.
    pub fn new() -> Self {
        Self::default()
    }

    /// Folds one message into its hourly, daily and per-model daily buckets.
    ///
    /// All three rollups receive exactly the same counters; only the bucket key
    /// differs. Missing API token figures count as zero, and API figures are
    /// only accumulated when `api_data_source == "api"`.
    pub fn record(&mut self, entry: &MessageMetricsEntry) {
        // Single source of truth for the accumulation rules, shared by all
        // three rollups so they cannot drift apart.
        fn accumulate(bucket: &mut UsageRollupDelta, entry: &MessageMetricsEntry) {
            let content_est = entry.content_tokens_est;
            let input = entry.api_input_tokens.unwrap_or(0);
            let output = entry.api_output_tokens.unwrap_or(0);
            let cache_read = entry.api_cache_read_tokens.unwrap_or(0);
            let cache_creation = entry.api_cache_creation_tokens.unwrap_or(0);
            let thinking = entry.api_thinking_tokens.unwrap_or(0);
            let api_total = input + output + cache_read + cache_creation + thinking;
            let is_api = entry.api_data_source == "api";

            bucket.message_count += 1;
            bucket.tool_call_count += entry.tool_call_count;
            bucket.content_tokens_est_total += content_est;

            match entry.role.as_str() {
                "user" => {
                    bucket.user_message_count += 1;
                    bucket.content_tokens_est_user += content_est;
                }
                "assistant" | "agent" => {
                    bucket.assistant_message_count += 1;
                    bucket.content_tokens_est_assistant += content_est;
                }
                _ => {}
            }

            if entry.has_plan {
                bucket.plan_message_count += 1;
                bucket.plan_content_tokens_est_total += content_est;
                if is_api {
                    bucket.plan_api_tokens_total += api_total;
                }
            }

            if is_api {
                bucket.api_coverage_message_count += 1;
                bucket.api_tokens_total += api_total;
                bucket.api_input_tokens_total += input;
                bucket.api_output_tokens_total += output;
                bucket.api_cache_read_tokens_total += cache_read;
                bucket.api_cache_creation_tokens_total += cache_creation;
                bucket.api_thinking_tokens_total += thinking;
            }
        }

        for (map, bucket_id) in [
            (&mut self.hourly, entry.hour_id),
            (&mut self.daily, entry.day_id),
        ] {
            let key = (
                bucket_id,
                entry.agent_slug.clone(),
                entry.workspace_id,
                entry.source_id.clone(),
            );
            accumulate(map.entry(key).or_default(), entry);
        }

        let model_key = (
            entry.day_id,
            entry.agent_slug.clone(),
            entry.workspace_id,
            entry.source_id.clone(),
            entry.model_family.clone(),
            entry.model_tier.clone(),
        );
        accumulate(self.models_daily.entry(model_key).or_default(), entry);
    }

    /// True when no message has been recorded in any rollup.
    pub fn is_empty(&self) -> bool {
        self.hourly.is_empty() && self.daily.is_empty() && self.models_daily.is_empty()
    }

    /// Number of distinct hourly buckets.
    pub fn hourly_entry_count(&self) -> usize {
        self.hourly.len()
    }

    /// Number of distinct daily buckets.
    pub fn daily_entry_count(&self) -> usize {
        self.daily.len()
    }

    /// Number of distinct per-model daily buckets.
    pub fn models_daily_entry_count(&self) -> usize {
        self.models_daily.len()
    }
}
14174
/// Applies the plan heuristic only to assistant/agent messages.
///
/// `role` is trimmed and compared ASCII-case-insensitively; any other role
/// yields `false` without inspecting `content`.
fn has_plan_for_role(role: &str, content: &str) -> bool {
    let speaker = role.trim();
    let is_model_turn = ["assistant", "agent"]
        .iter()
        .any(|name| speaker.eq_ignore_ascii_case(name));
    is_model_turn && has_plan_heuristic(content)
}

/// Heuristically decides whether `content` looks like a written plan.
///
/// The text is lowercased and reduced to at most 60 non-empty lines outside
/// fenced code. It counts as a plan when a plan header (or the text `plan:`)
/// appears near the top and the lines contain enough step-like items: two
/// numbered steps, one numbered plus one bullet, or three bullets. Content
/// shorter than 24 bytes never qualifies, and content that looks like tool
/// output is only accepted when it has an explicit header line.
fn has_plan_heuristic(content: &str) -> bool {
    const MIN_BYTES: usize = 24;
    const MAX_LINES: usize = 60;
    const TOOL_MARKERS: [&str; 5] = ["```", "\"tool\"", "stdout:", "stderr:", "exit code:"];
    const HEADERS: [&str; 6] = [
        "## plan",
        "# plan",
        "plan:",
        "implementation plan",
        "next steps:",
        "action plan:",
    ];
    const BULLETS: [&str; 5] = ["- ", "* ", "+ ", "- [ ] ", "- [x] "];

    if content.len() < MIN_BYTES {
        return false;
    }

    let lowered = content.to_lowercase();
    let tool_like = TOOL_MARKERS.iter().any(|marker| lowered.contains(*marker));

    // Keep trimmed, non-empty lines that are outside ``` fences.
    let mut inside_fence = false;
    let mut kept: Vec<&str> = Vec::with_capacity(MAX_LINES);
    for text in lowered.lines().map(str::trim) {
        if text.starts_with("```") {
            inside_fence = !inside_fence;
        } else if !inside_fence && !text.is_empty() {
            kept.push(text);
            if kept.len() >= MAX_LINES {
                break;
            }
        }
    }

    let header_idx = kept
        .iter()
        .position(|text| HEADERS.iter().any(|header| text.starts_with(*header)));
    // `plan:` cannot span a line break, so checking the first eight lines one by
    // one is the same as searching their newline-joined text.
    let mentions_plan_early = kept.iter().take(8).any(|text| text.contains("plan:"));
    let header_near_top = matches!(header_idx, Some(idx) if idx <= 6) || mentions_plan_early;

    if !header_near_top || (tool_like && header_idx.is_none()) {
        return false;
    }

    let numbered = kept
        .iter()
        .filter(|text| is_numbered_step_line(text))
        .count();
    let bulleted = kept
        .iter()
        .filter(|text| BULLETS.iter().any(|bullet| text.starts_with(*bullet)))
        .count();

    numbered >= 2 || (numbered >= 1 && bulleted >= 1) || bulleted >= 3
}

/// True for lines such as `1. step` or `12) step`.
///
/// After leading whitespace the line must start with one to three ASCII digits
/// followed by `". "` or `") "`.
fn is_numbered_step_line(line: &str) -> bool {
    let body = line.trim_start();
    // ASCII digits are single bytes, so the count is also a valid byte offset.
    let digits = body.bytes().take_while(u8::is_ascii_digit).count();
    if !(1..=3).contains(&digits) {
        return false;
    }
    let tail = &body[digits..];
    [". ", ") "].iter().any(|sep| tail.starts_with(*sep))
}
14266
/// One message's token-usage record.
///
/// NOTE(review): the producer and consumer of this struct are outside this
/// section; field notes below are limited to what the types show.
#[derive(Clone, Debug)]
pub struct TokenUsageEntry {
    pub message_id: i64,
    pub conversation_id: i64,
    pub agent_id: i64,
    /// `None` when the message has no associated workspace.
    pub workspace_id: Option<i64>,
    pub source_id: String,
    pub timestamp_ms: i64,
    pub day_id: i64,
    pub model_name: Option<String>,
    pub model_family: Option<String>,
    pub model_tier: Option<String>,
    pub service_tier: Option<String>,
    pub provider: Option<String>,
    // Token figures are optional: `None` means the figure was not available.
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_creation_tokens: Option<i64>,
    pub thinking_tokens: Option<i64>,
    pub total_tokens: Option<i64>,
    pub estimated_cost_usd: Option<f64>,
    pub role: String,
    pub content_chars: i64,
    pub has_tool_calls: bool,
    pub tool_call_count: u32,
    pub data_source: String,
}
14295
/// One row of model pricing, as loaded by `PricingTable`.
#[derive(Clone, Debug)]
pub struct PricingEntry {
    /// SQL-LIKE pattern (`%`, `_`) matched against model names during lookup.
    pub model_pattern: String,
    pub provider: String,
    /// Cost per million non-cached input tokens.
    pub input_cost_per_mtok: f64,
    /// Cost per million output tokens.
    pub output_cost_per_mtok: f64,
    /// Cost per million cache-read tokens; `None` means those tokens add no cost.
    pub cache_read_cost_per_mtok: Option<f64>,
    /// Cost per million cache-creation tokens; `None` means those tokens add no cost.
    pub cache_creation_cost_per_mtok: Option<f64>,
    /// First day this price applies, as days since 2020-01-01.
    pub effective_day_id: i64,
}
14312
14313#[derive(Debug, Clone, Default)]
14315pub struct PricingDiagnostics {
14316 pub priced_count: u64,
14317 pub unpriced_count: u64,
14318 pub unknown_models: HashMap<String, u64>,
14320}
14321
14322impl PricingDiagnostics {
14323 fn record_priced(&mut self) {
14324 self.priced_count += 1;
14325 }
14326
14327 fn record_unpriced(&mut self, model_name: Option<&str>) {
14328 self.unpriced_count += 1;
14329 let key = model_name.unwrap_or("(none)").to_string();
14330 *self.unknown_models.entry(key).or_insert(0) += 1;
14331 }
14332
14333 pub fn log_summary(&self) {
14335 let total = self.priced_count + self.unpriced_count;
14336 if total == 0 {
14337 return;
14338 }
14339 let pct = (self.priced_count as f64 / total as f64) * 100.0;
14340 tracing::info!(
14341 target: "cass::analytics::pricing",
14342 priced = self.priced_count,
14343 unpriced = self.unpriced_count,
14344 total = total,
14345 coverage_pct = format!("{pct:.1}%"),
14346 "pricing coverage"
14347 );
14348 if !self.unknown_models.is_empty() {
14349 let mut sorted: Vec<_> = self.unknown_models.iter().collect();
14350 sorted.sort_by(|a, b| b.1.cmp(a.1));
14351 for (model, count) in sorted.iter().take(5) {
14352 tracing::debug!(
14353 target: "cass::analytics::pricing",
14354 model = model.as_str(),
14355 count = count,
14356 "unknown model (no pricing)"
14357 );
14358 }
14359 }
14360 }
14361}
14362
14363#[derive(Debug, Clone)]
14365pub struct PricingTable {
14366 entries: Vec<PricingEntry>,
14367}
14368
14369impl PricingTable {
14370 pub fn load(conn: &FrankenConnection) -> Result<Self> {
14372 Self::franken_load(conn)
14373 }
14374
14375 pub fn franken_load(conn: &FrankenConnection) -> Result<Self> {
14377 let rows = conn.query(
14378 "SELECT model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
14379 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
14380 FROM model_pricing
14381 ORDER BY effective_date DESC",
14382 )?;
14383 let mut entries = Vec::with_capacity(rows.len());
14384 for row in &rows {
14385 let effective_date: String = row.get_typed(6)?;
14386 let effective_day_id = date_str_to_day_id(&effective_date)?;
14387 entries.push(PricingEntry {
14388 model_pattern: row.get_typed(0)?,
14389 provider: row.get_typed(1)?,
14390 input_cost_per_mtok: row.get_typed(2)?,
14391 output_cost_per_mtok: row.get_typed(3)?,
14392 cache_read_cost_per_mtok: row.get_typed(4)?,
14393 cache_creation_cost_per_mtok: row.get_typed(5)?,
14394 effective_day_id,
14395 });
14396 }
14397 Ok(Self { entries })
14398 }
14399
14400 pub fn lookup(&self, model_name: &str, message_day_id: i64) -> Option<&PricingEntry> {
14408 let mut best: Option<&PricingEntry> = None;
14409
14410 for entry in &self.entries {
14411 if entry.effective_day_id > message_day_id {
14412 continue;
14413 }
14414 if !sql_like_match(model_name, &entry.model_pattern) {
14415 continue;
14416 }
14417
14418 match best {
14419 None => best = Some(entry),
14420 Some(current) => {
14421 if entry.effective_day_id > current.effective_day_id
14422 || (entry.effective_day_id == current.effective_day_id
14423 && entry.model_pattern.len() > current.model_pattern.len())
14424 {
14425 best = Some(entry);
14426 }
14427 }
14428 }
14429 }
14430
14431 best
14432 }
14433
14434 pub fn compute_cost(
14438 &self,
14439 model_name: Option<&str>,
14440 message_day_id: i64,
14441 input_tokens: Option<i64>,
14442 output_tokens: Option<i64>,
14443 cache_read_tokens: Option<i64>,
14444 cache_creation_tokens: Option<i64>,
14445 ) -> Option<f64> {
14446 let model = model_name?;
14447 let pricing = self.lookup(model, message_day_id)?;
14448
14449 if input_tokens.is_none() && output_tokens.is_none() {
14450 return None;
14451 }
14452
14453 let mut cost = 0.0;
14454 let cache_read = cache_read_tokens.unwrap_or(0);
14455 let cache_creation = cache_creation_tokens.unwrap_or(0);
14456 let non_cache_input = input_tokens
14459 .unwrap_or(0)
14460 .saturating_sub(cache_read)
14461 .saturating_sub(cache_creation)
14462 .max(0);
14463 cost += non_cache_input as f64 * pricing.input_cost_per_mtok / 1_000_000.0;
14464 cost += output_tokens.unwrap_or(0) as f64 * pricing.output_cost_per_mtok / 1_000_000.0;
14465
14466 if let Some(cache_price) = pricing.cache_read_cost_per_mtok {
14467 cost += cache_read as f64 * cache_price / 1_000_000.0;
14468 }
14469 if let Some(cache_price) = pricing.cache_creation_cost_per_mtok {
14470 cost += cache_creation as f64 * cache_price / 1_000_000.0;
14471 }
14472
14473 Some(cost)
14474 }
14475
14476 pub fn is_empty(&self) -> bool {
14478 self.entries.is_empty()
14479 }
14480}
14481
14482fn date_str_to_day_id(s: &str) -> Result<i64> {
14485 use chrono::NaiveDate;
14486 const EPOCH_2020: NaiveDate = match NaiveDate::from_ymd_opt(2020, 1, 1) {
14487 Some(d) => d,
14488 None => unreachable!(),
14489 };
14490 NaiveDate::parse_from_str(s, "%Y-%m-%d")
14491 .map(|d| (d - EPOCH_2020).num_days())
14492 .with_context(|| format!("invalid effective_date '{s}'"))
14493}
14494
/// Matches `value` against a SQL `LIKE` pattern, ignoring ASCII case.
///
/// `%` matches any run of characters (including none) and `_` matches exactly
/// one character. There is no escape syntax.
fn sql_like_match(value: &str, pattern: &str) -> bool {
    let folded_value = value.to_ascii_lowercase();
    let folded_pattern = pattern.to_ascii_lowercase();
    sql_like_match_bytes(folded_value.as_bytes(), folded_pattern.as_bytes())
}

/// Length in bytes of the UTF-8 sequence introduced by lead byte `b`.
///
/// Bytes below `0xE0` that are not ASCII (including continuation bytes) are
/// reported as two-byte sequences.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0x7F => 1,
        0x80..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}

/// Recursive, case-sensitive `LIKE` matcher over UTF-8 bytes.
///
/// Wildcards advance through `val` one UTF-8 character at a time so that `_`
/// and `%` never split a multi-byte character.
fn sql_like_match_bytes(val: &[u8], pat: &[u8]) -> bool {
    let Some((&head, tail)) = pat.split_first() else {
        // An exhausted pattern only matches an exhausted value.
        return val.is_empty();
    };

    match head {
        b'%' => {
            // Consecutive `%` behave like a single one.
            let extra = tail.iter().take_while(|&&byte| byte == b'%').count();
            let rest = &tail[extra..];

            // Try every character boundary, including the end of `val`.
            let mut offset = 0;
            loop {
                if sql_like_match_bytes(&val[offset..], rest) {
                    return true;
                }
                if offset >= val.len() {
                    return false;
                }
                offset += utf8_char_len(val[offset]);
                if offset > val.len() {
                    return false;
                }
            }
        }
        b'_' => match val.first() {
            None => false,
            Some(&lead) => {
                let width = utf8_char_len(lead);
                width <= val.len() && sql_like_match_bytes(&val[width..], tail)
            }
        },
        literal => val
            .split_first()
            .is_some_and(|(&byte, remaining)| byte == literal && sql_like_match_bytes(remaining, tail)),
    }
}
14552
14553fn rebuild_batch_size_env(var: &str, default: usize) -> usize {
14554 dotenvy::var(var)
14555 .ok()
14556 .and_then(|raw| raw.parse::<usize>().ok())
14557 .filter(|value| *value > 0)
14558 .unwrap_or(default)
14559}
14560
/// Reports whether the rendered error message mentions "out of memory"
/// (ASCII case-insensitive).
fn is_out_of_memory_error(err: &impl std::fmt::Display) -> bool {
    const NEEDLE: &str = "out of memory";
    let mut rendered = err.to_string();
    rendered.make_ascii_lowercase();
    rendered.contains(NEEDLE)
}
14566
/// Session, message and character totals for a single day id.
#[derive(Clone, Debug)]
pub struct DailyCount {
    /// Day the totals belong to.
    pub day_id: i64,
    /// Number of sessions on that day.
    pub sessions: i64,
    /// Number of messages on that day.
    pub messages: i64,
    /// Number of characters on that day.
    pub chars: i64,
}
14579
/// Summary returned after an analytics rebuild.
///
/// NOTE(review): the rebuild that fills this in is outside this section; the
/// field notes follow the field names.
#[derive(Clone, Debug)]
pub struct AnalyticsRebuildResult {
    /// Rows reported for message metrics.
    pub message_metrics_rows: usize,
    /// Rows reported for the hourly usage rollup.
    pub usage_hourly_rows: usize,
    /// Rows reported for the daily usage rollup.
    pub usage_daily_rows: usize,
    /// Rows reported for the per-model daily usage rollup.
    pub usage_models_daily_rows: usize,
    /// Elapsed time in milliseconds.
    pub elapsed_ms: u64,
    /// Throughput in messages per second.
    pub messages_per_sec: f64,
}
14590
/// Outcome of rebuilding the `daily_stats` table.
#[derive(Clone, Debug)]
pub struct DailyStatsRebuildResult {
    /// Number of rows in `daily_stats` after the rebuild.
    pub rows_created: i64,
    /// Sum of `session_count` over the `('all', 'all')` roll-up rows.
    pub total_sessions: i64,
}
14597
/// Counts of what an agent-archive purge removed.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct AgentArchivePurgeResult {
    /// Number of conversations deleted.
    pub conversations_deleted: usize,
    /// Number of messages deleted.
    pub messages_deleted: usize,
}
14604
/// Health snapshot of the materialized daily statistics.
///
/// NOTE(review): the code that computes these values is outside this section;
/// confirm the exact definition of `drift` against it.
#[derive(Clone, Debug)]
pub struct DailyStatsHealth {
    /// Whether the table holds any data.
    pub populated: bool,
    /// Number of rows in the table.
    pub row_count: i64,
    /// Oldest update timestamp in milliseconds, if any.
    pub oldest_update_ms: Option<i64>,
    /// Number of conversations in the source data.
    pub conversation_count: i64,
    /// Total recorded in the materialized table.
    pub materialized_total: i64,
    /// Presumably the gap between `conversation_count` and `materialized_total`.
    pub drift: i64,
}
14615
/// Batch size used for FTS5 work.
// NOTE(review): the use site is outside this section; presumably the number of
// rows handled per FTS insert statement. Confirm before changing.
const FTS5_BATCH_SIZE: usize = 100;
14624
/// One message row as read while rebuilding the FTS index.
// NOTE(review): populated by rebuild code outside this section.
#[derive(Clone, Debug)]
struct FtsRebuildMessageRow {
    rowid: i64,
    message_id: i64,
    conversation_id: i64,
    content: String,
    /// `None` when the message carries no timestamp.
    created_at: Option<i64>,
}
14633
/// Conversation-level fields needed to build FTS entries for its messages.
// NOTE(review): populated by rebuild code outside this section.
#[derive(Clone, Debug)]
struct FtsConversationProjection {
    title: String,
    /// `None` when the conversation has no agent reference.
    agent_id: Option<i64>,
    /// `None` when the conversation has no workspace reference.
    workspace_id: Option<i64>,
    source_path: String,
}
14641
/// One document destined for the full-text search index.
#[derive(Clone, Debug)]
pub struct FtsEntry {
    /// Message text.
    pub content: String,
    /// Conversation title; empty when the conversation has none.
    pub title: String,
    /// Agent slug of the conversation.
    pub agent: String,
    /// Workspace path as text; empty when the conversation has none.
    pub workspace: String,
    /// Source path of the conversation as text.
    pub source_path: String,
    /// Message timestamp, falling back to the conversation start.
    pub created_at: Option<i64>,
    /// Id of the message this entry indexes.
    pub message_id: i64,
}
14653
14654impl FtsEntry {
14655 pub fn from_message(message_id: i64, msg: &Message, conv: &Conversation) -> Self {
14657 FtsEntry {
14658 content: msg.content.clone(),
14659 title: conv.title.clone().unwrap_or_default(),
14660 agent: conv.agent_slug.clone(),
14661 workspace: conv
14662 .workspace
14663 .as_ref()
14664 .map(|p| p.to_string_lossy().into_owned())
14665 .unwrap_or_default(),
14666 source_path: path_to_string(&conv.source_path),
14667 created_at: msg.created_at.or(conv.started_at),
14668 message_id,
14669 }
14670 }
14671}
14672
// NOTE(review): both limits are consumed outside this section; presumably a
// pending FTS batch is flushed once either one is reached.
/// Maximum number of documents in one pending FTS batch.
const FTS_ENTRY_BATCH_MAX_DOCS: usize = 512;
/// Maximum accumulated content size of one pending FTS batch (1 MiB).
const FTS_ENTRY_BATCH_MAX_CHARS: usize = 1 << 20;
14675
/// FTS rebuild batch size used when `CASS_FTS_REBUILD_BATCH_SIZE` gives no usable value.
const FTS_REBUILD_BATCH_SIZE_DEFAULT: usize = 5_000;
14681
14682fn fts_rebuild_batch_size() -> usize {
14685 dotenvy::var("CASS_FTS_REBUILD_BATCH_SIZE")
14686 .ok()
14687 .and_then(|v| v.parse::<usize>().ok())
14688 .filter(|&n| n > 0)
14689 .unwrap_or(FTS_REBUILD_BATCH_SIZE_DEFAULT)
14690}
14691
14692fn flush_pending_fts_entries(
14693 storage: &FrankenStorage,
14694 tx: &FrankenTransaction<'_>,
14695 entries: &mut Vec<FtsEntry>,
14696 pending_chars: &mut usize,
14697 inserted_total: &mut usize,
14698) -> Result<()> {
14699 if entries.is_empty() {
14700 return Ok(());
14701 }
14702
14703 if storage.fts_messages_present_cached(tx) {
14704 *inserted_total += franken_batch_insert_fts(tx, entries)?;
14705 }
14706 entries.clear();
14707 *pending_chars = 0;
14708 Ok(())
14709}
14710
/// Renders a path as an owned `String`, replacing non-UTF-8 parts lossily.
fn path_to_string<P: AsRef<Path>>(p: P) -> String {
    String::from(p.as_ref().to_string_lossy())
}
14714
14715fn role_str(role: &MessageRole) -> String {
14716 role_as_str(role).to_owned()
14717}
14718
14719fn role_as_str(role: &MessageRole) -> &str {
14720 match role {
14721 MessageRole::User => "user",
14722 MessageRole::Agent => "agent",
14723 MessageRole::Tool => "tool",
14724 MessageRole::System => "system",
14725 MessageRole::Other(v) => v.as_str(),
14726 }
14727}
14728
14729fn agent_kind_str(kind: AgentKind) -> String {
14730 match kind {
14731 AgentKind::Cli => "cli".into(),
14732 AgentKind::VsCode => "vscode".into(),
14733 AgentKind::Hybrid => "hybrid".into(),
14734 }
14735}
14736
14737#[cfg(test)]
14742mod tests {
14743 use super::*;
14744 use serial_test::serial;
14745 use tempfile::TempDir;
14746
14747 struct EnvGuard {
14748 key: &'static str,
14749 previous: Option<String>,
14750 }
14751
14752 impl Drop for EnvGuard {
14753 fn drop(&mut self) {
14754 if let Some(value) = &self.previous {
14755 unsafe {
14757 std::env::set_var(self.key, value);
14758 }
14759 } else {
14760 unsafe {
14762 std::env::remove_var(self.key);
14763 }
14764 }
14765 }
14766 }
14767
14768 fn set_env_var(key: &'static str, value: impl AsRef<str>) -> EnvGuard {
14769 let previous = dotenvy::var(key).ok();
14770 unsafe {
14772 std::env::set_var(key, value.as_ref());
14773 }
14774 EnvGuard { key, previous }
14775 }
14776
14777 #[test]
14778 fn doctor_mutation_open_guard_only_targets_canonical_archive_db() {
14779 let dir = TempDir::new().unwrap();
14780 let canonical = dir.path().join("agent_search.db");
14781 let scratch = dir.path().join("scratch.db");
14782
14783 assert_eq!(
14784 doctor_mutation_lock_path_for_db_open(&canonical),
14785 Some(dir.path().join("doctor/locks/doctor-repair.lock"))
14786 );
14787 assert_eq!(doctor_mutation_lock_path_for_db_open(&scratch), None);
14788 }
14789
14790 #[test]
14791 fn doctor_lock_metadata_pid_detection_is_exact() {
14792 let current = std::process::id();
14793
14794 assert!(doctor_lock_metadata_pid_is_current_process(&format!(
14795 "schema_version=1\npid={current}\nmode=safe_auto_run\n"
14796 )));
14797 assert!(!doctor_lock_metadata_pid_is_current_process(
14798 "schema_version=1\npid=not-a-pid\n"
14799 ));
14800 assert!(!doctor_lock_metadata_pid_is_current_process(&format!(
14801 "pid={}\n",
14802 current.saturating_add(1)
14803 )));
14804 }
14805
14806 #[test]
14807 fn doctor_storage_open_refuses_active_doctor_mutation_lock_from_other_process() {
14808 use std::io::Write as _;
14809
14810 let dir = TempDir::new().unwrap();
14811 let db_path = dir.path().join("agent_search.db");
14812 {
14813 let storage = FrankenStorage::open(&db_path).unwrap();
14814 storage.close().unwrap();
14815 }
14816
14817 let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
14818 let mut lock_file = fs::OpenOptions::new()
14819 .create(true)
14820 .truncate(false)
14821 .read(true)
14822 .write(true)
14823 .open(&lock_path)
14824 .unwrap();
14825 fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
14826 lock_file.set_len(0).unwrap();
14827 lock_file.write_all(b"schema_version=1\npid=1\n").unwrap();
14828 lock_file.sync_all().unwrap();
14829
14830 let err =
14831 open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
14832 .expect_err("active doctor mutation lock must block canonical DB opens");
14833 let message = err.to_string();
14834 assert!(
14835 message.contains("doctor mutation lock") && message.contains("active"),
14836 "error should identify the active doctor mutation lock: {message}"
14837 );
14838
14839 fs2::FileExt::unlock(&lock_file).unwrap();
14840 }
14841
14842 #[test]
14843 fn doctor_storage_open_allows_current_doctor_process_probe() {
14844 use std::io::Write as _;
14845
14846 let dir = TempDir::new().unwrap();
14847 let db_path = dir.path().join("agent_search.db");
14848 {
14849 let storage = FrankenStorage::open(&db_path).unwrap();
14850 storage.close().unwrap();
14851 }
14852
14853 let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
14854 let mut lock_file = fs::OpenOptions::new()
14855 .create(true)
14856 .truncate(false)
14857 .read(true)
14858 .write(true)
14859 .open(&lock_path)
14860 .unwrap();
14861 fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
14862 lock_file.set_len(0).unwrap();
14863 write!(lock_file, "schema_version=1\npid={}\n", std::process::id()).unwrap();
14864 lock_file.sync_all().unwrap();
14865
14866 let conn =
14867 open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
14868 .expect(
14869 "doctor process must be able to run post-repair read probes under its own lock",
14870 );
14871 drop(conn);
14872
14873 fs2::FileExt::unlock(&lock_file).unwrap();
14874 }
14875
14876 #[test]
14877 fn autocommit_retain_disable_tries_compat_then_canonical_pragma() {
14878 let mut attempts = Vec::new();
14879
14880 let selected = disable_autocommit_retain(|pragma| {
14881 attempts.push(pragma);
14882 if pragma == "PRAGMA fsqlite.autocommit_retain = OFF;" {
14883 Err("compat namespace unavailable")
14884 } else {
14885 Ok(())
14886 }
14887 })
14888 .expect("canonical pragma should disable autocommit retain");
14889
14890 assert_eq!(selected, "PRAGMA autocommit_retain = OFF;");
14891 assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
14892 }
14893
14894 #[test]
14895 fn autocommit_retain_disable_fails_closed_when_no_pragma_works() {
14896 let mut attempts = Vec::new();
14897
14898 let err = disable_autocommit_retain(|pragma| {
14899 attempts.push(pragma);
14900 Err("unsupported pragma")
14901 })
14902 .expect_err("unsupported autocommit retain controls should fail closed");
14903
14904 assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
14905 let message = err.to_string();
14906 assert!(
14907 message.contains("refusing to keep a long-lived MVCC connection"),
14908 "error should force callers away from unbounded snapshot retention: {message}"
14909 );
14910 assert!(
14911 message.contains("PRAGMA fsqlite.autocommit_retain = OFF;")
14912 && message.contains("PRAGMA autocommit_retain = OFF;"),
14913 "error should preserve attempted PRAGMAs for diagnostics: {message}"
14914 );
14915 }
14916
14917 fn rusqlite_test_fixture_conn(db_path: &Path) -> rusqlite::Connection {
14926 rusqlite::Connection::open(db_path).expect("open rusqlite test fixture connection")
14927 }
14928
14929 fn seed_historical_db_direct(
14930 db_path: &Path,
14931 conversations: &[crate::model::types::Conversation],
14932 ) {
14933 if let Some(parent) = db_path.parent() {
14934 fs::create_dir_all(parent).unwrap();
14935 }
14936
14937 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
14938 conn.execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA).unwrap();
14939 conn.execute_compat(
14940 "INSERT INTO agents(id, slug, name, version, kind, created_at, updated_at)
14941 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
14942 fparams![1_i64, "codex", "Codex", "0.2.3", "cli", 0_i64, 0_i64],
14943 )
14944 .unwrap();
14945
14946 let mut next_message_id = 1_i64;
14947 for (conv_index, conv) in conversations.iter().enumerate() {
14948 let conversation_id = i64::try_from(conv_index + 1).unwrap();
14949 let workspace_id = conv.workspace.as_ref().map(|workspace| {
14950 let workspace_id = conversation_id;
14951 let workspace_path = workspace.to_string_lossy().into_owned();
14952 conn.execute_compat(
14953 "INSERT INTO workspaces(id, path, display_name) VALUES(?1, ?2, ?3)",
14954 fparams![
14955 workspace_id,
14956 workspace_path.as_str(),
14957 workspace_path.as_str()
14958 ],
14959 )
14960 .unwrap();
14961 workspace_id
14962 });
14963 let source_path = conv.source_path.to_string_lossy().into_owned();
14964 let metadata_json = conv.metadata_json.to_string();
14965 conn.execute_compat(
14966 "INSERT INTO conversations (
14967 id, agent_id, workspace_id, source_id, external_id, title, source_path,
14968 started_at, ended_at, approx_tokens, metadata_json, origin_host
14969 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
14970 fparams![
14971 conversation_id,
14972 1_i64,
14973 workspace_id,
14974 conv.source_id.as_str(),
14975 conv.external_id.as_deref(),
14976 conv.title.as_deref(),
14977 source_path.as_str(),
14978 conv.started_at,
14979 conv.ended_at,
14980 conv.approx_tokens,
14981 metadata_json.as_str(),
14982 conv.origin_host.as_deref()
14983 ],
14984 )
14985 .unwrap();
14986
14987 for msg in &conv.messages {
14988 let extra_json = msg.extra_json.to_string();
14989 let role = role_str(&msg.role);
14990 conn.execute_compat(
14991 "INSERT INTO messages(
14992 id, conversation_id, idx, role, author, created_at, content, extra_json
14993 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
14994 fparams![
14995 next_message_id,
14996 conversation_id,
14997 msg.idx,
14998 role.as_str(),
14999 msg.author.as_deref(),
15000 msg.created_at,
15001 msg.content.as_str(),
15002 extra_json.as_str()
15003 ],
15004 )
15005 .unwrap();
15006 next_message_id += 1;
15007 }
15008 }
15009 }
15010
15011 #[test]
15016 fn is_user_data_file_detects_bookmarks() {
15017 assert!(is_user_data_file(Path::new("/data/bookmarks.db")));
15018 assert!(is_user_data_file(Path::new("bookmarks.db")));
15019 }
15020
15021 #[test]
15022 fn is_user_data_file_detects_tui_state() {
15023 assert!(is_user_data_file(Path::new("/data/tui_state.json")));
15024 }
15025
15026 #[test]
15027 fn is_user_data_file_detects_sources_toml() {
15028 assert!(is_user_data_file(Path::new("/config/sources.toml")));
15029 }
15030
15031 #[test]
15032 fn is_user_data_file_detects_env() {
15033 assert!(is_user_data_file(Path::new(".env")));
15034 }
15035
15036 #[test]
15037 fn is_user_data_file_rejects_other_files() {
15038 assert!(!is_user_data_file(Path::new("index.db")));
15039 assert!(!is_user_data_file(Path::new("conversations.db")));
15040 assert!(!is_user_data_file(Path::new("random.txt")));
15041 }
15042
15043 #[test]
15048 fn create_backup_returns_none_for_nonexistent() {
15049 let dir = TempDir::new().unwrap();
15050 let db_path = dir.path().join("nonexistent.db");
15051 let result = create_backup(&db_path).unwrap();
15052 assert!(result.is_none());
15053 }
15054
15055 #[test]
15056 fn create_backup_creates_named_file() {
15057 let dir = TempDir::new().unwrap();
15058 let db_path = dir.path().join("test.db");
15059 std::fs::write(&db_path, b"test data").unwrap();
15060
15061 let backup_path = create_backup(&db_path).unwrap();
15062 assert!(backup_path.is_some());
15063 let backup = backup_path.unwrap();
15064 assert!(backup.exists());
15065 assert!(
15066 backup
15067 .file_name()
15068 .unwrap()
15069 .to_str()
15070 .unwrap()
15071 .contains("backup")
15072 );
15073 }
15074
15075 #[test]
15076 fn create_backup_paths_are_unique() {
15077 let dir = TempDir::new().unwrap();
15078 let db_path = dir.path().join("test.db");
15079 std::fs::write(&db_path, b"test data").unwrap();
15080
15081 let first = create_backup(&db_path).unwrap().unwrap();
15082 let second = create_backup(&db_path).unwrap().unwrap();
15083
15084 assert_ne!(first, second);
15085 assert!(first.exists());
15086 assert!(second.exists());
15087 }
15088
15089 #[test]
15090 fn lexical_rebuild_messages_query_uses_conversation_idx_access_path() {
15091 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
15092 use std::path::PathBuf;
15093
15094 let dir = TempDir::new().unwrap();
15095 let db_path = dir.path().join("agent_search.db");
15096 let storage = SqliteStorage::open(&db_path).unwrap();
15097
15098 let agent = Agent {
15099 id: None,
15100 slug: "claude_code".into(),
15101 name: "Claude Code".into(),
15102 version: None,
15103 kind: AgentKind::Cli,
15104 };
15105 let agent_id = storage.ensure_agent(&agent).unwrap();
15106 let conversation = Conversation {
15107 id: None,
15108 agent_slug: "claude_code".into(),
15109 workspace: Some(PathBuf::from("/tmp/workspace")),
15110 external_id: Some("conv-1".into()),
15111 title: Some("Lexical rebuild".into()),
15112 source_path: PathBuf::from("/tmp/conv-1.jsonl"),
15113 started_at: Some(1_700_000_000_000),
15114 ended_at: Some(1_700_000_000_100),
15115 approx_tokens: None,
15116 metadata_json: serde_json::Value::Null,
15117 messages: vec![
15118 Message {
15119 id: None,
15120 idx: 0,
15121 role: MessageRole::User,
15122 author: Some("user".into()),
15123 created_at: Some(1_700_000_000_010),
15124 content: "first".into(),
15125 extra_json: serde_json::Value::Null,
15126 snippets: Vec::new(),
15127 },
15128 Message {
15129 id: None,
15130 idx: 1,
15131 role: MessageRole::Agent,
15132 author: Some("assistant".into()),
15133 created_at: Some(1_700_000_000_020),
15134 content: "second".into(),
15135 extra_json: serde_json::Value::Null,
15136 snippets: Vec::new(),
15137 },
15138 ],
15139 source_id: LOCAL_SOURCE_ID.into(),
15140 origin_host: None,
15141 };
15142 storage
15143 .insert_conversation_tree(agent_id, None, &conversation)
15144 .unwrap();
15145 let conversation_id = storage
15146 .conn
15147 .query_row_map(
15148 "SELECT id FROM conversations WHERE external_id = ?1",
15149 fparams!["conv-1"],
15150 |row| row.get_typed::<i64>(0),
15151 )
15152 .unwrap();
15153
15154 let opcodes: Vec<String> = storage
15155 .conn
15156 .query_map_collect(
15157 "EXPLAIN \
15158 SELECT id, idx, role, author, created_at, content \
15159 FROM messages \
15160 WHERE conversation_id = ?1 ORDER BY idx",
15161 fparams![conversation_id],
15162 |row| row.get_typed(1),
15163 )
15164 .unwrap();
15165
15166 assert!(
15167 opcodes.iter().any(|opcode| opcode == "SeekGE"),
15168 "expected lexical rebuild message fetch to seek into the conversation_id/idx access path, got {opcodes:?}"
15169 );
15170 assert!(
15171 !opcodes.iter().any(|opcode| opcode == "SorterOpen"),
15172 "expected lexical rebuild message fetch to avoid sorter temp b-trees, got {opcodes:?}"
15173 );
15174 }
15175
15176 #[test]
15177 fn schema_check_rebuild_classification_ignores_transient_errors() {
15178 assert!(!schema_check_error_requires_rebuild(
15179 &frankensqlite::FrankenError::Busy
15180 ));
15181 assert!(!schema_check_error_requires_rebuild(
15182 &frankensqlite::FrankenError::DatabaseLocked {
15183 path: PathBuf::from("/tmp/test.db"),
15184 }
15185 ));
15186 assert!(!schema_check_error_requires_rebuild(
15187 &frankensqlite::FrankenError::CannotOpen {
15188 path: PathBuf::from("/tmp/test.db"),
15189 }
15190 ));
15191 assert!(!schema_check_error_requires_rebuild(
15192 &frankensqlite::FrankenError::Io(std::io::Error::other("disk hiccup"))
15193 ));
15194 }
15195
15196 #[test]
15197 fn schema_check_rebuild_classification_keeps_corruption_errors() {
15198 assert!(schema_check_error_requires_rebuild(
15199 &frankensqlite::FrankenError::DatabaseCorrupt {
15200 detail: "bad header".to_string(),
15201 }
15202 ));
15203 assert!(schema_check_error_requires_rebuild(
15204 &frankensqlite::FrankenError::WalCorrupt {
15205 detail: "bad wal".to_string(),
15206 }
15207 ));
15208 assert!(schema_check_error_requires_rebuild(
15209 &frankensqlite::FrankenError::NotADatabase {
15210 path: PathBuf::from("/tmp/test.db"),
15211 }
15212 ));
15213 assert!(schema_check_error_requires_rebuild(
15214 &frankensqlite::FrankenError::ShortRead {
15215 expected: 4096,
15216 actual: 64,
15217 }
15218 ));
15219 }
15220
15221 #[test]
15222 fn create_backup_refuses_raw_copy_after_retryable_vacuum_errors() {
15223 let retryable_errors = [
15224 frankensqlite::FrankenError::Busy,
15225 frankensqlite::FrankenError::BusyRecovery,
15226 frankensqlite::FrankenError::BusySnapshot {
15227 conflicting_pages: "1,2".to_string(),
15228 },
15229 frankensqlite::FrankenError::DatabaseLocked {
15230 path: PathBuf::from("/tmp/test.db"),
15231 },
15232 frankensqlite::FrankenError::LockFailed {
15233 detail: "fcntl lock still held".to_string(),
15234 },
15235 frankensqlite::FrankenError::WriteConflict { page: 7, holder: 9 },
15236 frankensqlite::FrankenError::SerializationFailure { page: 11 },
15237 frankensqlite::FrankenError::Internal("database is locked".to_string()),
15238 ];
15239
15240 for err in retryable_errors {
15241 assert!(
15242 backup_vacuum_error_requires_consistent_retry(&err),
15243 "retryable VACUUM failure must not fall back to raw bundle copy: {err}"
15244 );
15245 }
15246
15247 assert!(!backup_vacuum_error_requires_consistent_retry(
15248 &frankensqlite::FrankenError::NotADatabase {
15249 path: PathBuf::from("/tmp/test.db")
15250 }
15251 ));
15252 assert!(!backup_vacuum_error_requires_consistent_retry(
15253 &frankensqlite::FrankenError::DatabaseCorrupt {
15254 detail: "bad header".to_string()
15255 }
15256 ));
15257 }
15258
15259 #[test]
15260 fn create_backup_uses_hidden_vacuum_stage_path() {
15261 let backup_path = PathBuf::from("/tmp/test.db.backup.123.456.0");
15262 let stage_path = vacuum_stage_backup_path(&backup_path);
15263 let stage_name = stage_path
15264 .file_name()
15265 .and_then(|name| name.to_str())
15266 .unwrap_or_default();
15267
15268 assert!(stage_name.starts_with('.'));
15269 assert!(stage_name.ends_with(".vacuum-in-progress"));
15270 assert!(
15271 !is_backup_root_name(stage_name, "test.db.backup."),
15272 "incomplete VACUUM output must not be discoverable as a backup root"
15273 );
15274 }
15275
15276 #[test]
15277 fn create_backup_preserves_content() {
15278 let dir = TempDir::new().unwrap();
15279 let db_path = dir.path().join("test.db");
15280 let original_content = b"test database content 12345";
15281 std::fs::write(&db_path, original_content).unwrap();
15282
15283 let backup_path = create_backup(&db_path).unwrap().unwrap();
15284 let backup_content = std::fs::read(&backup_path).unwrap();
15285 assert_eq!(backup_content, original_content);
15286 }
15287
15288 #[test]
15289 fn create_backup_copies_sidecars_when_present() {
15290 let dir = TempDir::new().unwrap();
15291 let db_path = dir.path().join("test.db");
15292 std::fs::write(&db_path, b"db").unwrap();
15293 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15294 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15295
15296 let backup_path = create_backup(&db_path).unwrap().unwrap();
15297
15298 assert_eq!(
15299 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15300 b"wal"
15301 );
15302 assert_eq!(
15303 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15304 b"shm"
15305 );
15306 }
15307
15308 #[test]
15309 #[cfg(unix)]
15310 fn create_backup_rejects_symlink_root_during_raw_fallback() {
15311 use std::os::unix::fs::symlink;
15312
15313 let dir = TempDir::new().unwrap();
15314 let outside_db = dir.path().join("outside.db");
15315 let db_path = dir.path().join("test.db");
15316 std::fs::write(&outside_db, b"not sqlite").unwrap();
15317 symlink(&outside_db, &db_path).unwrap();
15318
15319 let err = create_backup(&db_path).unwrap_err();
15320
15321 assert!(
15322 err.to_string().contains("bundle symlink"),
15323 "unexpected error: {err:#}"
15324 );
15325 assert_eq!(std::fs::read(&outside_db).unwrap(), b"not sqlite");
15326 let backup_roots: Vec<_> = std::fs::read_dir(dir.path())
15327 .unwrap()
15328 .filter_map(|entry| entry.ok())
15329 .map(|entry| entry.file_name().to_string_lossy().into_owned())
15330 .filter(|name| name.starts_with("test.db.backup."))
15331 .collect();
15332 assert!(
15333 backup_roots.is_empty(),
15334 "symlinked backup source must not publish backup roots: {backup_roots:?}"
15335 );
15336 }
15337
15338 #[test]
15339 #[cfg(unix)]
15340 fn create_backup_rejects_symlink_sidecar_without_partial_backup() {
15341 use std::os::unix::fs::symlink;
15342
15343 let dir = TempDir::new().unwrap();
15344 let db_path = dir.path().join("test.db");
15345 let outside_wal = dir.path().join("outside.wal");
15346 let wal_path = database_sidecar_path(&db_path, "-wal");
15347 std::fs::write(&db_path, b"not sqlite").unwrap();
15348 std::fs::write(&outside_wal, b"outside wal").unwrap();
15349 symlink(&outside_wal, &wal_path).unwrap();
15350
15351 let err = create_backup(&db_path).unwrap_err();
15352
15353 assert!(
15354 err.to_string().contains("bundle symlink"),
15355 "unexpected error: {err:#}"
15356 );
15357 assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");
15358 let backup_roots: Vec<_> = std::fs::read_dir(dir.path())
15359 .unwrap()
15360 .filter_map(|entry| entry.ok())
15361 .map(|entry| entry.file_name().to_string_lossy().into_owned())
15362 .filter(|name| name.starts_with("test.db.backup."))
15363 .collect();
15364 assert!(
15365 backup_roots.is_empty(),
15366 "sidecar preflight failure must not leave a partial backup root: {backup_roots:?}"
15367 );
15368 }
15369
15370 #[test]
15375 fn cleanup_old_backups_keeps_recent() {
15376 let dir = TempDir::new().unwrap();
15377 let db_path = dir.path().join("test.db");
15378
15379 for i in 0..5 {
15381 let backup_name = format!("test.db.backup.{}", 1000 + i);
15382 std::fs::write(dir.path().join(&backup_name), format!("backup {i}")).unwrap();
15383 }
15384
15385 cleanup_old_backups(&db_path, 3).unwrap();
15386
15387 let backups: Vec<_> = std::fs::read_dir(dir.path())
15389 .unwrap()
15390 .filter_map(|e| e.ok())
15391 .filter(|e| e.file_name().to_str().unwrap_or("").contains("backup"))
15392 .collect();
15393
15394 assert_eq!(backups.len(), 3);
15395 }
15396
15397 #[test]
15398 fn cleanup_old_backups_ignores_wal_and_shm_sidecars() {
15399 let dir = TempDir::new().unwrap();
15400 let db_path = dir.path().join("test.db");
15401
15402 for i in 0..3 {
15403 let backup_name = format!("test.db.backup.{}", 1000 + i);
15404 let backup_path = dir.path().join(&backup_name);
15405 std::fs::write(&backup_path, format!("backup {i}")).unwrap();
15406 std::fs::write(format!("{}-wal", backup_path.display()), b"wal").unwrap();
15407 std::fs::write(format!("{}-shm", backup_path.display()), b"shm").unwrap();
15408 std::thread::sleep(std::time::Duration::from_millis(20));
15409 }
15410
15411 cleanup_old_backups(&db_path, 2).unwrap();
15412
15413 let mut roots = Vec::new();
15414 let mut wals = Vec::new();
15415 let mut shms = Vec::new();
15416 for entry in std::fs::read_dir(dir.path())
15417 .unwrap()
15418 .filter_map(|e| e.ok())
15419 {
15420 let name = entry.file_name().to_string_lossy().into_owned();
15421 if name.ends_with("-wal") {
15422 wals.push(name);
15423 } else if name.ends_with("-shm") {
15424 shms.push(name);
15425 } else if name.contains("backup") {
15426 roots.push(name);
15427 }
15428 }
15429
15430 assert_eq!(roots.len(), 2, "should keep two backup roots");
15431 assert_eq!(
15432 wals.len(),
15433 2,
15434 "should keep WAL sidecars only for retained backups"
15435 );
15436 assert_eq!(
15437 shms.len(),
15438 2,
15439 "should keep SHM sidecars only for retained backups"
15440 );
15441 }
15442
15443 #[test]
15444 fn move_database_bundle_moves_database_and_sidecars() {
15445 let dir = TempDir::new().unwrap();
15446 let db_path = dir.path().join("test.db");
15447 let backup_path = dir.path().join("test.db.corrupt");
15448
15449 std::fs::write(&db_path, b"db").unwrap();
15450 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15451 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15452
15453 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15454 assert_eq!(
15455 moved,
15456 DatabaseBundleMoveResult {
15457 database: true,
15458 wal: true,
15459 shm: true
15460 }
15461 );
15462 assert!(moved.moved_any());
15463
15464 assert!(!db_path.exists());
15465 assert!(!database_sidecar_path(&db_path, "-wal").exists());
15466 assert!(!database_sidecar_path(&db_path, "-shm").exists());
15467
15468 assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
15469 assert_eq!(
15470 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15471 b"wal"
15472 );
15473 assert_eq!(
15474 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15475 b"shm"
15476 );
15477 }
15478
15479 #[test]
15480 fn move_database_bundle_preserves_orphan_sidecars_without_main_db() {
15481 let dir = TempDir::new().unwrap();
15482 let db_path = dir.path().join("test.db");
15483 let backup_path = dir.path().join("test.db.corrupt");
15484
15485 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15486 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15487
15488 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15489 assert_eq!(
15490 moved,
15491 DatabaseBundleMoveResult {
15492 database: false,
15493 wal: true,
15494 shm: true
15495 }
15496 );
15497 assert!(moved.moved_any());
15498 assert!(!db_path.exists());
15499 assert!(!database_sidecar_path(&db_path, "-wal").exists());
15500 assert!(!database_sidecar_path(&db_path, "-shm").exists());
15501 assert_eq!(
15502 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15503 b"wal"
15504 );
15505 assert_eq!(
15506 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15507 b"shm"
15508 );
15509 }
15510
15511 #[test]
15512 #[cfg(unix)]
15513 fn move_database_bundle_moves_dangling_symlink_database_root() {
15514 use std::os::unix::fs::symlink;
15515
15516 let dir = TempDir::new().unwrap();
15517 let db_path = dir.path().join("test.db");
15518 let backup_path = dir.path().join("test.db.corrupt");
15519 let missing_target = dir.path().join("missing-target.db");
15520
15521 symlink(&missing_target, &db_path).unwrap();
15522
15523 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15524
15525 assert_eq!(
15526 moved,
15527 DatabaseBundleMoveResult {
15528 database: true,
15529 wal: false,
15530 shm: false
15531 }
15532 );
15533 assert!(std::fs::symlink_metadata(&db_path).is_err());
15534 assert!(
15535 std::fs::symlink_metadata(&backup_path)
15536 .unwrap()
15537 .file_type()
15538 .is_symlink()
15539 );
15540 assert!(!missing_target.exists());
15541 }
15542
15543 #[test]
15544 #[cfg(unix)]
15545 fn move_database_bundle_moves_dangling_symlink_sidecars_without_main_db() {
15546 use std::os::unix::fs::symlink;
15547
15548 let dir = TempDir::new().unwrap();
15549 let db_path = dir.path().join("test.db");
15550 let backup_path = dir.path().join("test.db.corrupt");
15551 let missing_wal_target = dir.path().join("missing-wal");
15552 let missing_shm_target = dir.path().join("missing-shm");
15553 let wal_path = database_sidecar_path(&db_path, "-wal");
15554 let shm_path = database_sidecar_path(&db_path, "-shm");
15555
15556 symlink(&missing_wal_target, &wal_path).unwrap();
15557 symlink(&missing_shm_target, &shm_path).unwrap();
15558
15559 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15560
15561 assert_eq!(
15562 moved,
15563 DatabaseBundleMoveResult {
15564 database: false,
15565 wal: true,
15566 shm: true
15567 }
15568 );
15569 assert!(std::fs::symlink_metadata(&wal_path).is_err());
15570 assert!(std::fs::symlink_metadata(&shm_path).is_err());
15571 assert!(
15572 std::fs::symlink_metadata(database_sidecar_path(&backup_path, "-wal"))
15573 .unwrap()
15574 .file_type()
15575 .is_symlink()
15576 );
15577 assert!(
15578 std::fs::symlink_metadata(database_sidecar_path(&backup_path, "-shm"))
15579 .unwrap()
15580 .file_type()
15581 .is_symlink()
15582 );
15583 assert!(!missing_wal_target.exists());
15584 assert!(!missing_shm_target.exists());
15585 }
15586
15587 #[test]
15588 fn copy_database_bundle_copies_database_and_sidecars() {
15589 let dir = TempDir::new().unwrap();
15590 let db_path = dir.path().join("test.db");
15591 let copied_path = dir.path().join("copy.db");
15592
15593 std::fs::write(&db_path, b"db").unwrap();
15594 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15595 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15596
15597 copy_database_bundle(&db_path, &copied_path).unwrap();
15598
15599 assert_eq!(std::fs::read(&copied_path).unwrap(), b"db");
15600 assert_eq!(
15601 std::fs::read(database_sidecar_path(&copied_path, "-wal")).unwrap(),
15602 b"wal"
15603 );
15604 assert_eq!(
15605 std::fs::read(database_sidecar_path(&copied_path, "-shm")).unwrap(),
15606 b"shm"
15607 );
15608 assert_eq!(std::fs::read(&db_path).unwrap(), b"db");
15609 }
15610
15611 #[test]
15612 fn copy_database_bundle_creates_destination_parent() {
15613 let dir = TempDir::new().unwrap();
15614 let db_path = dir.path().join("test.db");
15615 let copied_path = dir.path().join("nested/copies/copy.db");
15616
15617 std::fs::write(&db_path, b"db").unwrap();
15618 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15619
15620 copy_database_bundle(&db_path, &copied_path).unwrap();
15621
15622 assert!(copied_path.parent().unwrap().is_dir());
15623 assert_eq!(std::fs::read(&copied_path).unwrap(), b"db");
15624 assert_eq!(
15625 std::fs::read(database_sidecar_path(&copied_path, "-wal")).unwrap(),
15626 b"wal"
15627 );
15628 }
15629
15630 #[test]
15631 #[cfg(unix)]
15632 fn copy_database_bundle_rejects_symlink_source_root() {
15633 use std::os::unix::fs::symlink;
15634
15635 let dir = TempDir::new().unwrap();
15636 let outside_db = dir.path().join("outside.db");
15637 let db_path = dir.path().join("test.db");
15638 let copied_path = dir.path().join("copy.db");
15639
15640 std::fs::write(&outside_db, b"outside").unwrap();
15641 symlink(&outside_db, &db_path).unwrap();
15642
15643 let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();
15644
15645 assert!(
15646 err.to_string().contains("bundle symlink"),
15647 "unexpected error: {err:#}"
15648 );
15649 assert!(!copied_path.exists());
15650 assert_eq!(std::fs::read(&outside_db).unwrap(), b"outside");
15651 }
15652
15653 #[test]
15654 #[cfg(unix)]
15655 fn copy_database_bundle_rejects_symlink_sidecar() {
15656 use std::os::unix::fs::symlink;
15657
15658 let dir = TempDir::new().unwrap();
15659 let db_path = dir.path().join("test.db");
15660 let copied_path = dir.path().join("copy.db");
15661 let outside_wal = dir.path().join("outside.wal");
15662 let wal_path = database_sidecar_path(&db_path, "-wal");
15663
15664 std::fs::write(&db_path, b"db").unwrap();
15665 std::fs::write(&outside_wal, b"outside wal").unwrap();
15666 symlink(&outside_wal, &wal_path).unwrap();
15667
15668 let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();
15669
15670 assert!(
15671 err.to_string().contains("bundle symlink"),
15672 "unexpected error: {err:#}"
15673 );
15674 assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");
15675 assert!(!copied_path.exists());
15676 assert!(!database_sidecar_path(&copied_path, "-wal").exists());
15677 }
15678
15679 #[test]
15680 fn move_database_bundle_creates_destination_parent_and_moves_sidecars() {
15681 let dir = TempDir::new().unwrap();
15682 let db_path = dir.path().join("test.db");
15683 let backup_path = dir.path().join("nested/backups/test.db.corrupt");
15684
15685 std::fs::write(&db_path, b"db").unwrap();
15686 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15687 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15688
15689 let moved = move_database_bundle(&db_path, &backup_path).unwrap();
15690 assert_eq!(
15691 moved,
15692 DatabaseBundleMoveResult {
15693 database: true,
15694 wal: true,
15695 shm: true
15696 }
15697 );
15698 assert!(backup_path.parent().unwrap().is_dir());
15699 assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
15700 assert_eq!(
15701 std::fs::read(database_sidecar_path(&backup_path, "-wal")).unwrap(),
15702 b"wal"
15703 );
15704 assert_eq!(
15705 std::fs::read(database_sidecar_path(&backup_path, "-shm")).unwrap(),
15706 b"shm"
15707 );
15708 }
15709
15710 #[test]
15711 fn remove_database_files_removes_orphan_sidecars_without_main_db() {
15712 let dir = TempDir::new().unwrap();
15713 let db_path = dir.path().join("test.db");
15714
15715 std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
15716 std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();
15717
15718 remove_database_files(&db_path).unwrap();
15719
15720 assert!(!db_path.exists());
15721 assert!(!database_sidecar_path(&db_path, "-wal").exists());
15722 assert!(!database_sidecar_path(&db_path, "-shm").exists());
15723 }
15724
/// After `cleanup_old_backups(&db_path, 2)` the test expects exactly two regular backup
/// files to remain, and a directory that merely shares the backup name prefix to be left
/// in place.
#[test]
fn cleanup_old_backups_ignores_backup_named_directories() {
    const BACKUP_PREFIX: &str = "test.db.backup.";

    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    let db_path = root.join("test.db");

    // Three regular backup files plus one decoy directory with the same prefix.
    for i in 0..3 {
        let file_name = format!("{BACKUP_PREFIX}{}", 1000 + i);
        std::fs::write(root.join(file_name), format!("backup {i}")).unwrap();
    }
    std::fs::create_dir(root.join(format!("{BACKUP_PREFIX}directory"))).unwrap();

    cleanup_old_backups(&db_path, 2).unwrap();

    // Split the surviving backup-prefixed entries into directories and everything else.
    let (backup_dirs, backup_files): (Vec<_>, Vec<_>) = std::fs::read_dir(root)
        .unwrap()
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .starts_with(BACKUP_PREFIX)
        })
        .partition(|entry| entry.path().is_dir());

    assert_eq!(
        backup_files.len(),
        2,
        "only real backup files count toward retention"
    );
    assert_eq!(
        backup_dirs.len(),
        1,
        "backup-named directories should be ignored"
    );
}
15766
/// `SqliteStorage::open` on a path that does not exist yet must create the database file.
#[test]
fn open_creates_new_database() {
    let tmp = TempDir::new().unwrap();
    let fresh_path = tmp.path().join("new.db");
    assert!(!fresh_path.exists());

    let storage = SqliteStorage::open(&fresh_path).unwrap();
    assert!(fresh_path.exists());

    // Close explicitly so a failure while closing fails the test.
    storage.close().unwrap();
}
15781
/// Opening a missing database read-only must return an error.
#[test]
fn open_readonly_fails_for_nonexistent() {
    let tmp = TempDir::new().unwrap();
    let missing_path = tmp.path().join("nonexistent.db");

    assert!(SqliteStorage::open_readonly(&missing_path).is_err());
}
15789
/// A database created by `SqliteStorage::open` can afterwards be opened read-only and
/// still reports a schema version.
#[test]
fn open_readonly_succeeds_for_existing() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    // Create the database, then release the writer handle before reopening read-only.
    drop(SqliteStorage::open(&db_path).unwrap());

    let readonly = SqliteStorage::open_readonly(&db_path).unwrap();
    assert!(readonly.schema_version().is_ok());
}
15803
/// Opening a database that is already at `CURRENT_SCHEMA_VERSION` a second time must
/// succeed and leave the reported version unchanged.
#[test]
fn reopen_existing_current_schema_is_idempotent() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    // First open creates the database; the handle is released before reopening.
    let first = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(first.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(first);

    let second = SqliteStorage::open(&db_path).unwrap();
    let reopened_version = second.schema_version().unwrap();
    assert_eq!(
        reopened_version, CURRENT_SCHEMA_VERSION,
        "reopening current schema DB should be idempotent"
    );
}
15823
/// `SqliteStorage::open_or_rebuild` on a database that is already at
/// `CURRENT_SCHEMA_VERSION` must return a usable handle rather than an error.
#[test]
fn open_or_rebuild_current_schema_does_not_trigger_rebuild() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    // Create the database through the regular open path and release the handle.
    let created = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(created.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(created);

    let reopened = SqliteStorage::open_or_rebuild(&db_path)
        .expect("current schema DB should open without rebuild");
    let reopened_version = reopened.schema_version().unwrap();
    assert_eq!(reopened_version, CURRENT_SCHEMA_VERSION);
}
15840
/// Pointing `open_or_rebuild` at a directory must surface a `Database` or `Io` error and
/// must leave the directory in place; it must not be reported as `RebuildRequired` or
/// collapsed into `Other`.
#[test]
fn open_or_rebuild_does_not_treat_non_database_paths_as_corruption() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("db_dir");
    std::fs::create_dir(&db_path).unwrap();

    let error = match SqliteStorage::open_or_rebuild(&db_path) {
        Ok(_) => panic!("directory path must not open as a database"),
        Err(error) => error,
    };

    // Kept exhaustive on purpose: a new `MigrationError` variant forces a decision here.
    match error {
        MigrationError::Database(_) | MigrationError::Io(_) => {}
        MigrationError::RebuildRequired { reason, .. } => {
            panic!("should not rebuild non-database path: {reason}")
        }
        MigrationError::Other(msg) => {
            panic!("should preserve underlying open error, got Other: {msg}")
        }
    }

    assert!(
        db_path.is_dir(),
        "non-database directory must be left in place"
    );
}
15865
/// A freshly created database must report exactly `CURRENT_SCHEMA_VERSION`.
///
/// The check is an equality rather than a lower bound so that a migration chain which
/// stops short of the current version fails this test instead of slipping through.
#[test]
fn schema_version_returns_current() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Freshly opened database should report CURRENT_SCHEMA_VERSION"
    );
}
15878
/// Opens a brand-new database and inspects the resulting schema: the four analytics tables
/// must expose the expected columns and indexes, `conversations` must carry the tail
/// columns, no `fts_messages` object may exist, and `PRAGMA integrity_check` must be clean.
#[test]
fn migration_v13_creates_analytics_tables() {
    /// Runs `PRAGMA <pragma>(<table>)` and collects column 1 of every returned row, which
    /// this test uses as the column name (`table_info`) or index name (`index_list`).
    fn pragma_names(conn: &FrankenConnection, pragma: &str, table: &str) -> Vec<String> {
        conn.query_map_collect(
            &format!("PRAGMA {pragma}({table})"),
            fparams![],
            |row: &FrankenRow| row.get_typed(1),
        )
        .unwrap()
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Schema version must match CURRENT_SCHEMA_VERSION after migration"
    );

    let conn = storage.raw();

    // A sample of columns each analytics table must have (not the full column list).
    let expected_columns: [(&str, &[&str]); 4] = [
        (
            "message_metrics",
            &[
                "message_id",
                "hour_id",
                "day_id",
                "content_tokens_est",
                "model_name",
                "model_family",
                "model_tier",
                "provider",
                "api_input_tokens",
                "has_plan",
                "agent_slug",
                "role",
                "api_data_source",
            ],
        ),
        (
            "usage_hourly",
            &[
                "hour_id",
                "plan_message_count",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_coverage_message_count",
                "content_tokens_est_user",
                "api_thinking_tokens_total",
            ],
        ),
        (
            "usage_daily",
            &[
                "day_id",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_thinking_tokens_total",
                "content_tokens_est_assistant",
                "message_count",
            ],
        ),
        (
            "usage_models_daily",
            &[
                "day_id",
                "model_family",
                "model_tier",
                "message_count",
                "api_tokens_total",
                "api_coverage_message_count",
            ],
        ),
    ];
    for (table, columns) in expected_columns {
        let present = pragma_names(conn, "table_info", table);
        for column in columns {
            assert!(
                present.iter().any(|name| name == column),
                "{table} missing column: {column}"
            );
        }
    }

    // Index checks are substring matches against the names reported by `index_list`.
    let expected_indexes: [(&str, &[(&str, &str)]); 4] = [
        (
            "message_metrics",
            &[
                ("idx_mm_hour", "message_metrics must have hour index"),
                (
                    "idx_mm_agent_day",
                    "message_metrics must have agent+day index",
                ),
                (
                    "idx_mm_model_family_day",
                    "message_metrics must have model_family+day index",
                ),
            ],
        ),
        (
            "usage_hourly",
            &[("idx_uh_agent", "usage_hourly must have agent index")],
        ),
        (
            "usage_daily",
            &[("idx_ud_agent", "usage_daily must have agent index")],
        ),
        (
            "usage_models_daily",
            &[(
                "idx_umd_model_day",
                "usage_models_daily must have model+day index",
            )],
        ),
    ];
    for (table, indexes) in expected_indexes {
        let present = pragma_names(conn, "index_list", table);
        for (needle, message) in indexes {
            assert!(
                present.iter().any(|name| name.contains(*needle)),
                "{message}"
            );
        }
    }

    let conversation_cols = pragma_names(conn, "table_info", "conversations");
    let has_tail_columns = ["last_message_idx", "last_message_created_at"]
        .iter()
        .all(|column| conversation_cols.iter().any(|name| name == column));
    assert!(
        has_tail_columns,
        "fresh schema must include V15 tail columns without ALTER TABLE on conversations"
    );

    let fts_schema_rows: i64 = conn
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row: &FrankenRow| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        fts_schema_rows, 0,
        "fresh schema should not create and immediately drop derived fts_messages"
    );

    let integrity_report: Vec<String> = conn
        .query_map_collect("PRAGMA integrity_check;", fparams![], |row: &FrankenRow| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(
        integrity_report,
        vec!["ok".to_string()],
        "fresh schema must pass SQLite integrity_check"
    );
}
16056
/// `hour_id_from_millis` must agree with `day_id_from_millis` (24 hours per day), and
/// `millis_from_hour_id` must map back to a timestamp at or before the input, less than
/// one hour away.
#[test]
fn hour_id_round_trip() {
    const MILLIS_PER_HOUR: i64 = 3_600_000;

    // 1_770_508_800 s is an exact multiple of 86_400, i.e. midnight UTC.
    let ts_ms = 1_770_508_800_000_i64;
    let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
    let day_id = SqliteStorage::day_id_from_millis(ts_ms);

    assert_eq!(hour_id / 24, day_id, "hour_id/24 should equal day_id");

    let hour_start_ms = SqliteStorage::millis_from_hour_id(hour_id);
    let elapsed_in_hour = ts_ms - hour_start_ms;
    assert!(
        (0..MILLIS_PER_HOUR).contains(&elapsed_in_hour),
        "Round-trip should land within the same hour"
    );
}
16074
/// A timestamp of -1 ms must be treated as second -1 (floored), not second 0 (truncated),
/// when deriving day and hour ids relative to the 2020 epoch offset used below.
#[test]
fn day_and_hour_ids_floor_negative_millis() {
    const SECS_PER_DAY: i64 = 86_400;
    const SECS_PER_HOUR: i64 = 3_600;

    let ts_ms = -1_i64;
    let expected_secs = -1_i64;
    let epoch_2020_secs = 1_577_836_800_i64;
    let secs_since_2020 = expected_secs - epoch_2020_secs;

    assert_eq!(
        SqliteStorage::day_id_from_millis(ts_ms),
        secs_since_2020.div_euclid(SECS_PER_DAY)
    );
    assert_eq!(
        SqliteStorage::hour_id_from_millis(ts_ms),
        secs_since_2020.div_euclid(SECS_PER_HOUR)
    );
}
16092
/// Hand-builds a database whose `meta.schema_version` is `'10'`, then checks that
/// `SqliteStorage::open` brings it to `CURRENT_SCHEMA_VERSION` and that all four analytics
/// tables exist afterwards.
#[test]
fn migration_v13_from_v10() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    {
        let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
        conn.execute_batch("PRAGMA journal_mode=WAL;").unwrap();
        conn.execute_batch("CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);")
            .unwrap();
        conn.execute("INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', '10')")
            .unwrap();

        // Replay the legacy migration scripts in order inside one transaction, then pin the
        // recorded version back to 10 in case a script changed it.
        let mut tx = conn.transaction().unwrap();
        for script in [
            MIGRATION_V1,
            MIGRATION_V2,
            MIGRATION_V4,
            MIGRATION_V5,
            MIGRATION_V6,
            MIGRATION_V7,
            MIGRATION_V8,
            MIGRATION_V9,
            MIGRATION_V10,
        ] {
            tx.execute_batch(script).unwrap();
        }
        tx.execute("UPDATE meta SET value = '10' WHERE key = 'schema_version'")
            .unwrap();
        tx.commit().unwrap();
    }
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    let storage = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Should have migrated from v10 to the current schema"
    );

    let analytics_table_count: i64 = storage
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('message_metrics', 'usage_hourly', 'usage_daily', 'usage_models_daily')",
            fparams![],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap();
    assert_eq!(
        analytics_table_count, 4,
        "All 4 analytics tables should exist"
    );
}
16144
/// End-to-end ingest check: inserting one three-message `claude_code` conversation through
/// `insert_conversations_batched` must populate `message_metrics` and the hourly, daily and
/// per-model rollup tables with values that agree with each other.
#[test]
fn analytics_ingest_populates_metrics_and_rollups() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Column order of the `message_metrics` projection queried below:
    // (hour_id, day_id, role, content_tokens_est, has_plan, api_data_source,
    //  model_family, model_tier, provider)
    type MetricsRow = (i64, i64, String, i64, i64, String, String, String, String);

    const USER_PROMPT: &str = "Hello, can you help me with a plan?";

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // All three messages are stamped within one minute of this base timestamp.
    let ts_ms = 1_770_551_400_000_i64;
    let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
    let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);

    // Usage payload attached to the assistant message only.
    let usage_json = serde_json::json!({
        "message": {
            "model": "claude-opus-4-6",
            "usage": {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_read_input_tokens": 200,
                "cache_creation_input_tokens": 30,
                "service_tier": "standard"
            }
        }
    });

    // Builds a message with no id, author or snippets.
    let message = |idx, role, created_at, content: &str, extra_json| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json,
        snippets: vec![],
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: None,
        external_id: Some("test-conv-1".into()),
        title: Some("Test conversation".into()),
        source_path: PathBuf::from("/tmp/test.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            message(
                0,
                MessageRole::User,
                ts_ms,
                USER_PROMPT,
                serde_json::Value::Null,
            ),
            message(
                1,
                MessageRole::Agent,
                ts_ms + 30_000,
                "## Plan\n\n1. First step\n2. Second step\n3. Third step",
                usage_json,
            ),
            message(
                2,
                MessageRole::User,
                ts_ms + 60_000,
                "Great, let's proceed!",
                serde_json::Value::Null,
            ),
        ],
        source_id: "local".into(),
        origin_host: None,
    };

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), 3);

    let conn = storage.raw();

    // Runs a query whose single result row has one integer column.
    let scalar = |sql: &str, params: &[ParamValue]| -> i64 {
        conn.query_row_map(sql, params, |row: &FrankenRow| row.get_typed::<i64>(0))
            .unwrap()
    };

    // --- message_metrics: one row per ingested message -------------------------------
    assert_eq!(
        scalar("SELECT COUNT(*) FROM message_metrics", fparams![]),
        3,
        "Should have 3 message_metrics rows"
    );

    let rows: Vec<MetricsRow> = conn
        .query_map_collect(
            "SELECT hour_id, day_id, role, content_tokens_est, has_plan, api_data_source, model_family, model_tier, provider FROM message_metrics ORDER BY message_id",
            fparams![],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                    row.get_typed(6)?,
                    row.get_typed(7)?,
                    row.get_typed(8)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(rows.len(), 3);
    let (first_user, assistant, second_user) = (&rows[0], &rows[1], &rows[2]);
    assert_eq!(first_user.0, expected_hour);
    assert_eq!(first_user.1, expected_day);
    assert_eq!(first_user.2, "user");
    assert_eq!(
        assistant.4, 1,
        "Assistant message with plan should have has_plan=1"
    );
    assert_eq!(
        assistant.5, "api",
        "Claude Code assistant message should have api data source"
    );
    assert_eq!(first_user.5, "estimated");
    assert_eq!(second_user.5, "estimated");
    assert_eq!(assistant.6, "claude");
    assert_eq!(assistant.7, "opus");
    assert_eq!(assistant.8, "anthropic");
    assert_eq!(first_user.6, "unknown");
    // The expected estimate for the first user message is its byte length divided by 4.
    let user_chars = USER_PROMPT.len() as i64;
    assert_eq!(first_user.3, user_chars / 4);

    // --- usage_hourly ------------------------------------------------------------------
    let (
        hour_total,
        hour_user,
        hour_assistant,
        hour_plan,
        hour_plan_content,
        hour_plan_api,
        hour_api_covered,
    ): (i64, i64, i64, i64, i64, i64, i64) = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
                    plan_content_tokens_est_total, plan_api_tokens_total, api_coverage_message_count
             FROM usage_hourly WHERE hour_id = ?",
            fparams![expected_hour],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                    row.get_typed(6)?,
                ))
            },
        )
        .unwrap();
    assert_eq!(hour_total, 3, "Hourly rollup should have 3 messages");
    assert_eq!(hour_user, 2, "Hourly rollup should have 2 user messages");
    assert_eq!(
        hour_assistant, 1,
        "Hourly rollup should have 1 assistant message"
    );
    assert_eq!(hour_plan, 1, "Hourly rollup should have 1 plan message");
    assert!(
        hour_plan_content > 0,
        "Hourly rollup should include plan content tokens"
    );
    assert!(
        hour_plan_api > 0,
        "Hourly rollup should include plan API tokens"
    );
    assert_eq!(
        hour_api_covered, 1,
        "Hourly rollup should have 1 API-covered message"
    );

    // --- usage_daily -------------------------------------------------------------------
    let (day_total, day_api_covered): (i64, i64) = conn
        .query_row_map(
            "SELECT message_count, api_coverage_message_count FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(day_total, 3, "Daily rollup should match hourly");
    assert_eq!(
        day_api_covered, 1,
        "Daily api_coverage should be 1 (only assistant msg has real API data)"
    );

    let api_only_input = scalar(
        "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE day_id = ? AND api_data_source = 'api'",
        fparams![expected_day],
    );
    assert_eq!(
        api_only_input, 100,
        "Only API-sourced input tokens should be 100"
    );

    // --- daily rollup totals must equal sums recomputed from message_metrics ----------
    let metrics_content_est = scalar(
        "SELECT SUM(content_tokens_est) FROM message_metrics WHERE day_id = ?",
        fparams![expected_day],
    );
    let metrics_plan_content_est = scalar(
        "SELECT COALESCE(SUM(content_tokens_est), 0) FROM message_metrics WHERE day_id = ? AND has_plan = 1",
        fparams![expected_day],
    );
    let metrics_plan_api_total = scalar(
        "SELECT COALESCE(SUM(COALESCE(api_input_tokens, 0) + COALESCE(api_output_tokens, 0) + COALESCE(api_cache_read_tokens, 0) + COALESCE(api_cache_creation_tokens, 0) + COALESCE(api_thinking_tokens, 0)), 0)
         FROM message_metrics WHERE day_id = ? AND has_plan = 1 AND api_data_source = 'api'",
        fparams![expected_day],
    );
    let daily_content_est = scalar(
        "SELECT content_tokens_est_total FROM usage_daily WHERE day_id = ?",
        fparams![expected_day],
    );
    let (daily_plan_content_est, daily_plan_api_total): (i64, i64) = conn
        .query_row_map(
            "SELECT plan_content_tokens_est_total, plan_api_tokens_total FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(
        metrics_content_est, daily_content_est,
        "Daily rollup content_tokens_est_total must equal SUM of message_metrics"
    );
    assert_eq!(
        metrics_plan_content_est, daily_plan_content_est,
        "Daily rollup plan_content_tokens_est_total must equal planned message_metrics content sum"
    );
    assert_eq!(
        metrics_plan_api_total, daily_plan_api_total,
        "Daily rollup plan_api_tokens_total must equal planned message_metrics API token sum"
    );

    // --- usage_models_daily ------------------------------------------------------------
    let (opus_total, opus_user, opus_assistant, opus_api_total, opus_api_covered): (
        i64,
        i64,
        i64,
        i64,
        i64,
    ) = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, api_tokens_total, api_coverage_message_count
             FROM usage_models_daily
             WHERE day_id = ? AND model_family = 'claude' AND model_tier = 'opus'",
            fparams![expected_day],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                ))
            },
        )
        .unwrap();
    assert_eq!(opus_total, 1);
    assert_eq!(opus_user, 0);
    assert_eq!(opus_assistant, 1);
    // 380 equals the sum of the four token counts in `usage_json` (100 + 50 + 200 + 30).
    assert_eq!(opus_api_total, 380);
    assert_eq!(opus_api_covered, 1);

    let unknown_bucket_messages = scalar(
        "SELECT message_count FROM usage_models_daily
         WHERE day_id = ? AND model_family = 'unknown' AND model_tier = 'unknown'",
        fparams![expected_day],
    );
    assert_eq!(
        unknown_bucket_messages, 2,
        "user messages should land in unknown model bucket"
    );
}
16457
/// Spot checks for `has_plan_heuristic`: headed or labelled step lists must be detected,
/// while plain chat text and a fenced JSON tool payload containing numbered lines must not.
#[test]
fn has_plan_heuristic_detects_plans() {
    let plans = [
        "## Plan\n\n1. First step\n2. Second step",
        "# Plan\nHere is what we will do:\n1. Step one\n2. Step two",
        "Plan:\n- Gather baseline\n- Implement changes\n- Validate with tests",
        "Next steps:\n1. Update schema\n2. Rebuild rollups",
    ];
    let non_plans = [
        "Hello world",
        "Short",
        "This is a regular message without plans",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. install\\n2. run\"}\n```",
    ];

    for text in plans {
        assert!(has_plan_heuristic(text), "expected a plan in {text:?}");
    }
    for text in non_plans {
        assert!(!has_plan_heuristic(text), "expected no plan in {text:?}");
    }
}
16481
/// The same plan text counts as a plan for the roles `assistant`, `agent` and `Assistant`,
/// and never for `user` or `tool`.
#[test]
fn has_plan_for_role_only_counts_assistant_messages() {
    let plan_text = "## Plan\n1. First\n2. Second";

    for role in ["assistant", "agent", "Assistant"] {
        assert!(
            has_plan_for_role(role, plan_text),
            "role {role:?} should count as a plan author"
        );
    }
    for role in ["user", "tool"] {
        assert!(
            !has_plan_for_role(role, plan_text),
            "role {role:?} should not count as a plan author"
        );
    }
}
16491
/// Records two plan-bearing assistant entries that differ in `api_data_source`
/// (`"estimated"` vs `"api"`) and checks that the API token totals in every rollup only
/// include the `"api"` entry, while plan counts and content estimates include both.
#[test]
fn api_rollups_require_api_data_source() {
    // Builds a plan-bearing assistant entry; everything not passed in is fixed.
    let plan_entry = |message_id,
                      content_chars,
                      content_tokens_est,
                      input_tokens,
                      output_tokens,
                      data_source: &str| MessageMetricsEntry {
        message_id,
        created_at_ms: 0,
        hour_id: 1,
        day_id: 1,
        agent_slug: "codex".into(),
        workspace_id: 0,
        source_id: "local".into(),
        role: "assistant".into(),
        content_chars,
        content_tokens_est,
        model_name: None,
        model_family: "unknown".into(),
        model_tier: "unknown".into(),
        provider: "unknown".into(),
        api_input_tokens: Some(input_tokens),
        api_output_tokens: Some(output_tokens),
        api_cache_read_tokens: Some(0),
        api_cache_creation_tokens: Some(0),
        api_thinking_tokens: Some(0),
        api_service_tier: None,
        api_data_source: data_source.into(),
        tool_call_count: 0,
        has_tool_calls: false,
        has_plan: true,
    };

    let mut agg = AnalyticsRollupAggregator::new();
    // Carries token numbers but is marked "estimated".
    agg.record(&plan_entry(1, 120, 30, 100, 50, "estimated"));
    // Marked "api": 40 input + 10 output tokens.
    agg.record(&plan_entry(2, 80, 20, 40, 10, "api"));

    let rollup_key = (1_i64, String::from("codex"), 0_i64, String::from("local"));
    let hourly = agg
        .hourly
        .get(&rollup_key)
        .expect("hourly rollup key must exist");
    let daily = agg
        .daily
        .get(&rollup_key)
        .expect("daily rollup key must exist");
    let model_rollup_key = (
        1_i64,
        String::from("codex"),
        0_i64,
        String::from("local"),
        String::from("unknown"),
        String::from("unknown"),
    );
    let models_daily = agg
        .models_daily
        .get(&model_rollup_key)
        .expect("model rollup key must exist");

    // Plan counters cover both entries (content estimate: 30 + 20).
    assert_eq!(hourly.plan_message_count, 2);
    assert_eq!(hourly.plan_content_tokens_est_total, 50);
    // API counters cover only the "api" entry (40 + 10).
    assert_eq!(hourly.plan_api_tokens_total, 50);
    assert_eq!(daily.plan_api_tokens_total, 50);
    assert_eq!(models_daily.plan_api_tokens_total, 50);
    assert_eq!(hourly.api_tokens_total, 50);
    assert_eq!(hourly.api_input_tokens_total, 40);
    assert_eq!(hourly.api_output_tokens_total, 10);
    assert_eq!(hourly.api_coverage_message_count, 1);
    assert_eq!(daily.api_tokens_total, 50);
    assert_eq!(models_daily.api_tokens_total, 50);
}
16583
/// Guards the overall quality of `has_plan_heuristic` on a small hand-written corpus:
/// at least 80% of the positives must be flagged and at most 20% of the negatives.
#[test]
fn has_plan_heuristic_curated_corpus_thresholds() {
    let positives = [
        "## Plan\n1. Inspect current schema\n2. Add migration\n3. Verify rebuild",
        "Plan:\n1) Reproduce\n2) Patch\n3) Add tests",
        "Implementation plan:\n- Parse inputs\n- Update rollups\n- Run checks",
        "Next steps:\n1. Reserve file\n2. Implement\n3. Report status",
        "# Plan\n1. Gather requirements\n2. Ship changes",
        "Action plan:\n- Identify root cause\n- Fix it\n- Validate",
    ];

    let negatives = [
        "The plan is to move fast and fix things later.",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. ls\\n2. cat\"}\n```",
        "stdout:\n1. Build started\n2. Build finished\nexit code: 0",
        "I can help with that request. Let me know if you want details.",
        "Here is a list:\n- apples\n- oranges",
        "Status update: completed tasks and blockers below.",
    ];

    // Fraction of a corpus that the heuristic flags as containing a plan.
    let flagged_fraction = |corpus: &[&str]| {
        let flagged = corpus
            .iter()
            .filter(|text| has_plan_heuristic(text))
            .count();
        flagged as f64 / corpus.len() as f64
    };

    let recall = flagged_fraction(&positives[..]);
    let false_positive_rate = flagged_fraction(&negatives[..]);

    assert!(
        recall >= 0.80,
        "plan heuristic recall too low: got {recall:.2}"
    );
    assert!(
        false_positive_rate <= 0.20,
        "plan heuristic false-positive rate too high: got {false_positive_rate:.2}"
    );
}
16627
/// Ingests one conversation, wipes all four analytics tables by hand, and checks that
/// `rebuild_analytics` restores the same row counts, API input total and rollup values.
#[test]
fn rebuild_analytics_repopulates_from_messages() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    const ANALYTICS_TABLES: [&str; 4] = [
        "message_metrics",
        "usage_hourly",
        "usage_daily",
        "usage_models_daily",
    ];

    /// Number of rows currently in `table`.
    fn row_count(conn: &FrankenConnection, table: &str) -> i64 {
        conn.query_row_map(
            &format!("SELECT COUNT(*) FROM {table}"),
            fparams![],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap()
    }

    /// Sum of `api_input_tokens` over `message_metrics` rows whose data source is `'api'`.
    fn api_input_total(conn: &FrankenConnection) -> i64 {
        conn.query_row_map(
            "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
            fparams![],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap()
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // All three messages are stamped within one minute of this base timestamp.
    let ts_ms = 1_770_551_400_000_i64;
    let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
    let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);

    // Usage payload attached to the assistant message only.
    let usage_json = serde_json::json!({
        "message": {
            "model": "claude-opus-4-6",
            "usage": {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_read_input_tokens": 200,
                "cache_creation_input_tokens": 30,
                "service_tier": "standard"
            }
        }
    });

    // Builds a message with no id, author or snippets.
    let message = |idx, role, created_at, content: &str, extra_json| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json,
        snippets: vec![],
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: None,
        external_id: Some("test-rebuild-1".into()),
        title: Some("Test conversation".into()),
        source_path: PathBuf::from("/tmp/test.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            message(
                0,
                MessageRole::User,
                ts_ms,
                "Hello, can you help me with a plan?",
                serde_json::Value::Null,
            ),
            message(
                1,
                MessageRole::Agent,
                ts_ms + 30_000,
                "## Plan\n\n1. First step\n2. Second step\n3. Third step",
                usage_json,
            ),
            message(
                2,
                MessageRole::User,
                ts_ms + 60_000,
                "Great, let's proceed!",
                serde_json::Value::Null,
            ),
        ],
        source_id: "local".into(),
        origin_host: None,
    };

    storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();

    // Snapshot what ingest produced so the rebuild can be compared against it.
    let conn = storage.raw();
    let [before_mm, before_hourly, before_daily, before_models_daily] =
        ANALYTICS_TABLES.map(|table| row_count(conn, table));
    let before_api_input = api_input_total(conn);

    assert_eq!(before_mm, 3);
    assert!(before_hourly > 0);
    assert!(before_daily > 0);
    assert!(before_models_daily > 0);

    // Wipe every analytics table so the rebuild has to start from nothing.
    for table in ANALYTICS_TABLES {
        conn.execute(&format!("DELETE FROM {table}")).unwrap();
    }
    assert_eq!(row_count(conn, "message_metrics"), 0);

    let result = storage.rebuild_analytics().unwrap();

    assert_eq!(result.message_metrics_rows, 3);
    assert!(result.usage_hourly_rows > 0);
    assert!(result.usage_daily_rows > 0);
    assert!(result.usage_models_daily_rows > 0);
    assert!(
        result.elapsed_ms < 10_000,
        "Rebuild should be fast for 3 msgs"
    );

    // Re-fetch the connection after the rebuild before comparing with the snapshot.
    let conn = storage.raw();
    let count_expectations = [
        (
            "message_metrics",
            before_mm,
            "Rebuilt message_metrics count should match",
        ),
        (
            "usage_hourly",
            before_hourly,
            "Rebuilt hourly rows should match",
        ),
        (
            "usage_daily",
            before_daily,
            "Rebuilt daily rows should match",
        ),
        (
            "usage_models_daily",
            before_models_daily,
            "Rebuilt model rollup rows should match",
        ),
    ];
    for (table, before, message) in count_expectations {
        assert_eq!(row_count(conn, table), before, "{message}");
    }

    assert_eq!(
        api_input_total(conn),
        before_api_input,
        "Rebuilt API input tokens should match original"
    );

    let (hour_total, hour_user, hour_assistant, hour_plan, hour_plan_content, hour_plan_api): (
        i64,
        i64,
        i64,
        i64,
        i64,
        i64,
    ) = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
                    plan_content_tokens_est_total, plan_api_tokens_total
             FROM usage_hourly WHERE hour_id = ?",
            fparams![expected_hour],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                ))
            },
        )
        .unwrap();
    assert_eq!(hour_total, 3);
    assert_eq!(hour_user, 2);
    assert_eq!(hour_assistant, 1);
    assert_eq!(hour_plan, 1);
    assert!(hour_plan_content > 0);
    assert!(hour_plan_api > 0);

    let day_total: i64 = conn
        .query_row_map(
            "SELECT message_count FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(day_total, 3);
}
16871
/// Inserts a single conversation whose two messages are each slightly longer
/// than half of `FTS_ENTRY_BATCH_MAX_CHARS`, then checks that both the
/// `messages` table and the `fts_messages` table contain one row per message.
#[test]
fn insert_conversations_batched_flushes_large_fts_batches() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();
    storage
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency before insert");

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Two messages of this size together exceed FTS_ENTRY_BATCH_MAX_CHARS.
    let content = "y".repeat((FTS_ENTRY_BATCH_MAX_CHARS / 2) + 1);
    let mut messages = Vec::with_capacity(2);
    for i in 0_i64..2 {
        messages.push(Message {
            id: None,
            idx: i,
            role: MessageRole::Agent,
            author: None,
            created_at: Some(1_700_000_000_000 + i),
            content: format!("{i}-{content}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("fts-large-batch".into()),
        title: Some("FTS Large Batch".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let batch = [(agent_id, None, &conv)];
    let outcomes = storage.insert_conversations_batched(&batch).unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), conv.messages.len());

    let messages_sql = "SELECT COUNT(*) FROM messages";
    let message_count: i64 = storage
        .conn
        .query_row_map(messages_sql, fparams![], |r| r.get_typed(0))
        .unwrap();
    let fts_sql = "SELECT COUNT(*) FROM fts_messages";
    let fts_count: i64 = storage
        .conn
        .query_row_map(fts_sql, fparams![], |r| r.get_typed(0))
        .unwrap();

    // Every stored message must also have been indexed for full-text search.
    assert_eq!(message_count, conv.messages.len() as i64);
    assert_eq!(fts_count, conv.messages.len() as i64);
}
16948
16949 fn make_profiled_storage_remote_conversation(
16950 external_id: i64,
16951 msg_count: usize,
16952 ) -> Conversation {
16953 Conversation {
16954 id: None,
16955 agent_slug: "codex".into(),
16956 workspace: Some(PathBuf::from("/ws/profiled-storage-remote")),
16957 external_id: Some(format!("profiled-storage-remote-{external_id}")),
16958 title: Some(format!(
16959 "Profiled storage remote conversation {external_id}"
16960 )),
16961 source_path: PathBuf::from(format!("/log/profiled-storage-remote-{external_id}.jsonl")),
16962 started_at: Some(10_000 + external_id * 100),
16963 ended_at: Some(10_000 + external_id * 100 + msg_count as i64),
16964 approx_tokens: Some(msg_count as i64 * 32),
16965 metadata_json: serde_json::json!({ "bench": true }),
16966 messages: (0..msg_count)
16967 .map(|idx| Message {
16968 id: None,
16969 idx: idx as i64,
16970 role: if idx % 2 == 0 {
16971 MessageRole::User
16972 } else {
16973 MessageRole::Agent
16974 },
16975 author: Some("tester".into()),
16976 created_at: Some(20_000 + external_id * 100 + idx as i64),
16977 content: format!(
16978 "profiled storage remote content ext={external_id} idx={idx} {}",
16979 "x".repeat(64)
16980 ),
16981 extra_json: serde_json::json!({ "idx": idx }),
16982 snippets: Vec::new(),
16983 })
16984 .collect(),
16985 source_id: "profiled-storage-remote-source".into(),
16986 origin_host: Some("builder-profile".into()),
16987 }
16988 }
16989
16990 fn make_profiled_append_remote_merge_conversation(
16991 external_id: i64,
16992 msg_count: usize,
16993 ) -> Conversation {
16994 let base_ts = 100_000 + external_id * 1_000;
16995 Conversation {
16996 id: None,
16997 agent_slug: "codex".into(),
16998 workspace: Some(PathBuf::from("/ws/profiled-append-remote")),
16999 external_id: Some(format!("profiled-append-remote-{external_id}")),
17000 title: Some(format!("Profiled append remote conversation {external_id}")),
17001 source_path: PathBuf::from(format!("/log/profiled-append-remote-{external_id}.jsonl")),
17002 started_at: Some(base_ts),
17003 ended_at: Some(base_ts + msg_count as i64),
17004 approx_tokens: Some(msg_count as i64 * 50),
17005 metadata_json: serde_json::json!({ "bench": true }),
17006 messages: (0..msg_count)
17007 .map(|idx| Message {
17008 id: None,
17009 idx: idx as i64,
17010 role: if idx % 2 == 0 {
17011 MessageRole::User
17012 } else {
17013 MessageRole::Agent
17014 },
17015 author: Some(format!("model-{}", external_id % 5)),
17016 created_at: Some(base_ts + idx as i64),
17017 content: format!(
17018 "Profiled append remote conversation {} message {}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
17019 external_id, idx
17020 ),
17021 extra_json: serde_json::json!({ "bench": true }),
17022 snippets: Vec::new(),
17023 })
17024 .collect(),
17025 source_id: "profiled-append-remote-source".into(),
17026 origin_host: Some("builder-profile".into()),
17027 }
17028 }
17029
/// Inserts a brand-new five-message conversation where every message carries
/// one snippet, then verifies that joining `snippets` to `messages` on
/// `message_id` yields exactly one snippet row per message.
#[test]
fn insert_conversation_tree_batched_new_message_ids_match_snippet_rows() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("batched-message-ids.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // Attach exactly one snippet to each generated message.
    let mut conv = make_profiled_storage_remote_conversation(42, 5);
    conv.messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, message)| {
            let snippet = Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/file_{idx}.rs"))),
                start_line: Some((idx + 1) as i64),
                end_line: Some((idx + 2) as i64),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn snippet_{idx}() {{}}")),
            };
            message.snippets.push(snippet);
        });

    let outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
        .unwrap();

    let messages_sql = "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1";
    let message_count: i64 = storage
        .conn
        .query_row_map(messages_sql, fparams![outcome.conversation_id], |r| {
            r.get_typed(0)
        })
        .unwrap();

    let joined_sql = "SELECT COUNT(*)
         FROM snippets s
         JOIN messages m ON s.message_id = m.id
         WHERE m.conversation_id = ?1";
    let joined_snippet_count: i64 = storage
        .conn
        .query_row_map(joined_sql, fparams![outcome.conversation_id], |r| {
            r.get_typed(0)
        })
        .unwrap();

    assert_eq!(message_count, conv.messages.len() as i64);
    assert_eq!(joined_snippet_count, conv.messages.len() as i64);
}
17085
/// Inserts a two-message conversation, then re-inserts the same external id
/// with five messages. Only indices 2..=4 should be reported as inserted, and
/// the snippets joined through `message_id` must be the "initial" ones for
/// the first two messages and the "full" ones for the appended three.
#[test]
fn insert_conversation_tree_batched_appended_message_ids_match_snippet_rows() {
    let tmp = TempDir::new().unwrap();
    let storage =
        SqliteStorage::open(&tmp.path().join("batched-append-message-ids.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // First pass: two messages, each with an "append_initial" snippet.
    let mut initial = make_profiled_storage_remote_conversation(77, 2);
    initial
        .messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, message)| {
            message.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/append_initial_{idx}.rs"))),
                start_line: Some((idx + 1) as i64),
                end_line: Some((idx + 2) as i64),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn append_initial_{idx}() {{}}")),
            });
        });
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();
    assert_eq!(first.inserted_indices, vec![0, 1]);

    // Second pass: same external id, five messages, each with an "append_full" snippet.
    let mut appended = make_profiled_storage_remote_conversation(77, 5);
    appended
        .messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, message)| {
            message.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/append_full_{idx}.rs"))),
                start_line: Some((idx + 10) as i64),
                end_line: Some((idx + 11) as i64),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn append_full_{idx}() {{}}")),
            });
        });
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3, 4]);

    let messages_sql = "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1";
    let message_count: i64 = storage
        .conn
        .query_row_map(messages_sql, fparams![first.conversation_id], |r| {
            r.get_typed(0)
        })
        .unwrap();

    let joined_sql = "SELECT m.idx, s.file_path
         FROM snippets s
         JOIN messages m ON s.message_id = m.id
         WHERE m.conversation_id = ?1
         ORDER BY m.idx, s.id";
    let joined_snippets: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(joined_sql, fparams![first.conversation_id], |r| {
            Ok((r.get_typed(0)?, r.get_typed(1)?))
        })
        .unwrap();

    let expected_snippets: Vec<(i64, String)> = vec![
        (0, "src/append_initial_0.rs".to_string()),
        (1, "src/append_initial_1.rs".to_string()),
        (2, "src/append_full_2.rs".to_string()),
        (3, "src/append_full_3.rs".to_string()),
        (4, "src/append_full_4.rs".to_string()),
    ];

    assert_eq!(message_count, 5);
    assert_eq!(joined_snippets, expected_snippets);
}
17170
/// Deletes the `conversation_external_tail_lookup` row for an already stored
/// conversation, appends to that conversation, and checks that:
/// - the append lands on the original conversation (no duplicate row),
/// - the lookup row exists again and mirrors `conversation_tail_state`,
/// - the tail state reflects the last appended message (index 3).
#[test]
fn insert_conversation_tree_rehydrates_external_lookup_after_manual_clear() {
    let tmp = TempDir::new().unwrap();
    let storage =
        SqliteStorage::open(&tmp.path().join("external-lookup-rehydrate.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    let initial = make_profiled_storage_remote_conversation(88, 2);
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();

    let external_id = initial.external_id.as_deref().unwrap();
    let lookup_key = conversation_external_lookup_key(&initial.source_id, agent_id, external_id);

    // The initial insert must have populated the lookup table.
    let lookup_id_sql = "SELECT conversation_id
         FROM conversation_external_tail_lookup
         WHERE lookup_key = ?1";
    let lookup_id: i64 = storage
        .conn
        .query_row_map(lookup_id_sql, fparams![lookup_key.as_str()], |r| {
            r.get_typed(0)
        })
        .unwrap();
    assert_eq!(lookup_id, first.conversation_id);

    // Manually drop the lookup row before appending.
    let clear_sql = "DELETE FROM conversation_external_tail_lookup WHERE lookup_key = ?1";
    storage
        .conn
        .execute_compat(clear_sql, fparams![lookup_key.as_str()])
        .unwrap();

    let appended = make_profiled_storage_remote_conversation(88, 4);
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3]);

    let conversation_count_sql = "SELECT COUNT(*)
         FROM conversations
         WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3";
    let conversation_count: i64 = storage
        .conn
        .query_row_map(
            conversation_count_sql,
            fparams![initial.source_id.as_str(), agent_id, external_id],
            |r| r.get_typed(0),
        )
        .unwrap();

    let restored_sql =
        "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
         FROM conversation_external_tail_lookup
         WHERE lookup_key = ?1";
    let restored_lookup: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(restored_sql, fparams![lookup_key.as_str()], |r| {
            let conversation_id = r.get_typed(0)?;
            let ended_at = r.get_typed(1)?;
            let last_idx = r.get_typed(2)?;
            let last_created_at = r.get_typed(3)?;
            Ok((conversation_id, ended_at, last_idx, last_created_at))
        })
        .unwrap();

    let tail_sql = "SELECT ended_at, last_message_idx, last_message_created_at
         FROM conversation_tail_state
         WHERE conversation_id = ?1";
    let tail_state: (Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(tail_sql, fparams![first.conversation_id], |r| {
            Ok((r.get_typed(0)?, r.get_typed(1)?, r.get_typed(2)?))
        })
        .unwrap();
    let (tail_ended_at, tail_last_idx, tail_last_created_at) = tail_state;

    assert_eq!(conversation_count, 1);
    // The rehydrated lookup row must agree with the tail-state row.
    assert_eq!(
        restored_lookup,
        (
            first.conversation_id,
            tail_ended_at,
            tail_last_idx,
            tail_last_created_at
        )
    );
    // And the tail state must describe the final appended message.
    let last_created_at = appended.messages[3].created_at;
    assert_eq!(tail_state, (last_created_at, Some(3), last_created_at));
}
17279
/// Wipes `daily_stats` after a first insert and checks that a second insert
/// recreates the rows, counting only the second conversation
/// (one session, two messages) in the `all`/`all` rollup.
#[test]
fn insert_conversation_tree_recreates_daily_stats_after_manual_clear() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // First conversation populates daily_stats, which is then cleared by hand.
    let before_clear = make_profiled_storage_remote_conversation(0, 3);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &before_clear)
        .unwrap();
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    // Second conversation must bring the stats rows back.
    let after_clear = make_profiled_storage_remote_conversation(1, 2);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &after_clear)
        .unwrap();

    let row_count_sql = "SELECT COUNT(*) FROM daily_stats";
    let row_count: i64 = storage
        .conn
        .query_row_map(row_count_sql, fparams![], |r| r.get_typed(0))
        .unwrap();

    let totals_sql = "SELECT session_count, message_count
         FROM daily_stats
         WHERE agent_slug = 'all' AND source_id = 'all'";
    let (session_count, message_count): (i64, i64) = storage
        .conn
        .query_row_map(totals_sql, fparams![], |r| {
            Ok((r.get_typed(0)?, r.get_typed(1)?))
        })
        .unwrap();

    // NOTE(review): 4 presumably corresponds to the agent/source x specific/'all'
    // combinations for a single day — confirm against the daily_stats writer.
    assert_eq!(row_count, 4);
    assert_eq!(session_count, 1);
    assert_eq!(message_count, 2);
}
17335
/// Profiles repeated inserts of fresh remote conversations for several
/// (message count, iteration count) scenarios and checks the profile's
/// counters, plus the invariant that the summed per-stage durations never
/// exceed the recorded total duration.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_steady_state_remote_reuse() {
    let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    let scenarios = [(5usize, 80usize), (20, 50), (50, 24)];
    for (msg_count, iterations) in scenarios {
        let tmp = TempDir::new().unwrap();
        let storage =
            SqliteStorage::open(&tmp.path().join(format!("profile-{msg_count}.db"))).unwrap();

        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace = PathBuf::from("/ws/profiled-storage-remote");
        let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

        // Unprofiled seed insert (external id 0) before measurement starts.
        let seed = make_profiled_storage_remote_conversation(0, msg_count);
        storage
            .insert_conversation_tree(agent_id, Some(workspace_id), &seed)
            .unwrap();

        let mut profile = InsertConversationTreePerfProfile::default();
        for external_id in (1..=iterations).map(|n| n as i64) {
            let conv = make_profiled_storage_remote_conversation(external_id, msg_count);
            storage
                .insert_conversation_tree_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &conv,
                    &mut profile,
                )
                .unwrap();
        }

        let accounted_duration = profile.source_duration
            + profile.tx_open_duration
            + profile.existing_lookup_duration
            + profile.conversation_row_duration
            + profile.message_insert_duration
            + profile.snippet_insert_duration
            + profile.fts_entry_duration
            + profile.fts_flush_duration
            + profile.analytics_duration
            + profile.commit_duration;

        let expected_messages = iterations * msg_count;
        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, expected_messages);
        assert_eq!(profile.inserted_messages, expected_messages);
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted stage durations cannot exceed total duration"
        );

        let label = format!("remote_reuse_{msg_count}_msgs");
        profile.log_summary(&label);
    }
}
17398
/// Profiles the append path: each scenario first stores `iterations`
/// conversations of `msg_count` messages, then appends the same
/// conversations with `msg_count * 2` messages under a profile. The profile
/// must count every offered message, report only the new half as inserted,
/// and keep the summed per-stage durations within the total duration.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_append_remote_source_merge() {
    let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    let scenarios = [(5usize, 80usize), (20, 50), (50, 24)];
    for (msg_count, iterations) in scenarios {
        let tmp = TempDir::new().unwrap();
        let storage =
            SqliteStorage::open(&tmp.path().join(format!("append-profile-{msg_count}.db")))
                .unwrap();

        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace = PathBuf::from("/ws/profiled-append-remote");
        let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

        // Seed phase: store the short version of every conversation.
        for external_id in (0..iterations).map(|n| n as i64) {
            let short = make_profiled_append_remote_merge_conversation(external_id, msg_count);
            storage
                .insert_conversation_tree(agent_id, Some(workspace_id), &short)
                .unwrap();
        }

        // Measured phase: append the doubled version of every conversation.
        let mut profile = InsertConversationTreePerfProfile::default();
        for external_id in (0..iterations).map(|n| n as i64) {
            let doubled =
                make_profiled_append_remote_merge_conversation(external_id, msg_count * 2);
            storage
                .append_existing_conversation_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &doubled,
                    &mut profile,
                )
                .unwrap();
        }

        let accounted_duration = profile.source_duration
            + profile.tx_open_duration
            + profile.existing_lookup_duration
            + profile.existing_idx_lookup_duration
            + profile.existing_replay_lookup_duration
            + profile.dedupe_filter_duration
            + profile.conversation_row_duration
            + profile.message_insert_duration
            + profile.snippet_insert_duration
            + profile.fts_entry_duration
            + profile.fts_flush_duration
            + profile.analytics_duration
            + profile.commit_duration;

        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, iterations * msg_count * 2);
        assert_eq!(profile.inserted_messages, iterations * msg_count);
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted append stage durations cannot exceed total duration"
        );

        let label = format!("append_remote_merge_{msg_count}_msgs");
        profile.log_summary(&label);
    }
}
17472
/// Hand-populates two agents, two conversations, three messages and their
/// `message_metrics` rows with raw SQL, clears `daily_stats`, and checks that
/// `rebuild_daily_stats` restores totals of 2 sessions and 3 messages with
/// zero drift reported by `daily_stats_health`.
#[test]
fn rebuild_daily_stats_recomputes_materialized_totals_without_monolithic_group_by() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();
    let started_at = 1_700_000_000_000_i64;
    let day_id = FrankenStorage::day_id_from_millis(started_at);
    let hour_id = FrankenStorage::hour_id_from_millis(started_at);

    // Agents: (id, slug, display name).
    for (id, slug, name) in [(1_i64, "codex", "Codex"), (2_i64, "claude", "Claude")] {
        storage
            .conn
            .execute_compat(
                "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
                 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
                fparams![id, slug, name, "cli"],
            )
            .unwrap();
    }

    // Conversations: (id, agent id, external id, title, source path, ended_at offset).
    for (id, owner_agent_id, external_id, title, source_path, ended_offset) in [
        (
            1_i64,
            1_i64,
            "daily-a",
            "Daily A",
            "/tmp/daily-a.jsonl",
            200_i64,
        ),
        (
            2_i64,
            2_i64,
            "daily-b",
            "Daily B",
            "/tmp/daily-b.jsonl",
            300_i64,
        ),
    ] {
        storage
            .conn
            .execute_compat(
                "INSERT INTO conversations (
                    id, agent_id, workspace_id, source_id, external_id, title, source_path,
                    started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
                 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)",
                fparams![
                    id,
                    owner_agent_id,
                    LOCAL_SOURCE_ID,
                    external_id,
                    title,
                    source_path,
                    started_at,
                    started_at + ended_offset,
                    "{}"
                ],
            )
            .unwrap();
    }

    // Messages: (id, conversation id, idx, role, created_at offset, content).
    for (id, conversation_id, idx, role, created_offset, content) in [
        (1_i64, 1_i64, 0_i64, "user", 0_i64, "hello"),
        (2_i64, 1_i64, 1_i64, "assistant", 100_i64, "response"),
        (3_i64, 2_i64, 0_i64, "user", 50_i64, "abc"),
    ] {
        storage
            .conn
            .execute_compat(
                "INSERT INTO messages (
                    id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
                 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
                fparams![
                    id,
                    conversation_id,
                    idx,
                    role,
                    started_at + created_offset,
                    content
                ],
            )
            .unwrap();
    }

    // Metrics rows: (message id, agent slug, role, content length).
    for (metric_message_id, slug, msg_role, chars) in [
        (1_i64, "codex", "user", 5_i64),
        (2_i64, "codex", "assistant", 8_i64),
        (3_i64, "claude", "user", 3_i64),
    ] {
        storage
            .conn
            .execute_compat(
                "INSERT INTO message_metrics (
                    message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
                    role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
                    api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
                    api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
                    model_name, model_family, model_tier, provider
                 ) VALUES (
                    ?1, ?2, ?3, ?4, ?5, ?6, ?7,
                    ?8, ?9, ?10, ?11, ?12,
                    ?13, ?14, ?15,
                    ?16, ?17, ?18, ?19, ?20,
                    ?21, ?22, ?23, ?24
                 )",
                fparams![
                    metric_message_id,
                    started_at,
                    hour_id,
                    day_id,
                    slug,
                    0_i64,
                    LOCAL_SOURCE_ID,
                    msg_role,
                    chars,
                    chars / 4,
                    0_i64,
                    0_i64,
                    0_i64,
                    0_i64,
                    0_i64,
                    "",
                    "estimated",
                    0_i64,
                    0_i64,
                    0_i64,
                    "",
                    "unknown",
                    "unknown",
                    "unknown"
                ],
            )
            .unwrap();
    }

    // Start the rebuild from an empty materialized table.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 2);

    let health = storage.daily_stats_health().unwrap();
    assert_eq!(health.conversation_count, 2);
    assert_eq!(health.materialized_total, 2);
    assert_eq!(health.drift, 0);

    let totals_sql =
        "SELECT message_count FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'";
    let total_messages: i64 = storage
        .conn
        .query_row_map(totals_sql, fparams![], |r| r.get_typed(0))
        .unwrap();
    assert_eq!(total_messages, 3);
}
17639
/// Stores one message containing multi-byte UTF-8 text together with a
/// `message_metrics` row whose `content_chars` equals the content's byte
/// length. The `all`/`all` `total_chars` value must equal that byte length
/// both after the inline stats update and after `rebuild_daily_stats`.
#[test]
fn rebuild_daily_stats_preserves_byte_counts_with_message_metrics() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // `str::len` is the UTF-8 byte length, not the character count.
    let content = "ASCII🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;
    let day_id = FrankenStorage::day_id_from_millis(started_at);
    let hour_id = FrankenStorage::hour_id_from_millis(started_at);

    let insert_agent_sql =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    storage
        .conn
        .execute_compat(insert_agent_sql, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();

    let insert_conversation_sql = "INSERT INTO conversations (
            id, agent_id, workspace_id, source_id, external_id, title, source_path,
            started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_conversation_sql,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-metrics",
                "Unicode Metrics",
                "/tmp/unicode-metrics.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();

    let insert_message_sql = "INSERT INTO messages (
            id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_message_sql,
            fparams![1_i64, 1_i64, 0_i64, "user", started_at, content],
        )
        .unwrap();

    let insert_metrics_sql = "INSERT INTO message_metrics (
            message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
            role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
            api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
            api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
            model_name, model_family, model_tier, provider
         ) VALUES (
            ?1, ?2, ?3, ?4, ?5, ?6, ?7,
            ?8, ?9, ?10, ?11, ?12,
            ?13, ?14, ?15,
            ?16, ?17, ?18, ?19, ?20,
            ?21, ?22, ?23, ?24
         )";
    storage
        .conn
        .execute_compat(
            insert_metrics_sql,
            fparams![
                1_i64,
                started_at,
                hour_id,
                day_id,
                "tester",
                0_i64,
                LOCAL_SOURCE_ID,
                "user",
                expected_bytes,
                expected_bytes / 4,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                "",
                "estimated",
                0_i64,
                0_i64,
                0_i64,
                "",
                "unknown",
                "unknown",
                "unknown"
            ],
        )
        .unwrap();

    // Apply the inline (non-rebuild) stats update inside its own transaction.
    let delta = StatsDelta {
        session_count_delta: 1,
        message_count_delta: 1,
        total_chars_delta: expected_bytes,
    };
    let mut tx = storage.conn.transaction().unwrap();
    franken_update_daily_stats_in_tx(
        &storage,
        &tx,
        "tester",
        LOCAL_SOURCE_ID,
        Some(started_at),
        delta,
    )
    .unwrap();
    tx.commit().unwrap();

    let total_chars_sql =
        "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'";
    let inline_total: i64 = storage
        .conn
        .query_row_map(total_chars_sql, fparams![], |r| r.get_typed(0))
        .unwrap();
    assert_eq!(inline_total, expected_bytes);

    // Drop the materialized rows and make the rebuild recompute them.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let rebuilt_total: i64 = storage
        .conn
        .query_row_map(total_chars_sql, fparams![], |r| r.get_typed(0))
        .unwrap();
    assert_eq!(rebuilt_total, expected_bytes);
}
17774
/// Same byte-count check as the metrics-backed variant, but no
/// `message_metrics` row is inserted, so the rebuild has only the raw
/// `messages` content to work from. After clearing `daily_stats`, the rebuilt
/// `all`/`all` `total_chars` must equal the content's UTF-8 byte length.
#[test]
fn rebuild_daily_stats_raw_fallback_preserves_byte_counts() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // `str::len` is the UTF-8 byte length, not the character count.
    let content = "fallback🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;

    let insert_agent_sql =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    storage
        .conn
        .execute_compat(insert_agent_sql, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();

    let insert_conversation_sql = "INSERT INTO conversations (
            id, agent_id, workspace_id, source_id, external_id, title, source_path,
            started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_conversation_sql,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-fallback",
                "Unicode Fallback",
                "/tmp/unicode-fallback.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();

    let insert_message_sql = "INSERT INTO messages (
            id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    storage
        .conn
        .execute_compat(
            insert_message_sql,
            fparams![1_i64, 1_i64, 0_i64, "assistant", started_at, content],
        )
        .unwrap();

    // Apply the inline stats update in its own transaction before wiping the table.
    let delta = StatsDelta {
        session_count_delta: 1,
        message_count_delta: 1,
        total_chars_delta: expected_bytes,
    };
    let mut tx = storage.conn.transaction().unwrap();
    franken_update_daily_stats_in_tx(
        &storage,
        &tx,
        "tester",
        LOCAL_SOURCE_ID,
        Some(started_at),
        delta,
    )
    .unwrap();
    tx.commit().unwrap();

    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let total_chars_sql =
        "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'";
    let rebuilt_total: i64 = storage
        .conn
        .query_row_map(total_chars_sql, fparams![], |r| r.get_typed(0))
        .unwrap();
    assert_eq!(rebuilt_total, expected_bytes);
}
17852
#[test]
fn insert_conversations_batched_appends_duplicate_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Two batch items share one external id. The second repeats the first
    // item's messages and adds idx 2 and 3, so the test expects a single
    // conversation row that ends up holding all four messages.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds one message; only the fields that vary in this test are parameters.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Every conversation in this test carries identical identity fields.
    let shared_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session".into()),
        title: Some("Shared Session".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let conv_a = shared_session(vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first"),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ]);
    let conv_b = shared_session(vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first"),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        message(2, MessageRole::User, 1_700_000_000_200, "third"),
        message(3, MessageRole::Agent, 1_700_000_000_300, "fourth"),
    ]);

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv_a), (agent_id, None, &conv_b)])
        .unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[1].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    // Runs a single-value COUNT(*)-style query against the given handle.
    let count_rows = |store: &SqliteStorage, sql: &str| -> i64 {
        let total: i64 = store
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap();
        total
    };
    // Collects a single integer column from the given handle.
    let collect_ids = |store: &SqliteStorage, sql: &str| -> Vec<i64> {
        let ids: Vec<i64> = store
            .conn
            .query_map_collect(sql, fparams![], |row| row.get_typed(0))
            .unwrap();
        ids
    };

    // Counts through the writing handle: default plan, NOT INDEXED, and the
    // source-id index.
    let conversation_count = count_rows(&storage, "SELECT COUNT(*) FROM conversations");
    let conversation_count_not_indexed = count_rows(
        &storage,
        "SELECT COUNT(*) FROM conversations NOT INDEXED",
    );
    let conversation_count_source_index = count_rows(
        &storage,
        "SELECT COUNT(*) FROM conversations INDEXED BY idx_conversations_source_id",
    );
    let message_count = count_rows(&storage, "SELECT COUNT(*) FROM messages");

    // The same checks again through a second handle opened on the same file.
    let reopened_storage = SqliteStorage::open(&db_path).unwrap();
    let reopened_conversation_count =
        count_rows(&reopened_storage, "SELECT COUNT(*) FROM conversations");
    let reopened_conversation_count_not_indexed = count_rows(
        &reopened_storage,
        "SELECT COUNT(*) FROM conversations NOT INDEXED",
    );
    let reopened_conversation_ids = collect_ids(
        &reopened_storage,
        "SELECT id FROM conversations ORDER BY id",
    );
    let reopened_conversation_ids_not_indexed = collect_ids(
        &reopened_storage,
        "SELECT id FROM conversations NOT INDEXED ORDER BY id",
    );
    let reopened_conversation_ids_source_index = collect_ids(
        &reopened_storage,
        "SELECT id FROM conversations INDEXED BY idx_conversations_source_id ORDER BY id",
    );

    let expected_ids = vec![outcomes[0].conversation_id];
    assert_eq!(reopened_conversation_ids, expected_ids);
    assert_eq!(reopened_conversation_ids_not_indexed, expected_ids);
    assert_eq!(reopened_conversation_ids_source_index, expected_ids);
    assert_eq!(reopened_conversation_count, 1);
    assert_eq!(reopened_conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_source_index, 1);
    assert_eq!(conversation_count, 1);
    assert_eq!(message_count, 4);
}
18044
#[test]
fn franken_insert_conversation_or_get_existing_recovers_unique_conflict() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Insert a conversation, then run the "after miss" insert path for the same
    // conversation inside the same transaction. It must resolve to the row that
    // already exists rather than creating a second one.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("recover-duplicate".into()),
        title: Some("Recover Duplicate".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: "local".into(),
        origin_host: None,
    };

    let tx = storage.conn.transaction().unwrap();
    let inserted_id = franken_insert_conversation(&tx, agent_id, None, &conv)
        .unwrap()
        .expect("first insert should succeed");

    let merge_key = conversation_merge_key(agent_id, &conv);
    let status = franken_insert_conversation_or_get_existing_after_miss(
        &tx, agent_id, None, &conv, &merge_key,
    )
    .unwrap();

    // Only the `Existing` variant is acceptable here.
    let existing_id = match status {
        ConversationInsertStatus::Existing(id) => id,
        ConversationInsertStatus::Inserted(new_id) => {
            panic!("expected existing conversation id, got freshly inserted {new_id}");
        }
    };
    assert_eq!(existing_id, inserted_id);

    // Counted through the transaction handle itself.
    let rows_in_tx: i64 = tx
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(rows_in_tx, 1);
}
18119
#[test]
fn insert_conversations_batched_merges_duplicate_external_id_with_gaps() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // The first batch item supplies idx 2 and 3; the second supplies idx 0, 1
    // and a repeat of idx 3. Only the missing indices (0 and 1) should be
    // reported as inserted for the second item.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds one message; only the fields that vary in this test are parameters.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    let gap_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session-gap".into()),
        title: Some("Shared Session Gap".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let conv_a = gap_session(vec![
        message(2, MessageRole::User, 1_700_000_000_200, "third"),
        message(3, MessageRole::Agent, 1_700_000_000_300, "fourth"),
    ]);
    let conv_b = gap_session(vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first"),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        message(3, MessageRole::Agent, 1_700_000_000_300, "fourth"),
    ]);

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv_a), (agent_id, None, &conv_b)])
        .unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[1].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    let persisted_idx: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted_idx, vec![0, 1, 2, 3]);
}
18225
#[test]
fn insert_conversations_batched_refreshes_partial_pending_message_lookup() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // A canonical conversation (idx 0 and 20) is stored first. A later batch
    // then replays idx 0 unchanged and offers different content for idx 20.
    // Neither batch item may insert anything, and the stored rows must keep
    // their canonical content.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // idx 0 is the user turn; every other idx is an agent turn.
    let message_at = |idx: i64, content: &str| {
        let role = match idx {
            0 => MessageRole::User,
            _ => MessageRole::Agent,
        };
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    };

    let session_with = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("partial-cache-session".into()),
        title: Some("Partial cache session".into()),
        source_path: PathBuf::from("/tmp/partial-cache.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let canonical = session_with(vec![
        message_at(0, "canonical zero"),
        message_at(20, "canonical twenty"),
    ]);
    storage
        .insert_conversation_tree(agent_id, None, &canonical)
        .unwrap();

    let exact_prefix = session_with(vec![message_at(0, "canonical zero")]);
    let conflicting_tail = session_with(vec![message_at(20, "conflicting twenty")]);

    let outcomes = storage
        .insert_conversations_batched(&[
            (agent_id, None, &exact_prefix),
            (agent_id, None, &conflicting_tail),
        ])
        .unwrap();

    assert_eq!(outcomes.len(), 2);
    assert!(outcomes[0].inserted_indices.is_empty());
    assert!(
        outcomes[1].inserted_indices.is_empty(),
        "the second batch item must refresh the partial pending lookup and retain the canonical idx=20 row"
    );

    let persisted: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected = vec![
        (0, "canonical zero".to_string()),
        (20, "canonical twenty".to_string()),
    ];
    assert_eq!(persisted, expected);
}
18316
#[test]
fn insert_conversations_batched_reprocessing_conversation_is_idempotent() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    const MESSAGE_COUNT: i64 = 64;

    // Submits the same 64-message conversation twice. The first pass inserts
    // every message; the second must insert none and reuse the same row.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Even indices are user turns, odd indices are agent turns.
    let mut messages: Vec<Message> = Vec::with_capacity(MESSAGE_COUNT as usize);
    for idx in 0..MESSAGE_COUNT {
        let role = if idx % 2 == 0 {
            MessageRole::User
        } else {
            MessageRole::Agent
        };
        messages.push(Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("large-reprocess-session".into()),
        title: Some("Large Reprocess Session".into()),
        source_path: PathBuf::from("/tmp/large-reprocess-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_000 + MESSAGE_COUNT - 1),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let first_pass = storage
        .insert_conversations_batched(&[(agent_id, None, &conversation)])
        .unwrap();
    let second_pass = storage
        .insert_conversations_batched(&[(agent_id, None, &conversation)])
        .unwrap();

    assert_eq!(first_pass.len(), 1);
    assert_eq!(second_pass.len(), 1);
    assert_eq!(first_pass[0].inserted_indices.len(), MESSAGE_COUNT as usize);
    assert!(
        second_pass[0].inserted_indices.is_empty(),
        "full reprocessing of a large conversation must not attempt duplicate idx inserts"
    );
    assert_eq!(first_pass[0].conversation_id, second_pass[0].conversation_id);

    let stored_conversations: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    let stored_messages: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();

    assert_eq!(stored_conversations, 1);
    assert_eq!(stored_messages, MESSAGE_COUNT);
}
18402
#[test]
fn parallel_insert_conversation_tree_keeps_unique_external_ids_distinct() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use frankensqlite::compat::{ConnectionExt, RowExt};
    use rand::RngExt;
    use rayon::prelude::*;

    /// Reports whether `err` itself, or failing that its root cause, is one of
    /// the FrankenSQLite busy/conflict variants this test is willing to retry.
    fn retryable_franken_error(err: &anyhow::Error) -> bool {
        let direct = err.downcast_ref::<frankensqlite::FrankenError>();
        let franken = direct.or_else(|| {
            err.root_cause()
                .downcast_ref::<frankensqlite::FrankenError>()
        });
        let Some(inner) = franken else {
            return false;
        };
        matches!(
            inner,
            frankensqlite::FrankenError::Busy
                | frankensqlite::FrankenError::BusyRecovery
                | frankensqlite::FrankenError::BusySnapshot { .. }
                | frankensqlite::FrankenError::WriteConflict { .. }
                | frankensqlite::FrankenError::SerializationFailure { .. }
        )
    }

    /// Calls `f` up to 25 times, sleeping with jittered, doubling backoff
    /// (starting at 4 ms, capped at 512 ms) between retryable failures.
    /// Non-retryable errors and the final attempt's error are returned as-is.
    fn with_retry<F, T>(mut f: F) -> anyhow::Result<T>
    where
        F: FnMut() -> anyhow::Result<T>,
    {
        const LAST_ATTEMPT: u32 = 24;

        let mut rng = rand::rng();
        let mut backoff_ms = 4_u64;
        for attempt in 0..=LAST_ATTEMPT {
            let err = match f() {
                Ok(value) => return Ok(value),
                Err(err) => err,
            };
            let may_retry = attempt < LAST_ATTEMPT && retryable_franken_error(&err);
            if !may_retry {
                return Err(err);
            }
            let jitter_ms = rng.random_range(0..=backoff_ms);
            std::thread::sleep(Duration::from_millis(backoff_ms + jitter_ms));
            backoff_ms = (backoff_ms * 2).min(512);
        }
        unreachable!("retry loop must return on success or final failure")
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("parallel_insert_conversation_tree.db");
    // Open once and drop the handle before the parallel writers start.
    let seed = FrankenStorage::open(&db_path).unwrap();
    drop(seed);

    // Ten conversations with unique external ids, spread over three agent slugs,
    // each carrying three messages.
    let conversations: Vec<NormalizedConversation> = (0..10)
        .map(|conv_no| {
            let base_ts = 1_000 + conv_no * 100;
            let messages = (0..3)
                .map(|msg_no| NormalizedMessage {
                    idx: msg_no,
                    role: if msg_no % 2 == 0 { "user" } else { "assistant" }.to_string(),
                    author: Some("tester".into()),
                    created_at: Some(base_ts + msg_no * 10),
                    content: format!("parallel-distinct-test conv={conv_no} msg={msg_no}"),
                    extra: serde_json::json!({}),
                    snippets: vec![],
                    invocations: Vec::new(),
                })
                .collect();
            NormalizedConversation {
                agent_slug: format!("agent-{}", conv_no % 3),
                external_id: Some(format!("conv-{conv_no}")),
                title: Some(format!("Conversation {conv_no}")),
                workspace: Some(PathBuf::from(format!("/ws/{conv_no}"))),
                source_path: PathBuf::from(format!("/log/{conv_no}.jsonl")),
                started_at: Some(base_ts),
                ended_at: Some(base_ts + 50),
                metadata: serde_json::json!({}),
                messages,
            }
        })
        .collect();

    // Each chunk of three conversations gets its own writer handle and its own
    // agent/workspace id caches.
    let mut outcomes: Vec<(String, i64, Vec<i64>)> = conversations
        .par_chunks(3)
        .map(|chunk| {
            let storage = FrankenStorage::open_writer(&db_path).unwrap();
            let mut agent_cache: HashMap<String, i64> = HashMap::new();
            let mut workspace_cache: HashMap<PathBuf, i64> = HashMap::new();
            let mut chunk_outcomes = Vec::with_capacity(chunk.len());

            for conv in chunk {
                let agent_slug = conv.agent_slug.clone();
                let workspace = conv.workspace.clone();
                let external_id = conv.external_id.clone().expect("external id");
                let internal = map_to_internal(conv);

                let outcome = with_retry(|| {
                    let agent_id = match agent_cache.get(&agent_slug).copied() {
                        Some(id) => id,
                        None => {
                            let id = storage.ensure_agent(&Agent {
                                id: None,
                                slug: agent_slug.clone(),
                                name: agent_slug.clone(),
                                version: None,
                                kind: AgentKind::Cli,
                            })?;
                            agent_cache.insert(agent_slug.clone(), id);
                            id
                        }
                    };
                    let workspace_id = match &workspace {
                        None => None,
                        Some(path) => match workspace_cache.get(path).copied() {
                            Some(id) => Some(id),
                            None => {
                                let id = storage.ensure_workspace(path, None)?;
                                workspace_cache.insert(path.clone(), id);
                                Some(id)
                            }
                        },
                    };
                    storage.insert_conversation_tree(agent_id, workspace_id, &internal)
                })
                .unwrap();

                chunk_outcomes.push((
                    external_id,
                    outcome.conversation_id,
                    outcome.inserted_indices,
                ));
            }

            storage.close().unwrap();
            chunk_outcomes
        })
        .flatten()
        .collect();
    // Sort by external id so failure output is stable across runs.
    outcomes.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));

    assert!(
        outcomes
            .iter()
            .all(|(_, _, inserted_indices)| *inserted_indices == [0, 1, 2]),
        "unique external ids must not be routed through the existing-conversation merge path: {outcomes:?}"
    );

    let distinct_ids: HashSet<i64> = outcomes.iter().map(|entry| entry.1).collect();
    assert_eq!(
        distinct_ids.len(),
        conversations.len(),
        "unique external ids must produce distinct conversation ids: {outcomes:?}"
    );

    // Verify what a freshly opened handle sees.
    let reader = FrankenStorage::open(&db_path).unwrap();
    let stored_rows: Vec<(i64, String)> = reader
        .raw()
        .query_map_collect(
            "SELECT id, external_id FROM conversations ORDER BY id",
            &[],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let stored_count: i64 = reader
        .raw()
        .query_row_map("SELECT COUNT(*) FROM conversations", &[], |row| {
            row.get_typed(0)
        })
        .unwrap();

    assert_eq!(
        stored_count as usize,
        conversations.len(),
        "parallel distinct inserts must persist one row per external id; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
    assert_eq!(
        stored_rows.len(),
        conversations.len(),
        "parallel distinct inserts must remain visible after reopening; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
}
18580
#[test]
fn insert_conversation_tree_merges_duplicate_external_id_with_gaps() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use std::path::PathBuf;

    // Two separate tree inserts share an external id. The first stores idx 2
    // and 3; the second offers idx 0, 1 and a repeat of idx 3, so only 0 and 1
    // should be reported as newly inserted.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds one connector-level message; only the varying fields are parameters.
    let message = |idx: i64, role: &str, created_at: i64, content: &str| NormalizedMessage {
        idx,
        role: role.into(),
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra: serde_json::Value::Null,
        snippets: Vec::new(),
        invocations: Vec::new(),
    };

    let tree_session = |messages: Vec<NormalizedMessage>| NormalizedConversation {
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("tree-gap-session".into()),
        title: Some("Tree Gap Session".into()),
        source_path: PathBuf::from("/tmp/tree.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        metadata: serde_json::Value::Null,
        messages,
    };

    let conv_a = map_to_internal(&tree_session(vec![
        message(2, "user", 1_700_000_000_200, "third"),
        message(3, "assistant", 1_700_000_000_300, "fourth"),
    ]));
    let conv_b = map_to_internal(&tree_session(vec![
        message(0, "user", 1_700_000_000_000, "first"),
        message(1, "assistant", 1_700_000_000_100, "second"),
        message(3, "assistant", 1_700_000_000_300, "fourth"),
    ]));

    let first = storage
        .insert_conversation_tree(agent_id, None, &conv_a)
        .unwrap();
    let second = storage
        .insert_conversation_tree(agent_id, None, &conv_b)
        .unwrap();

    assert_eq!(first.inserted_indices, vec![2, 3]);
    assert_eq!(second.inserted_indices, vec![0, 1]);
    assert_eq!(first.conversation_id, second.conversation_id);

    let persisted_idx: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted_idx, vec![0, 1, 2, 3]);
}
18687
#[test]
fn insert_conversation_tree_skips_duplicate_message_indices_for_new_conversation() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // A brand-new conversation arrives with idx 0 listed twice. The test expects
    // the first idx-0 message to be stored and the later one to be dropped.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds one message; only the fields that vary in this test are parameters.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    let messages = vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first canonical"),
        message(
            0,
            MessageRole::User,
            1_700_000_000_001,
            "duplicate idx should be skipped",
        ),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ];

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("duplicate-new-session".into()),
        title: Some("Duplicate New Session".into()),
        source_path: PathBuf::from("/tmp/duplicate-new-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    assert_eq!(outcome.inserted_indices, vec![0, 1]);

    let persisted: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected = vec![
        (0, "first canonical".to_string()),
        (1, "second".to_string()),
    ];
    assert_eq!(persisted, expected);
}
18775
#[test]
fn insert_conversation_tree_merges_duplicate_source_path_without_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Neither conversation has an external id; they share a source path and
    // start time. The second overlaps the first at idx 1 and adds idx 2, so the
    // test expects a single conversation with only idx 2 newly inserted.
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds one message; only the fields that vary in this test are parameters.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    let path_keyed_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Source Path Merge".into()),
        source_path: PathBuf::from("/tmp/shared-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let initial = path_keyed_session(vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first"),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ]);
    let overlapping = path_keyed_session(vec![
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        message(2, MessageRole::User, 1_700_000_000_200, "third"),
    ]);

    let first = storage
        .insert_conversation_tree(agent_id, None, &initial)
        .unwrap();
    let second = storage
        .insert_conversation_tree(agent_id, None, &overlapping)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(first.inserted_indices, vec![0, 1]);
    assert_eq!(second.inserted_indices, vec![2]);

    let persisted_idx: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted_idx, vec![0, 1, 2]);
}
18880
18881 #[test]
18882 fn insert_conversation_tree_merges_source_path_duplicates_with_start_drift() {
18883 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18884 use std::path::PathBuf;
18885
18886 let dir = TempDir::new().unwrap();
18887 let db_path = dir.path().join("test.db");
18888 let storage = SqliteStorage::open(&db_path).unwrap();
18889
18890 let agent = Agent {
18891 id: None,
18892 slug: "codex".into(),
18893 name: "Codex".into(),
18894 version: Some("0.2.3".into()),
18895 kind: AgentKind::Cli,
18896 };
18897 let agent_id = storage.ensure_agent(&agent).unwrap();
18898
18899 let base_conv = |started_at: Option<i64>, messages: Vec<Message>| Conversation {
18900 id: None,
18901 agent_slug: "codex".into(),
18902 workspace: Some(PathBuf::from("/tmp/workspace")),
18903 external_id: None,
18904 title: Some("Drift Merge".into()),
18905 source_path: PathBuf::from("/tmp/drift-session.jsonl"),
18906 started_at,
18907 ended_at: Some(1_700_000_000_999),
18908 approx_tokens: None,
18909 metadata_json: serde_json::Value::Null,
18910 messages,
18911 source_id: "local".into(),
18912 origin_host: None,
18913 };
18914
18915 let first = storage
18916 .insert_conversation_tree(
18917 agent_id,
18918 None,
18919 &base_conv(
18920 Some(1_700_000_000_000),
18921 vec![
18922 Message {
18923 id: None,
18924 idx: 0,
18925 role: MessageRole::User,
18926 author: None,
18927 created_at: Some(1_700_000_000_000),
18928 content: "first".into(),
18929 extra_json: serde_json::Value::Null,
18930 snippets: Vec::new(),
18931 },
18932 Message {
18933 id: None,
18934 idx: 1,
18935 role: MessageRole::Agent,
18936 author: None,
18937 created_at: Some(1_700_000_000_100),
18938 content: "second".into(),
18939 extra_json: serde_json::Value::Null,
18940 snippets: Vec::new(),
18941 },
18942 ],
18943 ),
18944 )
18945 .unwrap();
18946
18947 let second = storage
18948 .insert_conversation_tree(
18949 agent_id,
18950 None,
18951 &base_conv(
18952 Some(1_700_000_004_000),
18953 vec![
18954 Message {
18955 id: None,
18956 idx: 1,
18957 role: MessageRole::Agent,
18958 author: None,
18959 created_at: Some(1_700_000_000_100),
18960 content: "second".into(),
18961 extra_json: serde_json::Value::Null,
18962 snippets: Vec::new(),
18963 },
18964 Message {
18965 id: None,
18966 idx: 2,
18967 role: MessageRole::User,
18968 author: None,
18969 created_at: Some(1_700_000_004_200),
18970 content: "third".into(),
18971 extra_json: serde_json::Value::Null,
18972 snippets: Vec::new(),
18973 },
18974 ],
18975 ),
18976 )
18977 .unwrap();
18978
18979 assert_eq!(first.conversation_id, second.conversation_id);
18980 assert_eq!(second.inserted_indices, vec![2]);
18981 }
18982
18983 #[test]
18984 fn insert_conversation_tree_keeps_single_message_overlap_sessions_separate() {
18985 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18986 use std::path::PathBuf;
18987
18988 let dir = TempDir::new().unwrap();
18989 let db_path = dir.path().join("test.db");
18990 let storage = SqliteStorage::open(&db_path).unwrap();
18991
18992 let agent = Agent {
18993 id: None,
18994 slug: "codex".into(),
18995 name: "Codex".into(),
18996 version: Some("0.2.3".into()),
18997 kind: AgentKind::Cli,
18998 };
18999 let agent_id = storage.ensure_agent(&agent).unwrap();
19000
19001 let make_conv = |started_at: i64, idx: i64, content: &str| Conversation {
19002 id: None,
19003 agent_slug: "codex".into(),
19004 workspace: Some(PathBuf::from("/tmp/workspace")),
19005 external_id: None,
19006 title: Some("Partial overlap".into()),
19007 source_path: PathBuf::from("/tmp/reused-session.jsonl"),
19008 started_at: Some(started_at),
19009 ended_at: Some(started_at + 500),
19010 approx_tokens: None,
19011 metadata_json: serde_json::Value::Null,
19012 messages: vec![Message {
19013 id: None,
19014 idx,
19015 role: MessageRole::User,
19016 author: None,
19017 created_at: Some(started_at),
19018 content: content.into(),
19019 extra_json: serde_json::Value::Null,
19020 snippets: Vec::new(),
19021 }],
19022 source_id: "local".into(),
19023 origin_host: None,
19024 };
19025
19026 storage
19027 .insert_conversation_tree(
19028 agent_id,
19029 None,
19030 &Conversation {
19031 messages: vec![
19032 Message {
19033 id: None,
19034 idx: 0,
19035 role: MessageRole::User,
19036 author: None,
19037 created_at: Some(1_700_000_000_000),
19038 content: "shared opener".into(),
19039 extra_json: serde_json::Value::Null,
19040 snippets: Vec::new(),
19041 },
19042 Message {
19043 id: None,
19044 idx: 1,
19045 role: MessageRole::Agent,
19046 author: None,
19047 created_at: Some(1_700_000_000_100),
19048 content: "first session unique".into(),
19049 extra_json: serde_json::Value::Null,
19050 snippets: Vec::new(),
19051 },
19052 ],
19053 ..make_conv(1_700_000_000_000, 0, "unused")
19054 },
19055 )
19056 .unwrap();
19057 storage
19058 .insert_conversation_tree(
19059 agent_id,
19060 None,
19061 &make_conv(1_700_000_900_000, 0, "shared opener"),
19062 )
19063 .unwrap();
19064
19065 let conversation_count: i64 = storage
19066 .conn
19067 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
19068 row.get_typed(0)
19069 })
19070 .unwrap();
19071 assert_eq!(conversation_count, 2);
19072 }
19073
19074 #[test]
19075 fn insert_conversation_tree_keeps_distinct_source_path_sessions_separate() {
19076 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19077 use std::path::PathBuf;
19078
19079 let dir = TempDir::new().unwrap();
19080 let db_path = dir.path().join("test.db");
19081 let storage = SqliteStorage::open(&db_path).unwrap();
19082
19083 let agent = Agent {
19084 id: None,
19085 slug: "codex".into(),
19086 name: "Codex".into(),
19087 version: Some("0.2.3".into()),
19088 kind: AgentKind::Cli,
19089 };
19090 let agent_id = storage.ensure_agent(&agent).unwrap();
19091
19092 let make_conv = |started_at: i64, created_at: i64, content: &str| Conversation {
19093 id: None,
19094 agent_slug: "codex".into(),
19095 workspace: Some(PathBuf::from("/tmp/workspace")),
19096 external_id: None,
19097 title: Some("Same Path Different Session".into()),
19098 source_path: PathBuf::from("/tmp/reused-session.jsonl"),
19099 started_at: Some(started_at),
19100 ended_at: Some(started_at + 500),
19101 approx_tokens: None,
19102 metadata_json: serde_json::Value::Null,
19103 messages: vec![Message {
19104 id: None,
19105 idx: 0,
19106 role: MessageRole::User,
19107 author: None,
19108 created_at: Some(created_at),
19109 content: content.into(),
19110 extra_json: serde_json::Value::Null,
19111 snippets: Vec::new(),
19112 }],
19113 source_id: "local".into(),
19114 origin_host: None,
19115 };
19116
19117 storage
19118 .insert_conversation_tree(
19119 agent_id,
19120 None,
19121 &make_conv(1_700_000_000_000, 1_700_000_000_000, "first session"),
19122 )
19123 .unwrap();
19124 storage
19125 .insert_conversation_tree(
19126 agent_id,
19127 None,
19128 &make_conv(1_700_000_900_000, 1_700_000_900_000, "second session"),
19129 )
19130 .unwrap();
19131
19132 let conversation_count: i64 = storage
19133 .conn
19134 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
19135 row.get_typed(0)
19136 })
19137 .unwrap();
19138 assert_eq!(conversation_count, 2);
19139 }
19140
19141 #[test]
19142 fn insert_conversation_tree_merges_replay_equivalent_messages_with_shifted_idx() {
19143 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19144 use std::path::PathBuf;
19145
19146 let dir = TempDir::new().unwrap();
19147 let db_path = dir.path().join("test.db");
19148 let storage = SqliteStorage::open(&db_path).unwrap();
19149
19150 let agent = Agent {
19151 id: None,
19152 slug: "codex".into(),
19153 name: "Codex".into(),
19154 version: Some("0.2.3".into()),
19155 kind: AgentKind::Cli,
19156 };
19157 let agent_id = storage.ensure_agent(&agent).unwrap();
19158
19159 let make_conv = |started_at: i64, messages: Vec<Message>| Conversation {
19160 id: None,
19161 agent_slug: "codex".into(),
19162 workspace: Some(PathBuf::from("/tmp/workspace")),
19163 external_id: None,
19164 title: Some("Shifted replay".into()),
19165 source_path: PathBuf::from("/tmp/replay-session.jsonl"),
19166 started_at: Some(started_at),
19167 ended_at: Some(started_at + 500),
19168 approx_tokens: None,
19169 metadata_json: serde_json::Value::Null,
19170 messages,
19171 source_id: "local".into(),
19172 origin_host: None,
19173 };
19174
19175 let first = storage
19176 .insert_conversation_tree(
19177 agent_id,
19178 None,
19179 &make_conv(
19180 1_700_000_000_000,
19181 vec![
19182 Message {
19183 id: None,
19184 idx: 0,
19185 role: MessageRole::User,
19186 author: None,
19187 created_at: Some(1_700_000_000_000),
19188 content: "first".into(),
19189 extra_json: serde_json::Value::Null,
19190 snippets: Vec::new(),
19191 },
19192 Message {
19193 id: None,
19194 idx: 1,
19195 role: MessageRole::Agent,
19196 author: None,
19197 created_at: Some(1_700_000_000_100),
19198 content: "second".into(),
19199 extra_json: serde_json::Value::Null,
19200 snippets: Vec::new(),
19201 },
19202 ],
19203 ),
19204 )
19205 .unwrap();
19206
19207 let second = storage
19208 .insert_conversation_tree(
19209 agent_id,
19210 None,
19211 &make_conv(
19212 1_700_000_900_000,
19213 vec![
19214 Message {
19215 id: None,
19216 idx: 10,
19217 role: MessageRole::User,
19218 author: None,
19219 created_at: Some(1_700_000_000_000),
19220 content: "first".into(),
19221 extra_json: serde_json::Value::Null,
19222 snippets: Vec::new(),
19223 },
19224 Message {
19225 id: None,
19226 idx: 11,
19227 role: MessageRole::Agent,
19228 author: None,
19229 created_at: Some(1_700_000_000_100),
19230 content: "second".into(),
19231 extra_json: serde_json::Value::Null,
19232 snippets: Vec::new(),
19233 },
19234 Message {
19235 id: None,
19236 idx: 12,
19237 role: MessageRole::User,
19238 author: None,
19239 created_at: Some(1_700_000_000_200),
19240 content: "third".into(),
19241 extra_json: serde_json::Value::Null,
19242 snippets: Vec::new(),
19243 },
19244 ],
19245 ),
19246 )
19247 .unwrap();
19248
19249 assert_eq!(first.conversation_id, second.conversation_id);
19250 assert_eq!(second.inserted_indices, vec![12]);
19251
19252 let stored_indices: Vec<i64> = storage
19253 .conn
19254 .query_map_collect(
19255 "SELECT idx FROM messages WHERE conversation_id = ?1 ORDER BY idx",
19256 fparams![first.conversation_id],
19257 |row| row.get_typed(0),
19258 )
19259 .unwrap();
19260 assert_eq!(stored_indices, vec![0, 1, 12]);
19261 }
19262
19263 #[test]
19264 fn salvage_historical_databases_imports_backups_once_and_merges_overlap() {
19265 use crate::model::types::{Conversation, Message, MessageRole};
19266 use std::path::PathBuf;
19267
19268 fn base_conv(source_path: &str, messages: Vec<Message>) -> Conversation {
19269 Conversation {
19270 id: None,
19271 agent_slug: "codex".into(),
19272 workspace: Some(PathBuf::from("/tmp/workspace")),
19273 external_id: None,
19274 title: Some("Recovered".into()),
19275 source_path: PathBuf::from(source_path),
19276 started_at: Some(1_700_000_000_000),
19277 ended_at: Some(1_700_000_000_999),
19278 approx_tokens: None,
19279 metadata_json: serde_json::Value::Null,
19280 messages,
19281 source_id: "local".into(),
19282 origin_host: None,
19283 }
19284 }
19285
19286 let dir = TempDir::new().unwrap();
19287 let canonical_db = dir.path().join("agent_search.db");
19288 let storage = SqliteStorage::open(&canonical_db).unwrap();
19289
19290 let overlapping_a = base_conv(
19291 "/tmp/shared-history.jsonl",
19292 vec![
19293 Message {
19294 id: None,
19295 idx: 0,
19296 role: MessageRole::User,
19297 author: None,
19298 created_at: Some(1_700_000_000_000),
19299 content: "first".into(),
19300 extra_json: serde_json::Value::Null,
19301 snippets: Vec::new(),
19302 },
19303 Message {
19304 id: None,
19305 idx: 1,
19306 role: MessageRole::Agent,
19307 author: None,
19308 created_at: Some(1_700_000_000_100),
19309 content: "second".into(),
19310 extra_json: serde_json::Value::Null,
19311 snippets: Vec::new(),
19312 },
19313 ],
19314 );
19315 let overlapping_b = base_conv(
19316 "/tmp/shared-history.jsonl",
19317 vec![
19318 Message {
19319 id: None,
19320 idx: 1,
19321 role: MessageRole::Agent,
19322 author: None,
19323 created_at: Some(1_700_000_000_100),
19324 content: "second".into(),
19325 extra_json: serde_json::Value::Null,
19326 snippets: Vec::new(),
19327 },
19328 Message {
19329 id: None,
19330 idx: 2,
19331 role: MessageRole::User,
19332 author: None,
19333 created_at: Some(1_700_000_000_200),
19334 content: "third".into(),
19335 extra_json: serde_json::Value::Null,
19336 snippets: Vec::new(),
19337 },
19338 ],
19339 );
19340 let unique = Conversation {
19341 source_path: PathBuf::from("/tmp/unique-history.jsonl"),
19342 messages: vec![Message {
19343 id: None,
19344 idx: 0,
19345 role: MessageRole::User,
19346 author: None,
19347 created_at: Some(1_700_000_001_000),
19348 content: "unique".into(),
19349 extra_json: serde_json::Value::Null,
19350 snippets: Vec::new(),
19351 }],
19352 started_at: Some(1_700_000_001_000),
19353 ended_at: Some(1_700_000_001_100),
19354 ..base_conv("/tmp/unique-history.jsonl", Vec::new())
19355 };
19356
19357 seed_historical_db_direct(
19358 &dir.path()
19359 .join("backups/agent_search.db.20260322T020200.bak"),
19360 std::slice::from_ref(&overlapping_a),
19361 );
19362 seed_historical_db_direct(
19363 &dir.path().join("agent_search.corrupt.20260324_212907"),
19364 &[overlapping_b, unique],
19365 );
19366
19367 let first = storage.salvage_historical_databases(&canonical_db).unwrap();
19368 assert_eq!(first.bundles_considered, 2);
19369 assert_eq!(first.bundles_imported, 2);
19370 assert_eq!(first.messages_imported, 4);
19371
19372 let conversations = storage.list_conversations(10, 0).unwrap();
19373 assert_eq!(conversations.len(), 2);
19374
19375 let shared_id = conversations
19376 .iter()
19377 .find(|conv| conv.source_path == std::path::Path::new("/tmp/shared-history.jsonl"))
19378 .and_then(|conv| conv.id)
19379 .unwrap();
19380 let shared_indices: Vec<i64> = storage
19381 .fetch_messages(shared_id)
19382 .unwrap()
19383 .into_iter()
19384 .map(|msg| msg.idx)
19385 .collect();
19386 assert_eq!(shared_indices, vec![0, 1, 2]);
19387
19388 let second = storage.salvage_historical_databases(&canonical_db).unwrap();
19389 assert_eq!(second.bundles_imported, 0);
19390 assert_eq!(second.messages_imported, 0);
19391 }
19392
19393 #[test]
19394 fn salvage_historical_databases_normalizes_host_only_remote_provenance() {
19395 use crate::model::types::{Conversation, Message, MessageRole};
19396 use std::path::PathBuf;
19397
19398 let dir = TempDir::new().unwrap();
19399 let canonical_db = dir.path().join("agent_search.db");
19400 let storage = SqliteStorage::open(&canonical_db).unwrap();
19401
19402 let host_only_remote = Conversation {
19403 id: None,
19404 agent_slug: "codex".into(),
19405 workspace: Some(PathBuf::from("/tmp/workspace")),
19406 external_id: None,
19407 title: Some("Recovered Host Only Remote".into()),
19408 source_path: PathBuf::from("/tmp/host-only-history.jsonl"),
19409 started_at: Some(1_700_000_000_000),
19410 ended_at: Some(1_700_000_000_999),
19411 approx_tokens: None,
19412 metadata_json: serde_json::Value::Null,
19413 messages: vec![Message {
19414 id: None,
19415 idx: 0,
19416 role: MessageRole::User,
19417 author: None,
19418 created_at: Some(1_700_000_000_000),
19419 content: "host-only remote".into(),
19420 extra_json: serde_json::Value::Null,
19421 snippets: Vec::new(),
19422 }],
19423 source_id: " ".into(),
19424 origin_host: Some("builder-5".into()),
19425 };
19426
19427 let historical_db = dir
19428 .path()
19429 .join("backups/agent_search.db.20260322T020200.bak");
19430 seed_historical_db_direct(&historical_db, std::slice::from_ref(&host_only_remote));
19431
19432 let historical_conn =
19433 FrankenConnection::open(historical_db.to_string_lossy().into_owned()).unwrap();
19434 historical_conn
19435 .execute_compat(
19436 "INSERT INTO sources(id, kind, host_label, created_at, updated_at) VALUES(?1, ?2, ?3, ?4, ?5)",
19437 fparams![" ", "ssh", "builder-5", 0_i64, 0_i64],
19438 )
19439 .unwrap();
19440 historical_conn
19441 .execute_compat(
19442 "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE source_path = ?3",
19443 fparams![" ", "builder-5", "/tmp/host-only-history.jsonl"],
19444 )
19445 .unwrap();
19446 historical_conn
19447 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams!["builder-5"])
19448 .unwrap();
19449 drop(historical_conn);
19450
19451 let first = storage.salvage_historical_databases(&canonical_db).unwrap();
19452 assert_eq!(first.bundles_imported, 1);
19453 assert_eq!(first.messages_imported, 1);
19454
19455 let source_ids = storage.get_source_ids().unwrap();
19456 assert_eq!(source_ids, vec!["builder-5".to_string()]);
19457
19458 let conversations = storage.list_conversations(10, 0).unwrap();
19459 assert_eq!(conversations.len(), 1);
19460 assert_eq!(conversations[0].source_id, "builder-5");
19461 assert_eq!(conversations[0].origin_host.as_deref(), Some("builder-5"));
19462 }
19463
19464 #[test]
19465 fn historical_salvage_retry_splits_single_conversation_until_it_fits() {
19466 use crate::model::types::{Conversation, Message, MessageRole};
19467 use std::path::PathBuf;
19468
19469 let mut attempts: Vec<Vec<usize>> = Vec::new();
19470 let entry = HistoricalBatchEntry {
19471 source_row_id: 77,
19472 agent_id: 1,
19473 workspace_id: None,
19474 conversation: Conversation {
19475 id: None,
19476 agent_slug: "gemini".into(),
19477 workspace: Some(PathBuf::from("/tmp/workspace")),
19478 external_id: Some("conv-77".into()),
19479 title: Some("Large recovered conversation".into()),
19480 source_path: PathBuf::from("/tmp/history.jsonl"),
19481 started_at: Some(1_700_000_000_000),
19482 ended_at: Some(1_700_000_000_999),
19483 approx_tokens: None,
19484 metadata_json: serde_json::Value::Null,
19485 messages: (0..4)
19486 .map(|idx| Message {
19487 id: None,
19488 idx,
19489 role: MessageRole::User,
19490 author: None,
19491 created_at: Some(1_700_000_000_000 + idx),
19492 content: format!("message-{idx}"),
19493 extra_json: serde_json::Value::Null,
19494 snippets: Vec::new(),
19495 })
19496 .collect(),
19497 source_id: LOCAL_SOURCE_ID.into(),
19498 origin_host: None,
19499 },
19500 };
19501
19502 let totals = SqliteStorage::import_historical_batch_with_retry(
19503 std::slice::from_ref(&entry),
19504 &mut |batch| {
19505 attempts.push(
19506 batch
19507 .iter()
19508 .map(|entry| entry.conversation.messages.len())
19509 .collect(),
19510 );
19511 let total_messages: usize = batch
19512 .iter()
19513 .map(|entry| entry.conversation.messages.len())
19514 .sum();
19515 if total_messages > 1 {
19516 Err(anyhow!("out of memory"))
19517 } else {
19518 Ok(HistoricalBatchImportTotals {
19519 inserted_source_rows: batch.len(),
19520 inserted_messages: total_messages,
19521 })
19522 }
19523 },
19524 )
19525 .unwrap();
19526
19527 assert_eq!(
19528 totals,
19529 HistoricalBatchImportTotals {
19530 inserted_source_rows: 1,
19531 inserted_messages: 4,
19532 }
19533 );
19534 assert_eq!(attempts.first().cloned(), Some(vec![4]));
19535 assert!(
19536 attempts.iter().filter(|sizes| sizes == &&vec![1]).count() >= 4,
19537 "expected recursive fallback to reach one-message slices"
19538 );
19539 }
19540
19541 #[test]
19542 fn salvage_historical_databases_resumes_from_progress_checkpoint() {
19543 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19544 use std::path::PathBuf;
19545
19546 fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
19547 Conversation {
19548 id: None,
19549 agent_slug: "codex".into(),
19550 workspace: Some(PathBuf::from("/tmp/workspace")),
19551 external_id: Some(format!("conv-{idx_seed}")),
19552 title: Some(format!("Recovered {idx_seed}")),
19553 source_path: PathBuf::from(source_path),
19554 started_at: Some(1_700_000_000_000 + idx_seed),
19555 ended_at: Some(1_700_000_000_100 + idx_seed),
19556 approx_tokens: None,
19557 metadata_json: serde_json::Value::Null,
19558 messages: vec![Message {
19559 id: None,
19560 idx: 0,
19561 role: MessageRole::User,
19562 author: None,
19563 created_at: Some(1_700_000_000_000 + idx_seed),
19564 content: format!("message-{idx_seed}"),
19565 extra_json: serde_json::Value::Null,
19566 snippets: Vec::new(),
19567 }],
19568 source_id: LOCAL_SOURCE_ID.into(),
19569 origin_host: None,
19570 }
19571 }
19572
19573 let dir = TempDir::new().unwrap();
19574 let canonical_db = dir.path().join("agent_search.db");
19575 let backup_db = dir
19576 .path()
19577 .join("backups/agent_search.db.20260322T020200.bak");
19578 let storage = SqliteStorage::open(&canonical_db).unwrap();
19579 let conv_a = make_conv("/tmp/one.jsonl", 1);
19580 let conv_b = make_conv("/tmp/two.jsonl", 2);
19581 let conv_c = make_conv("/tmp/three.jsonl", 3);
19582 seed_historical_db_direct(
19583 &backup_db,
19584 &[conv_a.clone(), conv_b.clone(), conv_c.clone()],
19585 );
19586
19587 let agent = Agent {
19588 id: None,
19589 slug: "codex".into(),
19590 name: "Codex".into(),
19591 version: Some("0.2.3".into()),
19592 kind: AgentKind::Cli,
19593 };
19594 let agent_id = storage.ensure_agent(&agent).unwrap();
19595 storage
19596 .insert_conversation_tree(agent_id, None, &conv_a)
19597 .unwrap();
19598
19599 let bundle = discover_historical_database_bundles(&canonical_db)
19600 .into_iter()
19601 .find(|bundle| bundle.root_path == backup_db)
19602 .unwrap();
19603 let first_row_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
19604 .unwrap()
19605 .query_row_map(
19606 "SELECT id FROM conversations WHERE source_path = ?1",
19607 fparams!["/tmp/one.jsonl"],
19608 |row| row.get_typed(0),
19609 )
19610 .unwrap();
19611 storage
19612 .record_historical_bundle_progress(&bundle, "direct-readonly", first_row_id, 50, 99)
19613 .unwrap();
19614
19615 let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
19616 assert_eq!(outcome.bundles_imported, 1);
19617 assert_eq!(outcome.conversations_imported, 52);
19618 assert_eq!(outcome.messages_imported, 101);
19619 assert_eq!(storage.list_conversations(10, 0).unwrap().len(), 3);
19620
19621 let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
19622 let progress_left: Option<String> = storage
19623 .conn
19624 .query_row_map(
19625 "SELECT value FROM meta WHERE key = ?1",
19626 fparams![progress_key.as_str()],
19627 |row| row.get_typed(0),
19628 )
19629 .optional()
19630 .unwrap();
19631 assert!(
19632 progress_left.is_none(),
19633 "completed salvage should clear bundle progress"
19634 );
19635
19636 let second = storage.salvage_historical_databases(&canonical_db).unwrap();
19637 assert_eq!(second.bundles_imported, 0);
19638 assert_eq!(second.messages_imported, 0);
19639 }
19640
19641 #[test]
19642 fn salvage_historical_databases_skips_bundle_when_checkpoint_covers_backup() {
19643 use crate::model::types::{Conversation, Message, MessageRole};
19649 use std::path::PathBuf;
19650
19651 fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
19652 Conversation {
19653 id: None,
19654 agent_slug: "codex".into(),
19655 workspace: Some(PathBuf::from("/tmp/workspace")),
19656 external_id: Some(format!("conv-{idx_seed}")),
19657 title: Some(format!("Recovered {idx_seed}")),
19658 source_path: PathBuf::from(source_path),
19659 started_at: Some(1_700_000_000_000 + idx_seed),
19660 ended_at: Some(1_700_000_000_100 + idx_seed),
19661 approx_tokens: None,
19662 metadata_json: serde_json::Value::Null,
19663 messages: vec![Message {
19664 id: None,
19665 idx: 0,
19666 role: MessageRole::User,
19667 author: None,
19668 created_at: Some(1_700_000_000_000 + idx_seed),
19669 content: format!("message-{idx_seed}"),
19670 extra_json: serde_json::Value::Null,
19671 snippets: Vec::new(),
19672 }],
19673 source_id: LOCAL_SOURCE_ID.into(),
19674 origin_host: None,
19675 }
19676 }
19677
19678 let dir = TempDir::new().unwrap();
19679 let canonical_db = dir.path().join("agent_search.db");
19680 let backup_db = dir
19681 .path()
19682 .join("backups/agent_search.db.20260322T020200.bak");
19683 let storage = SqliteStorage::open(&canonical_db).unwrap();
19684 seed_historical_db_direct(
19685 &backup_db,
19686 &[
19687 make_conv("/tmp/one.jsonl", 1),
19688 make_conv("/tmp/two.jsonl", 2),
19689 make_conv("/tmp/three.jsonl", 3),
19690 ],
19691 );
19692
19693 let bundle = discover_historical_database_bundles(&canonical_db)
19694 .into_iter()
19695 .find(|bundle| bundle.root_path == backup_db)
19696 .unwrap();
19697
19698 let backup_max_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
19700 .unwrap()
19701 .query_row_map(
19702 "SELECT COALESCE(MAX(id), 0) FROM conversations",
19703 fparams![],
19704 |row| row.get_typed(0),
19705 )
19706 .unwrap();
19707 assert!(backup_max_id > 0, "seeded backup should have conversations");
19708 storage
19709 .record_historical_bundle_progress(&bundle, "direct-readonly", backup_max_id, 3, 3)
19710 .unwrap();
19711
19712 let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
19713 assert_eq!(
19714 outcome.bundles_imported, 0,
19715 "fully-checkpointed bundle must not be re-scanned"
19716 );
19717 assert_eq!(outcome.conversations_imported, 0);
19718 assert_eq!(outcome.messages_imported, 0);
19719 assert_eq!(
19720 storage.list_conversations(10, 0).unwrap().len(),
19721 0,
19722 "skip path must not import anything"
19723 );
19724 assert!(
19725 storage.historical_bundle_already_imported(&bundle).unwrap(),
19726 "skipped bundle must be ledgered as salvaged so future runs short-circuit"
19727 );
19728
19729 let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
19730 let progress_left: Option<String> = storage
19731 .conn
19732 .query_row_map(
19733 "SELECT value FROM meta WHERE key = ?1",
19734 fparams![progress_key.as_str()],
19735 |row| row.get_typed(0),
19736 )
19737 .optional()
19738 .unwrap();
19739 assert!(
19740 progress_left.is_none(),
19741 "skip path must clear the bundle progress checkpoint"
19742 );
19743 }
19744
19745 #[test]
19746 fn list_conversations_for_lexical_rebuild_uses_stable_id_order() {
19747 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
19748 use std::path::PathBuf;
19749
19750 let dir = TempDir::new().unwrap();
19751 let db_path = dir.path().join("agent_search.db");
19752 let storage = SqliteStorage::open(&db_path).unwrap();
19753 let agent = Agent {
19754 id: None,
19755 slug: "codex".into(),
19756 name: "Codex".into(),
19757 version: Some("0.2.3".into()),
19758 kind: AgentKind::Cli,
19759 };
19760 let agent_id = storage.ensure_agent(&agent).unwrap();
19761
19762 let make_conv = |source_path: &str, started_at: i64| Conversation {
19763 id: None,
19764 agent_slug: "codex".into(),
19765 workspace: Some(PathBuf::from("/tmp/workspace")),
19766 external_id: Some(source_path.to_string()),
19767 title: Some(source_path.to_string()),
19768 source_path: PathBuf::from(source_path),
19769 started_at: Some(started_at),
19770 ended_at: Some(started_at + 1),
19771 approx_tokens: None,
19772 metadata_json: serde_json::Value::Null,
19773 messages: vec![Message {
19774 id: None,
19775 idx: 0,
19776 role: MessageRole::User,
19777 author: None,
19778 created_at: Some(started_at),
19779 content: format!("message for {source_path}"),
19780 extra_json: serde_json::Value::Null,
19781 snippets: Vec::new(),
19782 }],
19783 source_id: LOCAL_SOURCE_ID.into(),
19784 origin_host: None,
19785 };
19786
19787 let conv_a = make_conv("/tmp/a.jsonl", 3_000);
19788 let conv_b = make_conv("/tmp/b.jsonl", 1_000);
19789 let conv_c = make_conv("/tmp/c.jsonl", 2_000);
19790
19791 storage
19792 .insert_conversation_tree(agent_id, None, &conv_a)
19793 .unwrap();
19794 storage
19795 .insert_conversation_tree(agent_id, None, &conv_b)
19796 .unwrap();
19797 storage
19798 .insert_conversation_tree(agent_id, None, &conv_c)
19799 .unwrap();
19800
19801 let user_order: Vec<PathBuf> = storage
19802 .list_conversations(10, 0)
19803 .unwrap()
19804 .into_iter()
19805 .map(|conv| conv.source_path)
19806 .collect();
19807 assert_eq!(
19808 user_order,
19809 vec![
19810 PathBuf::from("/tmp/a.jsonl"),
19811 PathBuf::from("/tmp/c.jsonl"),
19812 PathBuf::from("/tmp/b.jsonl"),
19813 ]
19814 );
19815
19816 let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
19817 let rebuild_order: Vec<PathBuf> = storage
19818 .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
19819 .unwrap()
19820 .into_iter()
19821 .map(|conv| conv.source_path)
19822 .collect();
19823 assert_eq!(
19824 rebuild_order,
19825 vec![
19826 PathBuf::from("/tmp/a.jsonl"),
19827 PathBuf::from("/tmp/b.jsonl"),
19828 PathBuf::from("/tmp/c.jsonl"),
19829 ]
19830 );
19831
19832 let first_page = storage
19833 .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
19834 .unwrap();
19835 let first_page_paths: Vec<PathBuf> = first_page
19836 .iter()
19837 .map(|conv| conv.source_path.clone())
19838 .collect();
19839 assert_eq!(
19840 first_page_paths,
19841 vec![PathBuf::from("/tmp/a.jsonl"), PathBuf::from("/tmp/b.jsonl")]
19842 );
19843
19844 let second_page = storage
19845 .list_conversations_for_lexical_rebuild_after_id(
19846 2,
19847 first_page
19848 .last()
19849 .and_then(|conv| conv.id)
19850 .expect("first page should include an id"),
19851 &agent_slugs,
19852 &workspace_paths,
19853 )
19854 .unwrap();
19855 let second_page_paths: Vec<PathBuf> = second_page
19856 .iter()
19857 .map(|conv| conv.source_path.clone())
19858 .collect();
19859 assert_eq!(second_page_paths, vec![PathBuf::from("/tmp/c.jsonl")]);
19860
19861 let bounded_page = storage
19862 .list_conversations_for_lexical_rebuild_after_id_through_id(
19863 10,
19864 0,
19865 first_page
19866 .last()
19867 .and_then(|conv| conv.id)
19868 .expect("first page should include an id"),
19869 &agent_slugs,
19870 &workspace_paths,
19871 )
19872 .unwrap();
19873 let bounded_paths: Vec<PathBuf> = bounded_page
19874 .iter()
19875 .map(|conv| conv.source_path.clone())
19876 .collect();
19877 assert_eq!(
19878 bounded_paths,
19879 vec![PathBuf::from("/tmp/a.jsonl"), PathBuf::from("/tmp/b.jsonl")]
19880 );
19881 }
19882
/// Keyset pagination for the lexical rebuild must visit every surviving conversation exactly
/// once, in ascending id order, even when deleted rows leave holes in the id sequence.
#[test]
fn keyset_traversal_handles_sparse_holey_conversation_ids() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("agent_search.db")).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds a single-message conversation whose identifiers all derive from `label`.
    let conversation_for = |label: &str, ts: i64| {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    };

    // Six inserts; the assertions below rely on them receiving ids 1 through 6.
    for offset in 0..6_i64 {
        let conversation = conversation_for(&format!("conv-{offset}"), 1000 + offset);
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Punch holes at ids 2 and 4. Foreign keys are switched off so the parent rows can be
    // removed before their messages.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (2, 4)",
        "DELETE FROM messages WHERE conversation_id IN (2, 4)",
    ] {
        storage.conn.execute_compat(delete_sql, fparams![]).unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Fetches one page (limit 2) of conversation ids strictly after `after_id`.
    let page_ids_after = |after_id: i64| -> Vec<i64> {
        storage
            .list_conversations_for_lexical_rebuild_after_id(
                2,
                after_id,
                &agent_slugs,
                &workspace_paths,
            )
            .unwrap()
            .iter()
            .map(|conversation| conversation.id.unwrap())
            .collect()
    };

    let first_page = page_ids_after(0);
    assert_eq!(first_page.len(), 2);
    assert_eq!(first_page, vec![1, 3]);

    let second_page = page_ids_after(*first_page.last().unwrap());
    assert_eq!(second_page.len(), 2);
    assert_eq!(second_page, vec![5, 6]);

    // Resuming after the final id must yield nothing.
    let third_page = page_ids_after(*second_page.last().unwrap());
    assert!(third_page.is_empty());

    let visited = [first_page, second_page].concat();
    assert_eq!(visited, vec![1, 3, 5, 6]);
}
19983
/// The "after id, through id" listing must return only the surviving conversations inside the
/// requested id window when several ids in that window have been deleted.
#[test]
fn keyset_traversal_through_id_with_sparse_ranges() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("agent_search.db")).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds a single-message conversation whose identifiers all derive from `label`.
    let conversation_for = |label: &str, ts: i64| {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    };

    // Ten inserts; the assertions below rely on them receiving ids 1 through 10.
    for offset in 0..10_i64 {
        let conversation = conversation_for(&format!("conv-{offset}"), 1000 + offset);
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Remove ids 3, 5, 7 and 8 so that 1, 2, 4, 6, 9 and 10 remain.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (3, 5, 7, 8)",
        "DELETE FROM messages WHERE conversation_id IN (3, 5, 7, 8)",
    ] {
        storage.conn.execute_compat(delete_sql, fparams![]).unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Lists conversation ids for the given (after, through) bounds; the limit of 100 is far
    // above the number of rows so it never truncates the result here.
    let ids_in_window = |after_id, through_id| -> Vec<i64> {
        storage
            .list_conversations_for_lexical_rebuild_after_id_through_id(
                100,
                after_id,
                through_id,
                &agent_slugs,
                &workspace_paths,
            )
            .unwrap()
            .iter()
            .map(|conversation| conversation.id.unwrap())
            .collect()
    };

    // The upper bound itself (id 5) was deleted.
    assert_eq!(ids_in_window(0, 5), vec![1, 2, 4]);

    // The lower bound is excluded while the surviving upper bound (id 10) is included.
    assert_eq!(ids_in_window(4, 10), vec![6, 9, 10]);

    // Nothing exists past the last id.
    assert!(ids_in_window(10, 20).is_empty());
}
20090
/// Footprint rows must be produced for every conversation (including one with no messages),
/// with `message_bytes` expressed as a multiple of the planner's per-message estimate rather
/// than the real content length, and with a sparse `idx` of 10 counted as 11 messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_estimates_bytes_and_keeps_empty_conversations()
{
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("agent_search.db")).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Builds a message that carries no author, extra JSON or snippets.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Stores a conversation holding `messages` and hands back its database id.
    let insert_conversation = |external_id: &str, started_at: i64, messages: Vec<Message>| {
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some(external_id.to_string()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(started_at),
            ended_at: Some(started_at + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap()
            .conversation_id
    };

    let ascii_id = insert_conversation(
        "footprint-ascii",
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_001, "abc"),
            message(1, MessageRole::Agent, 1_700_000_000_002, "defg"),
        ],
    );
    let empty_id = insert_conversation("footprint-empty", 1_700_000_001_000, Vec::new());
    let utf8_id = insert_conversation(
        "footprint-utf8",
        1_700_000_002_000,
        vec![message(0, MessageRole::Tool, 1_700_000_002_001, "hé🙂")],
    );
    let sparse_id = insert_conversation(
        "footprint-sparse",
        1_700_000_003_000,
        vec![message(10, MessageRole::User, 1_700_000_003_010, "sparse")],
    );

    // Drop the tail-state row of the UTF-8 conversation; it must still be reported below.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![utf8_id],
        )
        .unwrap();

    let expected = vec![
        LexicalRebuildConversationFootprintRow {
            conversation_id: ascii_id,
            message_count: 2,
            message_bytes: 2 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: empty_id,
            message_count: 0,
            message_bytes: 0,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: utf8_id,
            message_count: 1,
            message_bytes: LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: sparse_id,
            message_count: 11,
            message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
    ];

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();
    assert_eq!(footprints, expected);
}
20226
/// When both the cached tail columns on `conversations` and the `conversation_tail_state` row
/// are gone, the footprint listing must still report the conversation with a count derived
/// from its messages (11 for a single message at `idx` 10).
#[test]
fn list_conversation_footprints_for_lexical_rebuild_falls_back_for_missing_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("agent_search.db")).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // A lone message at index 10 makes the conversation sparse.
    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail".to_string()),
        title: Some("footprint-missing-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Wipe both places where tail metadata is cached.
    let clear_cached_tail_columns = "UPDATE conversations
 SET last_message_idx = NULL, last_message_created_at = NULL
 WHERE id = ?1";
    storage
        .conn
        .execute_compat(clear_cached_tail_columns, fparams![conversation_id])
        .unwrap();
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert_eq!(
        footprints, expected,
        "missing tail-cache metadata should fall back to messages MAX(idx) instead of treating legacy conversations as empty"
    );
}
20306
/// A tail cache that understates the last message index (0 while three messages exist) must
/// not lower the reported footprint: the listing has to report all three messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_raises_stale_low_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("agent_search.db")).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Three consecutive user messages at indices 0, 1 and 2.
    let messages: Vec<Message> = (0..3)
        .map(|idx| Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_010 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        })
        .collect();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-stale-tail".to_string()),
        title: Some("footprint-stale-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-stale-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Rewind both tail caches so they claim the conversation ends at message index 0.
    let rewind_conversation_columns = "UPDATE conversations
 SET last_message_idx = 0, last_message_created_at = 1700000000010
 WHERE id = ?1";
    let rewind_tail_state_row = "UPDATE conversation_tail_state
 SET last_message_idx = 0, last_message_created_at = 1700000000010
 WHERE conversation_id = ?1";
    storage
        .conn
        .execute_compat(rewind_conversation_columns, fparams![conversation_id])
        .unwrap();
    storage
        .conn
        .execute_compat(rewind_tail_state_row, fparams![conversation_id])
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 3,
        message_bytes: 3 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert_eq!(
        footprints, expected,
        "stale-low tail caches must not under-plan lexical shards and trip doc>plan invariants"
    );
}
20390
/// The footprint listing must keep working when the `conversation_tail_state` table does not
/// exist at all, still reporting a sparse conversation (one message at `idx` 10) as 11 messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_missing_tail_state_table() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("agent_search.db")).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail without hot table".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail-table".to_string()),
        title: Some("footprint-missing-tail-table".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail-table.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Clear the cached tail columns, then remove the tail-state table entirely.
    let clear_cached_tail_columns = "UPDATE conversations
 SET last_message_idx = NULL, last_message_created_at = NULL
 WHERE id = ?1";
    storage
        .conn
        .execute_compat(clear_cached_tail_columns, fparams![conversation_id])
        .unwrap();
    storage
        .conn
        .execute_compat("DROP TABLE conversation_tail_state", fparams![])
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert_eq!(
        footprints, expected,
        "read-only lexical self-heal must tolerate pre-tail-cache databases and use messages MAX(idx)"
    );
}
20467
/// The checked-in search demo fixture database, opened read-only, must yield a non-empty
/// footprint listing in which every conversation reports at least one message.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_legacy_search_demo_fixture() {
    // <crate root>/tests/fixtures/search_demo_data/agent_search.db
    let mut fixture_db = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture_db.extend(["tests", "fixtures", "search_demo_data", "agent_search.db"]);

    let storage = FrankenStorage::open_readonly(&fixture_db).unwrap();
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert!(
        !footprints.is_empty(),
        "search self-heal should be able to plan a lexical rebuild from the legacy search demo fixture"
    );

    let every_conversation_has_messages = footprints
        .iter()
        .map(|row| row.message_count)
        .all(|count| count > 0);
    assert!(
        every_conversation_has_messages,
        "legacy fixture conversations should derive message counts from messages when tail caches are absent"
    );
}
20492
/// A stored row whose `source_id` is blank but whose `origin_host` is set must be listed with
/// the host as its source id, both by the regular listing and by the lexical rebuild listing.
#[test]
fn lexical_rebuild_listing_normalizes_host_only_remote_source_from_blank_source_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("agent_search.db")).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("legacy-blank-source".into()),
        title: Some("Legacy blank source".into()),
        source_path: PathBuf::from("/tmp/legacy-blank-source.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Rewrite the row in place to the legacy shape: whitespace-only source id plus a host.
    // Foreign keys are disabled around the update and re-enabled right after.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    storage
        .conn
        .execute_compat(
            "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE id = ?3",
            fparams![" ", "dev@laptop", conversation_id],
        )
        .unwrap();
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    // Regular listing path.
    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let listed_row = &listed[0];
    assert_eq!(listed_row.source_id, "dev@laptop");
    assert_eq!(listed_row.origin_host.as_deref(), Some("dev@laptop"));

    // Lexical rebuild listing path.
    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
    let rebuild_listed = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    assert_eq!(rebuild_listed.len(), 1);
    let rebuild_row = &rebuild_listed[0];
    assert_eq!(rebuild_row.source_id, "dev@laptop");
    assert_eq!(rebuild_row.origin_host.as_deref(), Some("dev@laptop"));
}
20561
/// End-to-end check of `seed_canonical_from_best_historical_bundle`.
///
/// A backup database is populated with one conversation and runtime meta keys, then rewritten
/// to look like a schema-version-13 bundle carrying two `fts_messages` rows in `sqlite_master`.
/// After seeding a fresh canonical database from it, the test expects: the conversation and
/// message to be present, the scan / index / embedding meta values to be absent, exactly one
/// `historical_bundle_salvaged:*` key, a single `fts_messages` schema row, and a canonical
/// database that `FrankenStorage` opens at `CURRENT_SCHEMA_VERSION` with a queryable FTS table.
#[test]
fn seed_canonical_from_best_historical_bundle_copies_data_and_resets_runtime_meta() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    fs::create_dir_all(backup_db.parent().unwrap()).unwrap();

    // --- Populate the historical backup through the normal storage API. ---
    let backup = SqliteStorage::open(&backup_db).unwrap();
    let agent_id = backup
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    let seeded_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "seeded message".into(),
        extra_json: serde_json::json!({"usage": {"total_tokens": 12}}),
        snippets: Vec::new(),
    };
    let historical = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("seed-conv".into()),
        title: Some("Historical seed".into()),
        source_path: PathBuf::from("/tmp/historical-seed.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::json!({"seed": true}),
        messages: vec![seeded_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    backup
        .insert_conversation_tree(agent_id, None, &historical)
        .unwrap();

    // Runtime bookkeeping that the seed is expected NOT to carry over.
    backup.set_last_scan_ts(123).unwrap();
    backup.set_last_indexed_at(456).unwrap();
    backup.set_last_embedded_message_id(789).unwrap();
    backup
        .conn
        .execute_compat(
            "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
            fparams!["historical_bundle_salvaged:stale", "{\"stale\":true}"],
        )
        .unwrap();
    drop(backup);

    // --- Rewrite the backup into the legacy shape with duplicated FTS schema rows. ---
    let legacy_v13_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, content='', tokenize='porter')";
    let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
    let legacy = rusqlite_test_fixture_conn(&backup_db);
    legacy
        .execute_batch(
            "UPDATE meta SET value = '13' WHERE key = 'schema_version';
 DELETE FROM _schema_migrations WHERE version = 14;
 PRAGMA writable_schema = ON;",
        )
        .unwrap();
    legacy
        .execute(
            "DELETE FROM meta WHERE key = ?1",
            [FTS_FRANKEN_REBUILD_META_KEY],
        )
        .unwrap();
    // Both definitions are written straight into sqlite_master under the same table name.
    for fts_sql in [legacy_v13_fts_sql, duplicate_legacy_fts_sql] {
        legacy
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [fts_sql],
            )
            .unwrap();
    }
    legacy
        .execute_batch("PRAGMA writable_schema = OFF;")
        .unwrap();
    drop(legacy);

    // --- Sanity-check the fixture on a separate, short-lived connection. ---
    {
        let verify = rusqlite_test_fixture_conn(&backup_db);
        verify
            .execute_batch("PRAGMA writable_schema = ON;")
            .unwrap();
        let fts_entries: i64 = verify
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(
            fts_entries, 2,
            "test fixture should reproduce the duplicate legacy fts_messages rows"
        );
        let msg_count: i64 = verify
            .query_row("SELECT COUNT(*) FROM messages", [], |row| row.get(0))
            .unwrap();
        assert_eq!(msg_count, 1);
    }

    // --- Create an empty canonical database, close it, then seed it from the backup. ---
    drop(SqliteStorage::open(&canonical_db).unwrap());

    let outcome = seed_canonical_from_best_historical_bundle(&canonical_db)
        .unwrap()
        .unwrap();
    assert_eq!(outcome.bundles_imported, 1);
    assert_eq!(outcome.conversations_imported, 1);
    assert_eq!(outcome.messages_imported, 1);

    // --- The seeded data must be visible through a read-only connection. ---
    let readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let readonly_message_count: i64 = readonly
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(readonly_message_count, 1);

    // --- ...and through the regular storage handle, with runtime meta reset. ---
    let seeded = SqliteStorage::open(&canonical_db).unwrap();
    let session_total = seeded
        .count_sessions_in_range(None, None, None, None)
        .unwrap()
        .0;
    assert_eq!(session_total, 1);
    let message_count: i64 = seeded
        .conn
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(message_count, 1);
    assert_eq!(seeded.get_last_scan_ts().unwrap(), None);
    assert_eq!(seeded.get_last_embedded_message_id().unwrap(), None);

    let last_indexed: Option<String> = seeded
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = 'last_indexed_at'",
            fparams![],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(last_indexed.is_none());

    // Exactly one salvage marker is expected after seeding.
    let salvage_keys: Vec<String> = seeded
        .conn
        .query_map_collect(
            "SELECT key FROM meta WHERE key LIKE 'historical_bundle_salvaged:%' ORDER BY key",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(salvage_keys.len(), 1);

    // --- A second read-only open must see one FTS schema row and the seeded message. ---
    let reopened_readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let reopened_fts_entries: i64 = reopened_readonly
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        reopened_fts_entries, 1,
        "seeded canonical db should keep a single stock-SQLite fts_messages schema row"
    );
    let reopened_message_count: i64 = reopened_readonly
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(reopened_message_count, 1);

    // --- Finally open through FrankenStorage and exercise the fallback FTS table. ---
    let franken_seeded = FrankenStorage::open(&canonical_db).unwrap();
    assert_eq!(
        franken_seeded.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION
    );
    franken_seeded
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency after seed");
    let post_franken_schema_rows: i64 = franken_seeded
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(post_franken_schema_rows, 1);
    assert!(
        franken_seeded
            .raw()
            .query("SELECT rowid FROM fts_messages LIMIT 1")
            .is_ok()
    );
}
20808
/// When the only backup reports `schema_version` 12, seeding must fail with the
/// "too old for baseline import" error and leave the existing canonical database intact:
/// its sentinel meta row survives and no conversations are imported.
#[test]
fn failed_baseline_seed_preserves_existing_canonical_bundle() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260325T120000Z.bad-seed.bak");
    fs::create_dir_all(backup_db.parent().unwrap()).unwrap();

    // Mark the canonical database with a sentinel so a later overwrite would be detectable.
    let canonical = SqliteStorage::open(&canonical_db).unwrap();
    canonical
        .conn
        .execute_compat(
            "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
            fparams!["sentinel", "keep-me"],
        )
        .unwrap();
    drop(canonical);

    // Populate the backup with one conversation through the normal storage API.
    let backup = SqliteStorage::open(&backup_db).unwrap();
    let agent_id = backup
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    let doomed_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "this seed should fail".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let bad_seed = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("bad-seed-conv".into()),
        title: Some("Bad seed".into()),
        source_path: PathBuf::from("/tmp/bad-seed.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::json!({"seed": "bad"}),
        messages: vec![doomed_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    backup
        .insert_conversation_tree(agent_id, None, &bad_seed)
        .unwrap();
    drop(backup);

    // Downgrade the recorded schema version so the backup is rejected as a baseline.
    let legacy = FrankenConnection::open(backup_db.to_string_lossy().into_owned()).unwrap();
    legacy
        .execute("UPDATE meta SET value = '12' WHERE key = 'schema_version'")
        .unwrap();
    drop(legacy);

    let err = seed_canonical_from_best_historical_bundle(&canonical_db).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("schema_version 12 is too old for baseline import"),
        "unexpected seed error: {err:#}"
    );

    // The canonical database must still hold the sentinel and remain empty of conversations.
    let reopened = SqliteStorage::open(&canonical_db).unwrap();
    let sentinel: Option<String> = reopened
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = 'sentinel'",
            fparams![],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert_eq!(sentinel.as_deref(), Some("keep-me"));

    let conversation_count: i64 = reopened
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversation_count, 0);

    // The same must hold when the file is opened read-only.
    let readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let readonly_conversation_count: i64 = readonly
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(readonly_conversation_count, 0);
}
20915
/// `fetch_messages` must return the stored `extra_json`, whereas
/// `fetch_messages_for_lexical_rebuild` must return the same message (content and author
/// intact) with `extra_json` left null.
#[test]
fn fetch_messages_for_lexical_rebuild_skips_extra_json() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // One agent message carrying a non-trivial extra_json payload.
    let message_with_extras = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "indexed text".into(),
        extra_json: serde_json::json!({
            "usage": { "total_tokens": 1234 },
            "irrelevant_blob": "still preserved in canonical storage"
        }),
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-rebuild-test".into()),
        title: Some("Lexical rebuild".into()),
        source_path: PathBuf::from("/tmp/lexical-rebuild.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![message_with_extras],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // The full fetch keeps the payload.
    let stored = storage.fetch_messages(conversation_id).unwrap();
    assert_eq!(stored.len(), 1);
    assert!(!stored[0].extra_json.is_null());

    // The lexical-rebuild fetch returns the text fields but a null payload.
    let lexical = storage
        .fetch_messages_for_lexical_rebuild(conversation_id)
        .unwrap();
    assert_eq!(lexical.len(), 1);
    let lexical_message = &lexical[0];
    assert_eq!(lexical_message.content, "indexed text");
    assert_eq!(lexical_message.author.as_deref(), Some("assistant"));
    assert!(lexical_message.extra_json.is_null());
}
20976
20977 #[test]
20978 fn fetch_messages_for_lexical_rebuild_batch_groups_and_orders_messages() {
20979 let dir = TempDir::new().unwrap();
20980 let db_path = dir.path().join("test.db");
20981 let storage = SqliteStorage::open(&db_path).unwrap();
20982
20983 let agent = Agent {
20984 id: None,
20985 slug: "codex".into(),
20986 name: "Codex".into(),
20987 version: Some("0.2.3".into()),
20988 kind: AgentKind::Cli,
20989 };
20990 let agent_id = storage.ensure_agent(&agent).unwrap();
20991
20992 let first = Conversation {
20993 id: None,
20994 agent_slug: "codex".into(),
20995 workspace: Some(PathBuf::from("/tmp/workspace")),
20996 external_id: Some("lexical-batch-1".into()),
20997 title: Some("Lexical batch 1".into()),
20998 source_path: PathBuf::from("/tmp/lexical-batch-1.jsonl"),
20999 started_at: Some(1_700_000_000_000),
21000 ended_at: Some(1_700_000_000_100),
21001 approx_tokens: Some(42),
21002 metadata_json: serde_json::Value::Null,
21003 messages: vec![
21004 Message {
21005 id: None,
21006 idx: 0,
21007 role: MessageRole::User,
21008 author: Some("user".into()),
21009 created_at: Some(1_700_000_000_010),
21010 content: "first-a".into(),
21011 extra_json: serde_json::json!({"opaque": true}),
21012 snippets: Vec::new(),
21013 },
21014 Message {
21015 id: None,
21016 idx: 1,
21017 role: MessageRole::Agent,
21018 author: Some("assistant".into()),
21019 created_at: Some(1_700_000_000_020),
21020 content: "first-b".into(),
21021 extra_json: serde_json::json!({"opaque": true}),
21022 snippets: Vec::new(),
21023 },
21024 ],
21025 source_id: LOCAL_SOURCE_ID.into(),
21026 origin_host: None,
21027 };
21028
21029 let second = Conversation {
21030 id: None,
21031 agent_slug: "codex".into(),
21032 workspace: Some(PathBuf::from("/tmp/workspace")),
21033 external_id: Some("lexical-batch-2".into()),
21034 title: Some("Lexical batch 2".into()),
21035 source_path: PathBuf::from("/tmp/lexical-batch-2.jsonl"),
21036 started_at: Some(1_700_000_000_200),
21037 ended_at: Some(1_700_000_000_300),
21038 approx_tokens: Some(84),
21039 metadata_json: serde_json::Value::Null,
21040 messages: vec![Message {
21041 id: None,
21042 idx: 0,
21043 role: MessageRole::Tool,
21044 author: Some("tool".into()),
21045 created_at: Some(1_700_000_000_210),
21046 content: "second-a".into(),
21047 extra_json: serde_json::json!({"opaque": true}),
21048 snippets: Vec::new(),
21049 }],
21050 source_id: LOCAL_SOURCE_ID.into(),
21051 origin_host: None,
21052 };
21053 let third = Conversation {
21054 external_id: Some("lexical-batch-3".into()),
21055 title: Some("Lexical batch 3".into()),
21056 source_path: PathBuf::from("/tmp/lexical-batch-3.jsonl"),
21057 messages: vec![Message {
21058 id: None,
21059 idx: 0,
21060 role: MessageRole::System,
21061 author: Some("system".into()),
21062 created_at: Some(1_700_000_000_410),
21063 content: "third-a".into(),
21064 extra_json: serde_json::json!({"opaque": true}),
21065 snippets: Vec::new(),
21066 }],
21067 ..second.clone()
21068 };
21069
21070 let first_id = storage
21071 .insert_conversation_tree(agent_id, None, &first)
21072 .unwrap()
21073 .conversation_id;
21074 let second_id = storage
21075 .insert_conversation_tree(agent_id, None, &second)
21076 .unwrap()
21077 .conversation_id;
21078 let third_id = storage
21079 .insert_conversation_tree(agent_id, None, &third)
21080 .unwrap()
21081 .conversation_id;
21082
21083 let lexical = storage
21084 .fetch_messages_for_lexical_rebuild_batch(&[third_id, first_id], None, None)
21085 .unwrap();
21086
21087 let first_messages = lexical.get(&first_id).expect("first conversation");
21088 assert_eq!(first_messages.len(), 2);
21089 assert_eq!(first_messages[0].content, "first-a");
21090 assert_eq!(first_messages[1].content, "first-b");
21091 assert!(
21092 first_messages
21093 .iter()
21094 .all(|message| message.extra_json.is_null())
21095 );
21096
21097 assert!(
21098 !lexical.contains_key(&second_id),
21099 "batch fetch must exclude conversations not requested by the caller"
21100 );
21101
21102 let third_messages = lexical.get(&third_id).expect("third conversation");
21103 assert_eq!(third_messages.len(), 1);
21104 assert_eq!(third_messages[0].content, "third-a");
21105 assert!(third_messages[0].extra_json.is_null());
21106 }
21107
21108 #[test]
21109 fn fetch_messages_for_lexical_rebuild_batch_enforces_content_byte_guardrail() {
21110 let dir = TempDir::new().unwrap();
21111 let db_path = dir.path().join("test.db");
21112 let storage = SqliteStorage::open(&db_path).unwrap();
21113
21114 let agent = Agent {
21115 id: None,
21116 slug: "codex".into(),
21117 name: "Codex".into(),
21118 version: Some("0.2.3".into()),
21119 kind: AgentKind::Cli,
21120 };
21121 let agent_id = storage.ensure_agent(&agent).unwrap();
21122
21123 let conversation = Conversation {
21124 id: None,
21125 agent_slug: "codex".into(),
21126 workspace: Some(PathBuf::from("/tmp/workspace")),
21127 external_id: Some("lexical-batch-guard".into()),
21128 title: Some("Lexical batch guard".into()),
21129 source_path: PathBuf::from("/tmp/lexical-batch-guard.jsonl"),
21130 started_at: Some(1_700_000_000_000),
21131 ended_at: Some(1_700_000_000_100),
21132 approx_tokens: Some(42),
21133 metadata_json: serde_json::Value::Null,
21134 messages: vec![
21135 Message {
21136 id: None,
21137 idx: 0,
21138 role: MessageRole::User,
21139 author: Some("user".into()),
21140 created_at: Some(1_700_000_000_010),
21141 content: "123456".into(),
21142 extra_json: serde_json::Value::Null,
21143 snippets: Vec::new(),
21144 },
21145 Message {
21146 id: None,
21147 idx: 1,
21148 role: MessageRole::Agent,
21149 author: Some("assistant".into()),
21150 created_at: Some(1_700_000_000_020),
21151 content: "abcdef".into(),
21152 extra_json: serde_json::Value::Null,
21153 snippets: Vec::new(),
21154 },
21155 ],
21156 source_id: LOCAL_SOURCE_ID.into(),
21157 origin_host: None,
21158 };
21159
21160 let conversation_id = storage
21161 .insert_conversation_tree(agent_id, None, &conversation)
21162 .unwrap()
21163 .conversation_id;
21164
21165 let error = storage
21166 .fetch_messages_for_lexical_rebuild_batch(&[conversation_id], Some(10), Some(8))
21167 .expect_err("guardrail should reject oversized batch content");
21168
21169 let message = format!("{error:#}");
21170 assert!(
21171 message.contains("content-byte guardrail"),
21172 "expected guardrail reason in error, got {message}"
21173 );
21174 }
21175
21176 #[test]
21177 fn fetch_messages_handles_manual_rows_inserted_via_raw_connection() {
21178 let dir = TempDir::new().unwrap();
21179 let db_path = dir.path().join("manual-rows.db");
21180 let storage = FrankenStorage::open(&db_path).unwrap();
21181 let conn = storage.raw();
21182
21183 conn.execute(
21184 "INSERT INTO agents (id, slug, name, kind, created_at, updated_at)
21185 VALUES (1, 'claude_code', 'Claude Code', 'local', 0, 0)",
21186 )
21187 .unwrap();
21188 conn.execute(
21189 "INSERT INTO conversations
21190 (id, agent_id, external_id, title, source_path, source_id, started_at)
21191 VALUES (1, 1, 'manual-ext', 'Manual Session', '/tmp/manual.jsonl', 'local', 200)",
21192 )
21193 .unwrap();
21194 conn.execute(
21195 "INSERT INTO messages
21196 (id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
21197 VALUES (1, 1, 0, 'user', 'tester', 1700000000000, 'manual body', '{\"k\":1}', NULL)",
21198 )
21199 .unwrap();
21200
21201 let lexical = storage.fetch_messages_for_lexical_rebuild(1).unwrap();
21202 assert_eq!(lexical.len(), 1);
21203 assert_eq!(lexical[0].content, "manual body");
21204
21205 let full = storage.fetch_messages(1).unwrap();
21206 assert_eq!(full.len(), 1);
21207 assert_eq!(full[0].content, "manual body");
21208 assert_eq!(full[0].author.as_deref(), Some("tester"));
21209 assert_eq!(full[0].extra_json, serde_json::json!({ "k": 1 }));
21210 }
21211
21212 #[test]
21213 fn lexical_rebuild_batch_messages_query_avoids_sorter_temp_btrees() {
21214 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21215 use std::path::PathBuf;
21216
21217 let dir = TempDir::new().unwrap();
21218 let db_path = dir.path().join("agent_search.db");
21219 let storage = SqliteStorage::open(&db_path).unwrap();
21220
21221 let agent = Agent {
21222 id: None,
21223 slug: "claude_code".into(),
21224 name: "Claude Code".into(),
21225 version: None,
21226 kind: AgentKind::Cli,
21227 };
21228 let agent_id = storage.ensure_agent(&agent).unwrap();
21229
21230 for (external_id, base_ts) in [
21231 ("conv-1", 1_700_000_000_000_i64),
21232 ("conv-2", 1_700_000_001_000_i64),
21233 ] {
21234 let conversation = Conversation {
21235 id: None,
21236 agent_slug: "claude_code".into(),
21237 workspace: Some(PathBuf::from("/tmp/workspace")),
21238 external_id: Some(external_id.to_string()),
21239 title: Some("Lexical rebuild".into()),
21240 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
21241 started_at: Some(base_ts),
21242 ended_at: Some(base_ts + 100),
21243 approx_tokens: None,
21244 metadata_json: serde_json::Value::Null,
21245 messages: vec![
21246 Message {
21247 id: None,
21248 idx: 0,
21249 role: MessageRole::User,
21250 author: Some("user".into()),
21251 created_at: Some(base_ts + 10),
21252 content: format!("{external_id}-first"),
21253 extra_json: serde_json::Value::Null,
21254 snippets: Vec::new(),
21255 },
21256 Message {
21257 id: None,
21258 idx: 1,
21259 role: MessageRole::Agent,
21260 author: Some("assistant".into()),
21261 created_at: Some(base_ts + 20),
21262 content: format!("{external_id}-second"),
21263 extra_json: serde_json::Value::Null,
21264 snippets: Vec::new(),
21265 },
21266 ],
21267 source_id: LOCAL_SOURCE_ID.into(),
21268 origin_host: None,
21269 };
21270 storage
21271 .insert_conversation_tree(agent_id, None, &conversation)
21272 .unwrap();
21273 }
21274
21275 let conversation_ids: Vec<i64> = storage
21276 .conn
21277 .query_map_collect(
21278 "SELECT id FROM conversations ORDER BY id",
21279 fparams![],
21280 |row| row.get_typed(0),
21281 )
21282 .unwrap();
21283 assert_eq!(conversation_ids.len(), 2);
21284
21285 let plan_details: Vec<String> = storage
21286 .conn
21287 .query_map_collect(
21288 "EXPLAIN QUERY PLAN \
21289 SELECT conversation_id, id, idx, role, author, created_at, content \
21290 FROM messages \
21291 WHERE conversation_id IN (?1, ?2) \
21292 ORDER BY conversation_id ASC, idx ASC",
21293 fparams![conversation_ids[0], conversation_ids[1]],
21294 |row| row.get_typed(3),
21295 )
21296 .unwrap();
21297
21298 assert!(
21299 plan_details
21300 .iter()
21301 .any(|detail| detail.contains("sqlite_autoindex_messages_1")),
21302 "expected batched lexical rebuild fetch to use the conversation_id/idx composite index, got {plan_details:?}"
21303 );
21304 assert!(
21305 !plan_details
21306 .iter()
21307 .any(|detail| detail.contains("TEMP B-TREE")),
21308 "expected batched lexical rebuild fetch to avoid sorter temp b-trees, got {plan_details:?}"
21309 );
21310 }
21311
21312 #[test]
21313 fn stream_messages_for_lexical_rebuild_groups_and_orders_messages() {
21314 let dir = TempDir::new().unwrap();
21315 let db_path = dir.path().join("test.db");
21316 let storage = SqliteStorage::open(&db_path).unwrap();
21317
21318 let agent = Agent {
21319 id: None,
21320 slug: "codex".into(),
21321 name: "Codex".into(),
21322 version: Some("0.2.3".into()),
21323 kind: AgentKind::Cli,
21324 };
21325 let agent_id = storage.ensure_agent(&agent).unwrap();
21326
21327 let first = Conversation {
21328 id: None,
21329 agent_slug: "codex".into(),
21330 workspace: Some(PathBuf::from("/tmp/workspace")),
21331 external_id: Some("lexical-stream-1".into()),
21332 title: Some("Lexical stream 1".into()),
21333 source_path: PathBuf::from("/tmp/lexical-stream-1.jsonl"),
21334 started_at: Some(1_700_000_000_000),
21335 ended_at: Some(1_700_000_000_100),
21336 approx_tokens: Some(42),
21337 metadata_json: serde_json::Value::Null,
21338 messages: vec![
21339 Message {
21340 id: None,
21341 idx: 0,
21342 role: MessageRole::User,
21343 author: Some("user".into()),
21344 created_at: Some(1_700_000_000_010),
21345 content: "first-a".into(),
21346 extra_json: serde_json::json!({"opaque": true}),
21347 snippets: Vec::new(),
21348 },
21349 Message {
21350 id: None,
21351 idx: 1,
21352 role: MessageRole::Agent,
21353 author: Some("assistant".into()),
21354 created_at: Some(1_700_000_000_020),
21355 content: "first-b".into(),
21356 extra_json: serde_json::json!({"opaque": true}),
21357 snippets: Vec::new(),
21358 },
21359 ],
21360 source_id: LOCAL_SOURCE_ID.into(),
21361 origin_host: None,
21362 };
21363
21364 let second = Conversation {
21365 id: None,
21366 agent_slug: "codex".into(),
21367 workspace: Some(PathBuf::from("/tmp/workspace")),
21368 external_id: Some("lexical-stream-2".into()),
21369 title: Some("Lexical stream 2".into()),
21370 source_path: PathBuf::from("/tmp/lexical-stream-2.jsonl"),
21371 started_at: Some(1_700_000_000_200),
21372 ended_at: Some(1_700_000_000_300),
21373 approx_tokens: Some(84),
21374 metadata_json: serde_json::Value::Null,
21375 messages: vec![Message {
21376 id: None,
21377 idx: 0,
21378 role: MessageRole::Tool,
21379 author: Some("tool".into()),
21380 created_at: Some(1_700_000_000_210),
21381 content: "second-a".into(),
21382 extra_json: serde_json::json!({"opaque": true}),
21383 snippets: Vec::new(),
21384 }],
21385 source_id: LOCAL_SOURCE_ID.into(),
21386 origin_host: None,
21387 };
21388
21389 let first_id = storage
21390 .insert_conversation_tree(agent_id, None, &first)
21391 .unwrap()
21392 .conversation_id;
21393 let second_id = storage
21394 .insert_conversation_tree(agent_id, None, &second)
21395 .unwrap()
21396 .conversation_id;
21397
21398 let mut streamed = Vec::new();
21399 storage
21400 .stream_messages_for_lexical_rebuild_from_conversation_id(first_id, |row| {
21401 streamed.push((
21402 row.conversation_id,
21403 row.idx,
21404 row.role,
21405 row.author,
21406 row.content,
21407 ));
21408 Ok(())
21409 })
21410 .unwrap();
21411
21412 assert_eq!(
21413 streamed,
21414 vec![
21415 (
21416 first_id,
21417 0,
21418 "user".to_string(),
21419 Some("user".to_string()),
21420 "first-a".to_string(),
21421 ),
21422 (
21423 first_id,
21424 1,
21425 "agent".to_string(),
21426 Some("assistant".to_string()),
21427 "first-b".to_string(),
21428 ),
21429 (
21430 second_id,
21431 0,
21432 "tool".to_string(),
21433 Some("tool".to_string()),
21434 "second-a".to_string(),
21435 ),
21436 ]
21437 );
21438 }
21439
21440 #[test]
21441 fn stream_messages_for_lexical_rebuild_between_conversation_ids_respects_upper_bound() {
21442 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21443 use std::path::PathBuf;
21444
21445 let dir = TempDir::new().unwrap();
21446 let db_path = dir.path().join("agent_search.db");
21447 let storage = SqliteStorage::open(&db_path).unwrap();
21448
21449 let agent = Agent {
21450 id: None,
21451 slug: "claude_code".into(),
21452 name: "Claude Code".into(),
21453 version: Some("1.2.3".into()),
21454 kind: AgentKind::Cli,
21455 };
21456 let agent_id = storage.ensure_agent(&agent).unwrap();
21457
21458 let first = Conversation {
21459 id: None,
21460 agent_slug: "claude_code".into(),
21461 workspace: Some(PathBuf::from("/tmp/workspace")),
21462 external_id: Some("lexical-range-1".into()),
21463 title: Some("Lexical range 1".into()),
21464 source_path: PathBuf::from("/tmp/lexical-range-1.jsonl"),
21465 started_at: Some(1_700_000_000_000),
21466 ended_at: Some(1_700_000_000_100),
21467 approx_tokens: Some(42),
21468 metadata_json: serde_json::Value::Null,
21469 messages: vec![Message {
21470 id: None,
21471 idx: 0,
21472 role: MessageRole::User,
21473 author: Some("user".into()),
21474 created_at: Some(1_700_000_000_010),
21475 content: "first-only".into(),
21476 extra_json: serde_json::json!({"opaque": true}),
21477 snippets: Vec::new(),
21478 }],
21479 source_id: LOCAL_SOURCE_ID.into(),
21480 origin_host: None,
21481 };
21482
21483 let second = Conversation {
21484 id: None,
21485 agent_slug: "claude_code".into(),
21486 workspace: Some(PathBuf::from("/tmp/workspace")),
21487 external_id: Some("lexical-range-2".into()),
21488 title: Some("Lexical range 2".into()),
21489 source_path: PathBuf::from("/tmp/lexical-range-2.jsonl"),
21490 started_at: Some(1_700_000_000_200),
21491 ended_at: Some(1_700_000_000_300),
21492 approx_tokens: Some(84),
21493 metadata_json: serde_json::Value::Null,
21494 messages: vec![Message {
21495 id: None,
21496 idx: 0,
21497 role: MessageRole::Tool,
21498 author: Some("tool".into()),
21499 created_at: Some(1_700_000_000_210),
21500 content: "second-should-not-appear".into(),
21501 extra_json: serde_json::json!({"opaque": true}),
21502 snippets: Vec::new(),
21503 }],
21504 source_id: LOCAL_SOURCE_ID.into(),
21505 origin_host: None,
21506 };
21507
21508 let first_id = storage
21509 .insert_conversation_tree(agent_id, None, &first)
21510 .unwrap()
21511 .conversation_id;
21512 let second_id = storage
21513 .insert_conversation_tree(agent_id, None, &second)
21514 .unwrap()
21515 .conversation_id;
21516
21517 let mut streamed = Vec::new();
21518 storage
21519 .stream_messages_for_lexical_rebuild_between_conversation_ids(
21520 first_id,
21521 first_id,
21522 |row| {
21523 streamed.push((row.conversation_id, row.idx, row.content));
21524 Ok(())
21525 },
21526 )
21527 .unwrap();
21528
21529 assert_eq!(streamed, vec![(first_id, 0, "first-only".to_string())]);
21530 assert!(
21531 streamed
21532 .iter()
21533 .all(|(conversation_id, _, _)| *conversation_id != second_id),
21534 "upper bound should exclude later conversation ids"
21535 );
21536 }
21537
21538 #[test]
21539 fn stream_messages_for_lexical_rebuild_between_conversation_ids_handles_mixed_ranges() {
21540 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21541 use std::path::PathBuf;
21542
21543 let dir = TempDir::new().unwrap();
21544 let db_path = dir.path().join("agent_search.db");
21545 let storage = SqliteStorage::open(&db_path).unwrap();
21546
21547 let claude_agent_id = storage
21548 .ensure_agent(&Agent {
21549 id: None,
21550 slug: "claude_code".into(),
21551 name: "Claude Code".into(),
21552 version: None,
21553 kind: AgentKind::Cli,
21554 })
21555 .unwrap();
21556 let aider_agent_id = storage
21557 .ensure_agent(&Agent {
21558 id: None,
21559 slug: "aider".into(),
21560 name: "Aider".into(),
21561 version: None,
21562 kind: AgentKind::Cli,
21563 })
21564 .unwrap();
21565
21566 type MessageSpec = (i64, MessageRole, Option<String>, Option<i64>, String);
21567
21568 let mut expected = Vec::new();
21569 let mut first_conversation_id = None;
21570 let mut last_conversation_id = None;
21571 let mut insert_conversation =
21572 |agent_id: i64,
21573 external_id: &str,
21574 title: &str,
21575 source_path: &str,
21576 started_at: i64,
21577 message_specs: Vec<MessageSpec>| {
21578 let conversation = Conversation {
21579 id: None,
21580 agent_slug: if agent_id == aider_agent_id {
21581 "aider".into()
21582 } else {
21583 "claude_code".into()
21584 },
21585 workspace: Some(PathBuf::from("/tmp/workspace")),
21586 external_id: Some(external_id.to_string()),
21587 title: Some(title.to_string()),
21588 source_path: PathBuf::from(source_path),
21589 started_at: Some(started_at),
21590 ended_at: Some(started_at + 100),
21591 approx_tokens: None,
21592 metadata_json: serde_json::Value::Null,
21593 messages: message_specs
21594 .iter()
21595 .map(|(idx, role, author, created_at, content)| Message {
21596 id: None,
21597 idx: *idx,
21598 role: role.clone(),
21599 author: author.clone(),
21600 created_at: *created_at,
21601 content: content.clone(),
21602 extra_json: serde_json::Value::Null,
21603 snippets: Vec::new(),
21604 })
21605 .collect(),
21606 source_id: LOCAL_SOURCE_ID.into(),
21607 origin_host: None,
21608 };
21609 let conversation_id = storage
21610 .insert_conversation_tree(agent_id, None, &conversation)
21611 .unwrap()
21612 .conversation_id;
21613 if first_conversation_id.is_none() {
21614 first_conversation_id = Some(conversation_id);
21615 }
21616 last_conversation_id = Some(conversation_id);
21617 expected.extend(message_specs.into_iter().map(
21618 |(idx, role, author, created_at, content)| {
21619 (
21620 conversation_id,
21621 idx,
21622 match role {
21623 MessageRole::User => "user".to_string(),
21624 MessageRole::Agent => "agent".to_string(),
21625 MessageRole::Tool => "tool".to_string(),
21626 MessageRole::System => "system".to_string(),
21627 MessageRole::Other(other) => other,
21628 },
21629 author,
21630 created_at,
21631 content,
21632 )
21633 },
21634 ));
21635 };
21636
21637 for (label, base_ts) in [
21638 ("alpha", 1_700_000_000_000_i64),
21639 ("beta", 1_700_000_001_000_i64),
21640 ("gamma", 1_700_000_002_000_i64),
21641 ("delta", 1_700_000_003_000_i64),
21642 ("epsilon", 1_700_000_004_000_i64),
21643 ] {
21644 insert_conversation(
21645 claude_agent_id,
21646 &format!("lexical-{label}"),
21647 &format!("Lexical {label}"),
21648 &format!("/tmp/{label}.jsonl"),
21649 base_ts,
21650 vec![
21651 (
21652 0,
21653 MessageRole::User,
21654 None,
21655 Some(base_ts + 10),
21656 format!("{label}_content"),
21657 ),
21658 (
21659 1,
21660 MessageRole::Agent,
21661 None,
21662 Some(base_ts + 20),
21663 format!("{label}_content_response"),
21664 ),
21665 ],
21666 );
21667 }
21668
21669 insert_conversation(
21670 aider_agent_id,
21671 "lexical-aider-history",
21672 "Aider Chat: coding_agent_session_search",
21673 "/tmp/.aider.chat.history.md",
21674 1_764_619_673_394,
21675 vec![
21676 (
21677 0,
21678 MessageRole::System,
21679 Some("system".to_string()),
21680 None,
21681 "# aider chat started at 2025-12-01 20:07:47".to_string(),
21682 ),
21683 (
21684 1,
21685 MessageRole::User,
21686 Some("user".to_string()),
21687 None,
21688 "/tmp/workspace/.venv/bin/aider --no-git --message hello world".to_string(),
21689 ),
21690 ],
21691 );
21692 insert_conversation(
21693 aider_agent_id,
21694 "lexical-aider-fixture",
21695 "Aider Chat: aider",
21696 "/tmp/tests/fixtures/aider/.aider.chat.history.md",
21697 1_764_621_401_399,
21698 vec![
21699 (
21700 0,
21701 MessageRole::User,
21702 Some("user".to_string()),
21703 None,
21704 "/add src/main.rs".to_string(),
21705 ),
21706 (
21707 1,
21708 MessageRole::Agent,
21709 Some("assistant".to_string()),
21710 None,
21711 "Added src/main.rs to the chat.
21712
21713#### /add src/main.rs"
21714 .to_string(),
21715 ),
21716 (
21717 2,
21718 MessageRole::User,
21719 Some("user".to_string()),
21720 None,
21721 "Please refactor.".to_string(),
21722 ),
21723 (
21724 3,
21725 MessageRole::Agent,
21726 Some("assistant".to_string()),
21727 None,
21728 "Sure, here is the code.".to_string(),
21729 ),
21730 ],
21731 );
21732
21733 let mut streamed = Vec::new();
21734 storage
21735 .stream_messages_for_lexical_rebuild_between_conversation_ids(
21736 first_conversation_id.unwrap(),
21737 last_conversation_id.unwrap(),
21738 |row| {
21739 streamed.push((
21740 row.conversation_id,
21741 row.idx,
21742 row.role,
21743 row.author,
21744 row.created_at,
21745 row.content,
21746 ));
21747 Ok(())
21748 },
21749 )
21750 .unwrap();
21751
21752 assert_eq!(streamed, expected);
21753 }
21754
21755 #[test]
21756 fn lexical_rebuild_stream_queries_use_rowid_and_per_conversation_probes() {
21757 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21758 use std::path::PathBuf;
21759
21760 let dir = TempDir::new().unwrap();
21761 let db_path = dir.path().join("agent_search.db");
21762 let storage = SqliteStorage::open(&db_path).unwrap();
21763
21764 let agent = Agent {
21765 id: None,
21766 slug: "claude_code".into(),
21767 name: "Claude Code".into(),
21768 version: None,
21769 kind: AgentKind::Cli,
21770 };
21771 let agent_id = storage.ensure_agent(&agent).unwrap();
21772
21773 for (external_id, base_ts) in [
21774 ("conv-1", 1_700_000_000_000_i64),
21775 ("conv-2", 1_700_000_001_000_i64),
21776 ] {
21777 let conversation = Conversation {
21778 id: None,
21779 agent_slug: "claude_code".into(),
21780 workspace: Some(PathBuf::from("/tmp/workspace")),
21781 external_id: Some(external_id.to_string()),
21782 title: Some("Lexical rebuild".into()),
21783 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
21784 started_at: Some(base_ts),
21785 ended_at: Some(base_ts + 100),
21786 approx_tokens: None,
21787 metadata_json: serde_json::Value::Null,
21788 messages: vec![
21789 Message {
21790 id: None,
21791 idx: 0,
21792 role: MessageRole::User,
21793 author: Some("user".into()),
21794 created_at: Some(base_ts + 10),
21795 content: format!("{external_id}-first"),
21796 extra_json: serde_json::Value::Null,
21797 snippets: Vec::new(),
21798 },
21799 Message {
21800 id: None,
21801 idx: 1,
21802 role: MessageRole::Agent,
21803 author: Some("assistant".into()),
21804 created_at: Some(base_ts + 20),
21805 content: format!("{external_id}-second"),
21806 extra_json: serde_json::Value::Null,
21807 snippets: Vec::new(),
21808 },
21809 ],
21810 source_id: LOCAL_SOURCE_ID.into(),
21811 origin_host: None,
21812 };
21813 storage
21814 .insert_conversation_tree(agent_id, None, &conversation)
21815 .unwrap();
21816 }
21817
21818 let first_id: i64 = storage
21819 .conn
21820 .query_row_map(
21821 "SELECT id FROM conversations ORDER BY id LIMIT 1",
21822 fparams![],
21823 |row| row.get_typed(0),
21824 )
21825 .unwrap();
21826 let last_id: i64 = storage
21827 .conn
21828 .query_row_map(
21829 "SELECT id FROM conversations ORDER BY id DESC LIMIT 1",
21830 fparams![],
21831 |row| row.get_typed(0),
21832 )
21833 .unwrap();
21834
21835 let conversation_plan_details: Vec<String> = storage
21836 .conn
21837 .query_map_collect(
21838 "EXPLAIN QUERY PLAN SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
21839 fparams![first_id, last_id],
21840 |row| row.get_typed(3),
21841 )
21842 .unwrap();
21843 assert!(
21844 !conversation_plan_details
21845 .iter()
21846 .any(|detail| detail.contains("TEMP B-TREE")),
21847 "expected streamed lexical rebuild conversation listing to avoid sorter temp b-trees, got {conversation_plan_details:?}"
21848 );
21849
21850 let message_plan_details: Vec<String> = storage
21851 .conn
21852 .query_map_collect(
21853 "EXPLAIN QUERY PLAN SELECT id, idx, role, author, created_at, content FROM messages INDEXED BY sqlite_autoindex_messages_1 WHERE conversation_id = ?1 ORDER BY idx",
21854 fparams![first_id],
21855 |row| row.get_typed(3),
21856 )
21857 .unwrap();
21858 assert!(
21859 message_plan_details
21860 .iter()
21861 .any(|detail| detail.contains("sqlite_autoindex_messages_1")
21862 || detail.contains("idx_messages_conv_idx")),
21863 "expected per-conversation lexical rebuild fetch to use the conversation_id/idx index, got {message_plan_details:?}"
21864 );
21865 assert!(
21866 !message_plan_details
21867 .iter()
21868 .any(|detail| detail.contains("TEMP B-TREE")),
21869 "expected per-conversation lexical rebuild fetch to avoid sorter temp b-trees, got {message_plan_details:?}"
21870 );
21871 }
21872
21873 #[test]
21874 fn discover_historical_database_bundles_prefers_larger_archives_first() {
21875 let dir = TempDir::new().unwrap();
21876 let canonical_db = dir.path().join("agent_search.db");
21877 fs::write(&canonical_db, b"canonical").unwrap();
21878
21879 let smaller = dir.path().join("agent_search.corrupt.small");
21880 fs::write(&smaller, vec![0_u8; 32]).unwrap();
21881
21882 let backups_dir = dir.path().join("backups");
21883 fs::create_dir_all(&backups_dir).unwrap();
21884 let larger = backups_dir.join("agent_search.db.20260322T020200.bak");
21885 fs::write(&larger, vec![0_u8; 128]).unwrap();
21886
21887 let bundles = discover_historical_database_bundles(&canonical_db);
21888 let ordered_paths: Vec<PathBuf> =
21889 bundles.into_iter().map(|bundle| bundle.root_path).collect();
21890
21891 assert_eq!(ordered_paths, vec![larger, smaller]);
21892 }
21893
21894 #[test]
21895 fn discover_historical_database_bundles_prefers_queryable_direct_bundles_first() {
21896 let dir = TempDir::new().unwrap();
21897 let canonical_db = dir.path().join("agent_search.db");
21898 fs::write(&canonical_db, b"canonical").unwrap();
21899
21900 let larger_corrupt = dir.path().join("agent_search.corrupt.20260324_212907");
21901 fs::write(&larger_corrupt, vec![0_u8; 4096]).unwrap();
21902
21903 let backups_dir = dir.path().join("backups");
21904 fs::create_dir_all(&backups_dir).unwrap();
21905 let smaller_healthy = backups_dir.join("agent_search.db.20260322T020200.bak");
21906 let conn = FrankenConnection::open(smaller_healthy.to_string_lossy().into_owned()).unwrap();
21907 conn.execute_batch(
21908 "CREATE TABLE conversations (id INTEGER PRIMARY KEY, source_path TEXT);
21909 CREATE TABLE messages (
21910 id INTEGER PRIMARY KEY,
21911 conversation_id INTEGER NOT NULL,
21912 idx INTEGER NOT NULL,
21913 content TEXT
21914 );
21915 INSERT INTO conversations(id, source_path) VALUES (1, '/tmp/history.jsonl');
21916 INSERT INTO messages(id, conversation_id, idx, content)
21917 VALUES (1, 1, 0, 'seed');",
21918 )
21919 .unwrap();
21920 drop(conn);
21921
21922 let bundles = discover_historical_database_bundles(&canonical_db);
21923 let ordered_paths: Vec<PathBuf> = bundles
21924 .iter()
21925 .map(|bundle| bundle.root_path.clone())
21926 .collect();
21927
21928 assert_eq!(ordered_paths, vec![smaller_healthy, larger_corrupt]);
21929 assert!(bundles[0].supports_direct_readonly);
21930 assert!(!bundles[1].supports_direct_readonly);
21931 }
21932
21933 #[test]
21934 fn salvage_historical_databases_skips_unreadable_quarantined_bundles() {
21935 let dir = TempDir::new().unwrap();
21936 let canonical_db = dir.path().join("agent_search.db");
21937 let storage = SqliteStorage::open(&canonical_db).unwrap();
21938
21939 let quarantined = dir.path().join("agent_search.corrupt.20260324_212907");
21940 fs::write(&quarantined, b"not a sqlite database").unwrap();
21941
21942 let discovered: Vec<PathBuf> = discover_historical_database_bundles(&canonical_db)
21943 .into_iter()
21944 .map(|bundle| bundle.root_path)
21945 .collect();
21946 assert_eq!(discovered, vec![quarantined]);
21947
21948 let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
21949 assert_eq!(outcome.bundles_considered, 1);
21950 assert_eq!(outcome.bundles_imported, 0);
21951 assert_eq!(outcome.conversations_imported, 0);
21952 assert_eq!(outcome.messages_imported, 0);
21953 assert!(storage.list_conversations(10, 0).unwrap().is_empty());
21954 }
21955
21956 #[test]
21957 fn discover_historical_database_bundles_includes_repair_lab_and_snapshots_named_roots() {
21958 let dir = TempDir::new().unwrap();
21959 let canonical_db = dir.path().join("agent_search.db");
21960 fs::write(&canonical_db, b"canonical").unwrap();
21961
21962 let repair_lab_dir = dir.path().join("repair-lab").join("live-copy");
21963 fs::create_dir_all(&repair_lab_dir).unwrap();
21964 let repair_lab_db = repair_lab_dir.join("agent_search.db");
21965 fs::write(&repair_lab_db, vec![0_u8; 96]).unwrap();
21966 fs::write(
21967 repair_lab_dir.join("agent_search.rebuild-test.db"),
21968 vec![0_u8; 192],
21969 )
21970 .unwrap();
21971
21972 let snapshots_dir = dir.path().join("snapshots").join("20260324T013201Z");
21973 fs::create_dir_all(&snapshots_dir).unwrap();
21974 let snapshot_db = snapshots_dir.join("agent_search.db");
21975 fs::write(&snapshot_db, vec![0_u8; 64]).unwrap();
21976
21977 let bundles = discover_historical_database_bundles(&canonical_db);
21978 let ordered_paths: Vec<PathBuf> =
21979 bundles.into_iter().map(|bundle| bundle.root_path).collect();
21980
21981 assert!(ordered_paths.contains(&repair_lab_db));
21982 assert!(ordered_paths.contains(&snapshot_db));
21983 assert!(
21984 !ordered_paths
21985 .iter()
21986 .any(|path| path.file_name().and_then(|name| name.to_str())
21987 == Some("agent_search.rebuild-test.db"))
21988 );
21989 }
21990
    /// Checks that bundle discovery lists an untouched backup database ahead of a
    /// `repair-lab/replay-*` database whose schema metadata has been tampered with.
    ///
    /// Both databases receive the same single-message conversation through
    /// `SqliteStorage`. The replay copy is then rewritten over a raw rusqlite
    /// connection: its recorded schema version is set to 13, the migration-14
    /// bookkeeping row and the FTS rebuild meta key are removed, and an extra
    /// `fts_messages` row is inserted directly into `sqlite_master`.
    #[test]
    fn discover_historical_database_bundles_prefers_healthy_backup_over_replay_priority() {
        use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

        let dir = TempDir::new().unwrap();
        let canonical_db = dir.path().join("agent_search.db");
        // The canonical file only needs to exist; its contents are not a real database.
        fs::write(&canonical_db, b"canonical").unwrap();

        // Build the replay database under repair-lab/replay-<timestamp>/.
        let replay_dir = dir
            .path()
            .join("repair-lab")
            .join("replay-20260324T070101Z");
        fs::create_dir_all(&replay_dir).unwrap();
        let replay_db = replay_dir.join("agent_search.db");
        let replay_storage = SqliteStorage::open(&replay_db).unwrap();
        let agent = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        };
        let agent_id = replay_storage.ensure_agent(&agent).unwrap();
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("replay-conv".into()),
            title: Some("Replay bundle".into()),
            source_path: PathBuf::from("/tmp/replay.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::Value::Null,
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::Agent,
                author: Some("assistant".into()),
                created_at: Some(1_700_000_000_050),
                content: "replay message".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        replay_storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
        // Close the storage handle before reopening the file with rusqlite below.
        drop(replay_storage);

        // Rewrite the replay database's schema metadata over a raw connection.
        let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        let replay_legacy = rusqlite_test_fixture_conn(&replay_db);
        // Record schema version 13, forget migration 14, and enable direct
        // writes to sqlite_master for the insert that follows.
        replay_legacy
            .execute_batch(
                "UPDATE meta SET value = '13' WHERE key = 'schema_version';
                 DELETE FROM _schema_migrations WHERE version = 14;
                 PRAGMA writable_schema = ON;",
            )
            .unwrap();
        replay_legacy
            .execute(
                "DELETE FROM meta WHERE key = ?1",
                [FTS_FRANKEN_REBUILD_META_KEY],
            )
            .unwrap();
        // Insert an `fts_messages` schema row by hand (rootpage 0).
        replay_legacy
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [duplicate_legacy_fts_sql],
            )
            .unwrap();
        replay_legacy
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();
        drop(replay_legacy);

        // Build the untouched backup with the same agent and conversation.
        let backups_dir = dir.path().join("backups");
        fs::create_dir_all(&backups_dir).unwrap();
        let clean_backup = backups_dir.join("agent_search.db.20260322T020200.bak");
        let clean_storage = SqliteStorage::open(&clean_backup).unwrap();
        let clean_agent_id = clean_storage.ensure_agent(&agent).unwrap();
        clean_storage
            .insert_conversation_tree(clean_agent_id, None, &conversation)
            .unwrap();
        drop(clean_storage);

        let bundles = discover_historical_database_bundles(&canonical_db);
        let ordered_paths: Vec<PathBuf> = bundles
            .iter()
            .map(|bundle| bundle.root_path.clone())
            .collect();

        // The backup must come first and the tampered replay database second.
        assert_eq!(ordered_paths[0], clean_backup);
        assert_eq!(ordered_paths[1], replay_db);
        assert_eq!(
            bundles[0].probe.schema_version,
            Some(CURRENT_SCHEMA_VERSION)
        );
        // NOTE(review): the backup is expected to report zero FTS schema rows and a
        // non-queryable FTS table, presumably because a database created through
        // `SqliteStorage` does not materialize `fts_messages` eagerly. Confirm
        // against the probe implementation.
        assert_eq!(bundles[0].probe.fts_schema_rows, Some(0));
        assert!(!bundles[0].probe.fts_queryable);
        // The replay probe reflects the rewritten metadata: version 13 and the one
        // hand-inserted `fts_messages` row.
        assert_eq!(bundles[1].probe.schema_version, Some(13));
        assert_eq!(bundles[1].probe.fts_schema_rows, Some(1));
    }
22108
22109 #[test]
22110 fn ensure_fts_consistency_via_rusqlite_catches_up_missing_rows() {
22111 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
22112
22113 let dir = TempDir::new().unwrap();
22114 let db_path = dir.path().join("fts-catchup.db");
22115 let storage = SqliteStorage::open(&db_path).unwrap();
22116 let agent = Agent {
22117 id: None,
22118 slug: "codex".into(),
22119 name: "Codex".into(),
22120 version: Some("0.2.3".into()),
22121 kind: AgentKind::Cli,
22122 };
22123 let agent_id = storage.ensure_agent(&agent).unwrap();
22124 let conversation = Conversation {
22125 id: None,
22126 agent_slug: "codex".into(),
22127 workspace: Some(PathBuf::from("/tmp/workspace")),
22128 external_id: Some("fts-catchup".into()),
22129 title: Some("FTS catchup".into()),
22130 source_path: PathBuf::from("/tmp/fts-catchup.jsonl"),
22131 started_at: Some(1_700_000_000_000),
22132 ended_at: Some(1_700_000_000_100),
22133 approx_tokens: Some(42),
22134 metadata_json: serde_json::Value::Null,
22135 messages: vec![Message {
22136 id: None,
22137 idx: 0,
22138 role: MessageRole::User,
22139 author: Some("user".into()),
22140 created_at: Some(1_700_000_000_050),
22141 content: "initial message".into(),
22142 extra_json: serde_json::Value::Null,
22143 snippets: Vec::new(),
22144 }],
22145 source_id: LOCAL_SOURCE_ID.into(),
22146 origin_host: None,
22147 };
22148 storage
22149 .insert_conversation_tree(agent_id, None, &conversation)
22150 .unwrap();
22151 drop(storage);
22152
22153 rebuild_fts_via_rusqlite(&db_path).unwrap();
22154
22155 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
22156 let conversation_id: i64 = conn
22157 .query_row_map("SELECT id FROM conversations LIMIT 1", fparams![], |row| {
22158 row.get_typed(0)
22159 })
22160 .unwrap();
22161 conn.execute_compat(
22162 "INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
22163 VALUES(2, ?1, 1, 'assistant', 'assistant', 1700000000060, 'authentication catchup', NULL, NULL)",
22164 fparams![conversation_id],
22165 )
22166 .unwrap();
22167 drop(conn);
22168
22169 let repair = ensure_fts_consistency_via_rusqlite(&db_path).unwrap();
22170 assert_eq!(
22171 repair,
22172 FtsConsistencyRepair::IncrementalCatchUp {
22173 inserted_rows: 1,
22174 total_rows: 2
22175 }
22176 );
22177
22178 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
22179 let auth_rows: i64 = conn
22180 .query_row_map(
22181 "SELECT COUNT(*) FROM fts_messages WHERE rowid = 2",
22182 fparams![],
22183 |row| row.get_typed(0),
22184 )
22185 .unwrap();
22186 assert_eq!(auth_rows, 1);
22187 }
22188
22189 #[test]
22190 fn rebuild_fts_via_rusqlite_cleans_duplicate_legacy_schema_rows() {
22191 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
22192
22193 let dir = TempDir::new().unwrap();
22194 let db_path = dir.path().join("fts-duplicate-rebuild.db");
22195
22196 let storage = SqliteStorage::open(&db_path).unwrap();
22197 let agent = Agent {
22198 id: None,
22199 slug: "codex".into(),
22200 name: "Codex".into(),
22201 version: Some("0.2.3".into()),
22202 kind: AgentKind::Cli,
22203 };
22204 let agent_id = storage.ensure_agent(&agent).unwrap();
22205 let conversation = Conversation {
22206 id: None,
22207 agent_slug: "codex".into(),
22208 workspace: Some(PathBuf::from("/ws")),
22209 external_id: Some("retro".into()),
22210 title: Some("retro".into()),
22211 source_path: PathBuf::from("/tmp/retro.jsonl"),
22212 started_at: Some(42),
22213 ended_at: Some(42),
22214 approx_tokens: None,
22215 metadata_json: serde_json::Value::Null,
22216 messages: vec![Message {
22217 id: None,
22218 idx: 0,
22219 role: MessageRole::User,
22220 author: None,
22221 created_at: Some(42),
22222 content: "retro investigation".into(),
22223 extra_json: serde_json::Value::Null,
22224 snippets: Vec::new(),
22225 }],
22226 source_id: LOCAL_SOURCE_ID.into(),
22227 origin_host: None,
22228 };
22229 storage
22230 .insert_conversation_tree(agent_id, None, &conversation)
22231 .unwrap();
22232 drop(storage);
22233 materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();
22234
22235 let conn = rusqlite_test_fixture_conn(&db_path);
22236 conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
22237 conn.execute(
22238 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
22239 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
22240 ["CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')"],
22241 )
22242 .unwrap();
22243 conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
22244 let duplicate_rows: i64 = conn
22245 .query_row(
22246 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
22247 [],
22248 |row| row.get(0),
22249 )
22250 .unwrap();
22251 assert_eq!(duplicate_rows, 2);
22252 drop(conn);
22253
22254 let inserted = rebuild_fts_via_rusqlite(&db_path).unwrap();
22255 assert_eq!(inserted, 1);
22256
22257 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
22258 let schema_rows = franken_fts_schema_rows(&conn).unwrap();
22259 assert_eq!(
22260 schema_rows, 1,
22261 "DROP TABLE should leave one clean FTS schema"
22262 );
22263 let match_count: i64 = conn
22264 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
22265 row.get_typed(0)
22266 })
22267 .unwrap();
22268 assert_eq!(match_count, 1);
22269 }
22270
22271 #[test]
22276 fn ensure_agent_creates_new() {
22277 let dir = TempDir::new().unwrap();
22278 let db_path = dir.path().join("test.db");
22279 let storage = SqliteStorage::open(&db_path).unwrap();
22280
22281 let agent = Agent {
22282 id: None,
22283 slug: "test_agent".into(),
22284 name: "Test Agent".into(),
22285 version: Some("1.0".into()),
22286 kind: AgentKind::Cli,
22287 };
22288
22289 let id = storage.ensure_agent(&agent).unwrap();
22290 assert!(id > 0);
22291 }
22292
22293 #[test]
22294 fn ensure_agent_returns_existing_id() {
22295 let dir = TempDir::new().unwrap();
22296 let db_path = dir.path().join("test.db");
22297 let storage = SqliteStorage::open(&db_path).unwrap();
22298
22299 let agent = Agent {
22300 id: None,
22301 slug: "codex".into(),
22302 name: "Codex".into(),
22303 version: None,
22304 kind: AgentKind::Cli,
22305 };
22306
22307 let id1 = storage.ensure_agent(&agent).unwrap();
22308 let id2 = storage.ensure_agent(&agent).unwrap();
22309 assert_eq!(id1, id2);
22310 }
22311
22312 #[test]
22313 fn ensure_agent_unchanged_preserves_updated_at() {
22314 let dir = TempDir::new().unwrap();
22315 let db_path = dir.path().join("test.db");
22316 let storage = SqliteStorage::open(&db_path).unwrap();
22317
22318 let agent = Agent {
22319 id: None,
22320 slug: "codex".into(),
22321 name: "Codex".into(),
22322 version: Some("1.0".into()),
22323 kind: AgentKind::Cli,
22324 };
22325
22326 storage.ensure_agent(&agent).unwrap();
22327 let initial_updated_at: i64 = storage
22328 .conn
22329 .query_row_map(
22330 "SELECT updated_at FROM agents WHERE slug = ?1",
22331 fparams![agent.slug.as_str()],
22332 |row| row.get_typed(0),
22333 )
22334 .unwrap();
22335 std::thread::sleep(std::time::Duration::from_millis(5));
22336
22337 storage.ensure_agent(&agent).unwrap();
22338 let fetched_updated_at: i64 = storage
22339 .conn
22340 .query_row_map(
22341 "SELECT updated_at FROM agents WHERE slug = ?1",
22342 fparams![agent.slug.as_str()],
22343 |row| row.get_typed(0),
22344 )
22345 .unwrap();
22346
22347 assert_eq!(fetched_updated_at, initial_updated_at);
22348 }
22349
22350 #[test]
22351 fn ensure_agent_changed_metadata_updates_cached_slug() {
22352 let dir = TempDir::new().unwrap();
22353 let db_path = dir.path().join("test.db");
22354 let storage = SqliteStorage::open(&db_path).unwrap();
22355
22356 let mut agent = Agent {
22357 id: None,
22358 slug: "codex".into(),
22359 name: "Codex".into(),
22360 version: Some("1.0".into()),
22361 kind: AgentKind::Cli,
22362 };
22363
22364 let id1 = storage.ensure_agent(&agent).unwrap();
22365 agent.name = "Codex CLI".into();
22366 agent.version = Some("1.1".into());
22367 let id2 = storage.ensure_agent(&agent).unwrap();
22368
22369 let fetched: (String, Option<String>) = storage
22370 .conn
22371 .query_row_map(
22372 "SELECT name, version FROM agents WHERE slug = ?1",
22373 fparams![agent.slug.as_str()],
22374 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
22375 )
22376 .unwrap();
22377
22378 assert_eq!(id1, id2);
22379 assert_eq!(fetched, ("Codex CLI".into(), Some("1.1".into())));
22380 }
22381
22382 #[test]
22383 fn list_agents_returns_inserted() {
22384 let dir = TempDir::new().unwrap();
22385 let db_path = dir.path().join("test.db");
22386 let storage = SqliteStorage::open(&db_path).unwrap();
22387
22388 let agent = Agent {
22389 id: None,
22390 slug: "new_agent".into(),
22391 name: "New Agent".into(),
22392 version: None,
22393 kind: AgentKind::VsCode,
22394 };
22395 storage.ensure_agent(&agent).unwrap();
22396
22397 let agents = storage.list_agents().unwrap();
22398 assert!(agents.iter().any(|a| a.slug == "new_agent"));
22399 }
22400
22401 #[test]
22406 fn ensure_workspace_creates_new() {
22407 let dir = TempDir::new().unwrap();
22408 let db_path = dir.path().join("test.db");
22409 let storage = SqliteStorage::open(&db_path).unwrap();
22410
22411 let id = storage
22412 .ensure_workspace(Path::new("/home/user/project"), Some("My Project"))
22413 .unwrap();
22414 assert!(id > 0);
22415 }
22416
22417 #[test]
22418 fn ensure_workspace_returns_existing() {
22419 let dir = TempDir::new().unwrap();
22420 let db_path = dir.path().join("test.db");
22421 let storage = SqliteStorage::open(&db_path).unwrap();
22422
22423 let path = Path::new("/home/user/myproject");
22424 let id1 = storage.ensure_workspace(path, None).unwrap();
22425 let id2 = storage.ensure_workspace(path, None).unwrap();
22426 assert_eq!(id1, id2);
22427 }
22428
22429 #[test]
22430 fn ensure_workspace_changed_display_name_updates_cached_path() {
22431 let dir = TempDir::new().unwrap();
22432 let db_path = dir.path().join("test.db");
22433 let storage = SqliteStorage::open(&db_path).unwrap();
22434
22435 let path = Path::new("/home/user/myproject");
22436 let id1 = storage.ensure_workspace(path, Some("Before")).unwrap();
22437 let id2 = storage.ensure_workspace(path, Some("After")).unwrap();
22438
22439 let display_name: Option<String> = storage
22440 .conn
22441 .query_row_map(
22442 "SELECT display_name FROM workspaces WHERE path = ?1",
22443 fparams![path.to_string_lossy().as_ref()],
22444 |row| row.get_typed(0),
22445 )
22446 .unwrap();
22447
22448 assert_eq!(id1, id2);
22449 assert_eq!(display_name.as_deref(), Some("After"));
22450 }
22451
22452 #[test]
22453 fn list_workspaces_returns_inserted() {
22454 let dir = TempDir::new().unwrap();
22455 let db_path = dir.path().join("test.db");
22456 let storage = SqliteStorage::open(&db_path).unwrap();
22457
22458 storage
22459 .ensure_workspace(Path::new("/test/workspace"), Some("Test WS"))
22460 .unwrap();
22461
22462 let workspaces = storage.list_workspaces().unwrap();
22463 assert!(
22464 workspaces
22465 .iter()
22466 .any(|w| w.path.to_str() == Some("/test/workspace"))
22467 );
22468 }
22469
22470 #[test]
22475 fn upsert_source_creates_new() {
22476 let dir = TempDir::new().unwrap();
22477 let db_path = dir.path().join("test.db");
22478 let storage = SqliteStorage::open(&db_path).unwrap();
22479
22480 let source = Source {
22481 id: "test-laptop".into(),
22482 kind: SourceKind::Ssh,
22483 host_label: Some("test.local".into()),
22484 machine_id: Some("test-machine-id".into()),
22485 platform: None,
22486 config_json: None,
22487 created_at: Some(SqliteStorage::now_millis()),
22488 updated_at: None,
22489 };
22490
22491 storage.upsert_source(&source).unwrap();
22492 let fetched = storage.get_source("test-laptop").unwrap();
22493 assert!(fetched.is_some());
22494 assert_eq!(fetched.unwrap().host_label, Some("test.local".into()));
22495 }
22496
22497 #[test]
22498 fn upsert_source_updates_existing() {
22499 let dir = TempDir::new().unwrap();
22500 let db_path = dir.path().join("test.db");
22501 let storage = SqliteStorage::open(&db_path).unwrap();
22502
22503 let source1 = Source {
22504 id: "my-source".into(),
22505 kind: SourceKind::Ssh,
22506 host_label: Some("Original Label".into()),
22507 machine_id: None,
22508 platform: None,
22509 config_json: None,
22510 created_at: Some(SqliteStorage::now_millis()),
22511 updated_at: None,
22512 };
22513 storage.upsert_source(&source1).unwrap();
22514
22515 let source2 = Source {
22516 id: "my-source".into(),
22517 kind: SourceKind::Ssh,
22518 host_label: Some("Updated Label".into()),
22519 machine_id: None,
22520 platform: Some("linux".into()),
22521 config_json: None,
22522 created_at: Some(SqliteStorage::now_millis()),
22523 updated_at: Some(SqliteStorage::now_millis()),
22524 };
22525 storage.upsert_source(&source2).unwrap();
22526
22527 let fetched = storage.get_source("my-source").unwrap().unwrap();
22528 assert_eq!(fetched.host_label, Some("Updated Label".into()));
22529 assert!(fetched.platform.is_some());
22530 }
22531
22532 #[test]
22533 fn upsert_source_unchanged_preserves_updated_at() {
22534 let dir = TempDir::new().unwrap();
22535 let db_path = dir.path().join("test.db");
22536 let storage = SqliteStorage::open(&db_path).unwrap();
22537
22538 let source = Source {
22539 id: "stable-source".into(),
22540 kind: SourceKind::Ssh,
22541 host_label: Some("builder.local".into()),
22542 machine_id: None,
22543 platform: Some("linux".into()),
22544 config_json: Some(serde_json::json!({"role": "bench"})),
22545 created_at: None,
22546 updated_at: None,
22547 };
22548
22549 storage.upsert_source(&source).unwrap();
22550 let initial = storage.get_source("stable-source").unwrap().unwrap();
22551 std::thread::sleep(std::time::Duration::from_millis(5));
22552
22553 storage.upsert_source(&source).unwrap();
22554 let fetched = storage.get_source("stable-source").unwrap().unwrap();
22555
22556 assert_eq!(fetched.created_at, initial.created_at);
22557 assert_eq!(fetched.updated_at, initial.updated_at);
22558 assert_eq!(fetched.host_label, initial.host_label);
22559 assert_eq!(fetched.platform, initial.platform);
22560 assert_eq!(fetched.config_json, initial.config_json);
22561 }
22562
22563 #[test]
22564 fn ensure_source_for_conversation_recreates_remote_source_after_delete() {
22565 let dir = TempDir::new().unwrap();
22566 let db_path = dir.path().join("test.db");
22567 let storage = SqliteStorage::open(&db_path).unwrap();
22568
22569 let conversation = Conversation {
22570 id: None,
22571 agent_slug: "codex".into(),
22572 workspace: Some(PathBuf::from("/ws/cache-recreate")),
22573 external_id: Some("cache-recreate".into()),
22574 title: Some("Cache Recreate".into()),
22575 source_path: PathBuf::from("/log/cache-recreate.jsonl"),
22576 started_at: Some(1_700_000_000_000),
22577 ended_at: Some(1_700_000_000_001),
22578 approx_tokens: Some(16),
22579 metadata_json: serde_json::json!({}),
22580 messages: vec![Message {
22581 id: None,
22582 idx: 0,
22583 role: MessageRole::User,
22584 author: Some("tester".into()),
22585 created_at: Some(1_700_000_000_000),
22586 content: "cache recreate".into(),
22587 extra_json: serde_json::json!({}),
22588 snippets: Vec::new(),
22589 }],
22590 source_id: "cache-remote-source".into(),
22591 origin_host: Some("builder-cache".into()),
22592 };
22593
22594 storage
22595 .ensure_source_for_conversation(&conversation)
22596 .unwrap();
22597 assert!(storage.get_source("cache-remote-source").unwrap().is_some());
22598
22599 let deleted = storage.delete_source("cache-remote-source", false).unwrap();
22600 assert!(deleted);
22601 assert!(storage.get_source("cache-remote-source").unwrap().is_none());
22602
22603 storage
22604 .ensure_source_for_conversation(&conversation)
22605 .unwrap();
22606 let recreated = storage.get_source("cache-remote-source").unwrap();
22607 assert!(recreated.is_some());
22608 assert_eq!(
22609 recreated.unwrap().host_label.as_deref(),
22610 Some("builder-cache")
22611 );
22612 }
22613
22614 #[test]
22615 fn delete_source_removes_entry() {
22616 let dir = TempDir::new().unwrap();
22617 let db_path = dir.path().join("test.db");
22618 let storage = SqliteStorage::open(&db_path).unwrap();
22619
22620 let source = Source {
22621 id: "to-delete".into(),
22622 kind: SourceKind::Local,
22623 host_label: None,
22624 machine_id: None,
22625 platform: None,
22626 config_json: None,
22627 created_at: Some(SqliteStorage::now_millis()),
22628 updated_at: None,
22629 };
22630 storage.upsert_source(&source).unwrap();
22631
22632 let deleted = storage.delete_source("to-delete", false).unwrap();
22633 assert!(deleted);
22634
22635 let fetched = storage.get_source("to-delete").unwrap();
22636 assert!(fetched.is_none());
22637 }
22638
22639 #[test]
22640 fn delete_source_cannot_delete_local() {
22641 let dir = TempDir::new().unwrap();
22642 let db_path = dir.path().join("test.db");
22643 let storage = SqliteStorage::open(&db_path).unwrap();
22644
22645 let result = storage.delete_source(LOCAL_SOURCE_ID, false);
22646 assert!(result.is_err());
22647 }
22648
22649 #[test]
22650 fn list_sources_includes_local() {
22651 let dir = TempDir::new().unwrap();
22652 let db_path = dir.path().join("test.db");
22653 let storage = SqliteStorage::open(&db_path).unwrap();
22654
22655 let sources = storage.list_sources().unwrap();
22656 assert!(sources.iter().any(|s| s.id == LOCAL_SOURCE_ID));
22657 }
22658
22659 #[test]
22660 fn insert_conversation_tree_blank_local_source_normalizes_to_local_id() {
22661 let dir = TempDir::new().unwrap();
22662 let db_path = dir.path().join("test.db");
22663 let storage = SqliteStorage::open(&db_path).unwrap();
22664
22665 let agent_id = storage
22666 .ensure_agent(&Agent {
22667 id: None,
22668 slug: "codex".into(),
22669 name: "Codex".into(),
22670 version: None,
22671 kind: AgentKind::Cli,
22672 })
22673 .unwrap();
22674
22675 let conversation = Conversation {
22676 id: None,
22677 agent_slug: "codex".into(),
22678 workspace: None,
22679 external_id: Some("blank-local-source".into()),
22680 title: Some("Blank local source".into()),
22681 source_path: dir.path().join("blank-local.jsonl"),
22682 started_at: Some(1_700_000_000_000),
22683 ended_at: Some(1_700_000_000_001),
22684 approx_tokens: None,
22685 metadata_json: serde_json::Value::Null,
22686 messages: vec![Message {
22687 id: None,
22688 idx: 0,
22689 role: MessageRole::User,
22690 author: None,
22691 created_at: Some(1_700_000_000_000),
22692 content: "hello".into(),
22693 extra_json: serde_json::Value::Null,
22694 snippets: Vec::new(),
22695 }],
22696 source_id: " ".into(),
22697 origin_host: None,
22698 };
22699
22700 storage
22701 .insert_conversation_tree(agent_id, None, &conversation)
22702 .unwrap();
22703
22704 assert!(storage.get_source(" ").unwrap().is_none());
22705 let source = storage
22706 .get_source(LOCAL_SOURCE_ID)
22707 .unwrap()
22708 .expect("local source row should exist");
22709 assert_eq!(source.kind, SourceKind::Local);
22710 assert_eq!(source.host_label, None);
22711
22712 let conversations = storage.list_conversations(10, 0).unwrap();
22713 assert_eq!(conversations.len(), 1);
22714 assert_eq!(conversations[0].source_id, LOCAL_SOURCE_ID);
22715 assert_eq!(conversations[0].origin_host, None);
22716 }
22717
22718 #[test]
22719 fn repeated_local_inserts_do_not_touch_bootstrap_source_row() {
22720 let dir = TempDir::new().unwrap();
22721 let db_path = dir.path().join("test.db");
22722 let storage = SqliteStorage::open(&db_path).unwrap();
22723
22724 let agent_id = storage
22725 .ensure_agent(&Agent {
22726 id: None,
22727 slug: "codex".into(),
22728 name: "Codex".into(),
22729 version: None,
22730 kind: AgentKind::Cli,
22731 })
22732 .unwrap();
22733
22734 let bootstrap_updated_at: i64 = storage
22735 .conn
22736 .query_row_map(
22737 "SELECT updated_at FROM sources WHERE id = ?1",
22738 fparams![LOCAL_SOURCE_ID],
22739 |row| row.get_typed(0),
22740 )
22741 .unwrap();
22742
22743 let make_conversation = |external_id: &str, suffix: &str| Conversation {
22744 id: None,
22745 agent_slug: "codex".into(),
22746 workspace: None,
22747 external_id: Some(external_id.into()),
22748 title: Some(format!("Local source {suffix}")),
22749 source_path: dir.path().join(format!("local-{suffix}.jsonl")),
22750 started_at: Some(1_700_000_000_000),
22751 ended_at: Some(1_700_000_000_001),
22752 approx_tokens: None,
22753 metadata_json: serde_json::Value::Null,
22754 messages: vec![Message {
22755 id: None,
22756 idx: 0,
22757 role: MessageRole::User,
22758 author: None,
22759 created_at: Some(1_700_000_000_000),
22760 content: format!("hello-{suffix}"),
22761 extra_json: serde_json::Value::Null,
22762 snippets: Vec::new(),
22763 }],
22764 source_id: LOCAL_SOURCE_ID.into(),
22765 origin_host: None,
22766 };
22767
22768 std::thread::sleep(std::time::Duration::from_millis(5));
22769 storage
22770 .insert_conversation_tree(agent_id, None, &make_conversation("local-source-1", "one"))
22771 .unwrap();
22772 let after_first_insert: i64 = storage
22773 .conn
22774 .query_row_map(
22775 "SELECT updated_at FROM sources WHERE id = ?1",
22776 fparams![LOCAL_SOURCE_ID],
22777 |row| row.get_typed(0),
22778 )
22779 .unwrap();
22780
22781 std::thread::sleep(std::time::Duration::from_millis(5));
22782 storage
22783 .insert_conversation_tree(agent_id, None, &make_conversation("local-source-2", "two"))
22784 .unwrap();
22785 let after_second_insert: i64 = storage
22786 .conn
22787 .query_row_map(
22788 "SELECT updated_at FROM sources WHERE id = ?1",
22789 fparams![LOCAL_SOURCE_ID],
22790 |row| row.get_typed(0),
22791 )
22792 .unwrap();
22793
22794 assert_eq!(after_first_insert, bootstrap_updated_at);
22795 assert_eq!(after_second_insert, bootstrap_updated_at);
22796 }
22797
22798 #[test]
22799 fn insert_conversation_tree_blank_remote_source_normalizes_to_origin_host() {
22800 let dir = TempDir::new().unwrap();
22801 let db_path = dir.path().join("test.db");
22802 let storage = SqliteStorage::open(&db_path).unwrap();
22803
22804 let agent_id = storage
22805 .ensure_agent(&Agent {
22806 id: None,
22807 slug: "codex".into(),
22808 name: "Codex".into(),
22809 version: None,
22810 kind: AgentKind::Cli,
22811 })
22812 .unwrap();
22813
22814 let conversation = Conversation {
22815 id: None,
22816 agent_slug: "codex".into(),
22817 workspace: None,
22818 external_id: Some("blank-remote-source".into()),
22819 title: Some("Blank remote source".into()),
22820 source_path: dir.path().join("blank-remote.jsonl"),
22821 started_at: Some(1_700_000_000_000),
22822 ended_at: Some(1_700_000_000_001),
22823 approx_tokens: None,
22824 metadata_json: serde_json::Value::Null,
22825 messages: vec![Message {
22826 id: None,
22827 idx: 0,
22828 role: MessageRole::User,
22829 author: None,
22830 created_at: Some(1_700_000_000_000),
22831 content: "hello".into(),
22832 extra_json: serde_json::Value::Null,
22833 snippets: Vec::new(),
22834 }],
22835 source_id: " ".into(),
22836 origin_host: Some("user@work-laptop".into()),
22837 };
22838
22839 storage
22840 .insert_conversation_tree(agent_id, None, &conversation)
22841 .unwrap();
22842
22843 assert!(storage.get_source(" ").unwrap().is_none());
22844 let source = storage
22845 .get_source("user@work-laptop")
22846 .unwrap()
22847 .expect("normalized remote source row should exist");
22848 assert_eq!(source.kind, SourceKind::Ssh);
22849 assert_eq!(source.host_label.as_deref(), Some("user@work-laptop"));
22850
22851 let conversations = storage.list_conversations(10, 0).unwrap();
22852 assert_eq!(conversations.len(), 1);
22853 assert_eq!(conversations[0].source_id, "user@work-laptop");
22854 assert_eq!(
22855 conversations[0].origin_host.as_deref(),
22856 Some("user@work-laptop")
22857 );
22858 }
22859
22860 #[test]
22861 fn insert_conversations_batched_normalizes_host_only_remote_source_id() {
22862 let dir = TempDir::new().unwrap();
22863 let db_path = dir.path().join("test.db");
22864 let storage = SqliteStorage::open(&db_path).unwrap();
22865
22866 let agent_id = storage
22867 .ensure_agent(&Agent {
22868 id: None,
22869 slug: "codex".into(),
22870 name: "Codex".into(),
22871 version: None,
22872 kind: AgentKind::Cli,
22873 })
22874 .unwrap();
22875
22876 let conversation = Conversation {
22877 id: None,
22878 agent_slug: "codex".into(),
22879 workspace: None,
22880 external_id: Some("batched-blank-remote-source".into()),
22881 title: Some("Batched blank remote source".into()),
22882 source_path: dir.path().join("batched-blank-remote.jsonl"),
22883 started_at: Some(1_700_000_000_000),
22884 ended_at: Some(1_700_000_000_001),
22885 approx_tokens: None,
22886 metadata_json: serde_json::Value::Null,
22887 messages: vec![Message {
22888 id: None,
22889 idx: 0,
22890 role: MessageRole::User,
22891 author: None,
22892 created_at: Some(1_700_000_000_000),
22893 content: "hello".into(),
22894 extra_json: serde_json::Value::Null,
22895 snippets: Vec::new(),
22896 }],
22897 source_id: " ".into(),
22898 origin_host: Some("user@batch-host".into()),
22899 };
22900
22901 storage
22902 .insert_conversations_batched(&[(agent_id, None, &conversation)])
22903 .unwrap();
22904
22905 assert!(storage.get_source(" ").unwrap().is_none());
22906 let source = storage
22907 .get_source("user@batch-host")
22908 .unwrap()
22909 .expect("normalized batched remote source row should exist");
22910 assert_eq!(source.kind, SourceKind::Ssh);
22911 assert_eq!(source.host_label.as_deref(), Some("user@batch-host"));
22912
22913 let conversations = storage.list_conversations(10, 0).unwrap();
22914 assert_eq!(conversations.len(), 1);
22915 assert_eq!(conversations[0].source_id, "user@batch-host");
22916 assert_eq!(
22917 conversations[0].origin_host.as_deref(),
22918 Some("user@batch-host")
22919 );
22920 }
22921
22922 #[test]
22923 fn get_source_ids_excludes_local() {
22924 let dir = TempDir::new().unwrap();
22925 let db_path = dir.path().join("test.db");
22926 let storage = SqliteStorage::open(&db_path).unwrap();
22927
22928 let source = Source {
22930 id: "remote-1".into(),
22931 kind: SourceKind::Ssh,
22932 host_label: Some("server".into()),
22933 machine_id: None,
22934 platform: None,
22935 config_json: None,
22936 created_at: Some(SqliteStorage::now_millis()),
22937 updated_at: None,
22938 };
22939 storage.upsert_source(&source).unwrap();
22940
22941 let ids = storage.get_source_ids().unwrap();
22942 assert!(!ids.contains(&LOCAL_SOURCE_ID.to_string()));
22943 assert!(ids.contains(&"remote-1".to_string()));
22944 }
22945
22946 #[test]
22951 fn get_last_scan_ts_returns_none_initially() {
22952 let dir = TempDir::new().unwrap();
22953 let db_path = dir.path().join("test.db");
22954 let storage = SqliteStorage::open(&db_path).unwrap();
22955
22956 let ts = storage.get_last_scan_ts().unwrap();
22957 assert!(ts.is_none());
22958 }
22959
22960 #[test]
22961 fn set_and_get_last_scan_ts() {
22962 let dir = TempDir::new().unwrap();
22963 let db_path = dir.path().join("test.db");
22964 let storage = SqliteStorage::open(&db_path).unwrap();
22965
22966 let expected_ts = 1700000000000_i64;
22967 storage.set_last_scan_ts(expected_ts).unwrap();
22968
22969 let actual_ts = storage.get_last_scan_ts().unwrap();
22970 assert_eq!(actual_ts, Some(expected_ts));
22971 }
22972
22973 #[test]
22978 fn now_millis_returns_reasonable_value() {
22979 let ts = SqliteStorage::now_millis();
22980 assert!(ts > 1577836800000);
22982 assert!(ts < 4102444800000);
22984 }
22985
22986 #[test]
22991 fn msgpack_roundtrip_basic_object() {
22992 let value = serde_json::json!({
22993 "key": "value",
22994 "number": 42,
22995 "nested": { "inner": true }
22996 });
22997
22998 let bytes = serialize_json_to_msgpack(&value).expect("should serialize");
22999 let recovered = deserialize_msgpack_to_json(&bytes);
23000
23001 assert_eq!(value, recovered);
23002 }
23003
23004 #[test]
23005 fn msgpack_returns_none_for_null() {
23006 let value = serde_json::Value::Null;
23007 assert!(serialize_json_to_msgpack(&value).is_none());
23008 }
23009
/// A message whose `extra_json` is JSON null is stored with both the
/// `extra_json` and `extra_bin` columns as SQL NULL, and reads back as JSON
/// null.
#[test]
fn message_insert_stores_null_extra_json_as_sql_null() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // The only message carries no extra metadata at all.
    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "null metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("null-extra-json".into()),
        title: Some("Null extra_json".into()),
        source_path: PathBuf::from("/tmp/null-extra-json.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    let conversation_id = outcome.conversation_id;

    // Inspect the raw columns directly rather than the decoded model.
    let raw_columns: (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let (text_column, binary_column) = raw_columns;
    assert!(text_column.is_none());
    assert!(binary_column.is_none());

    // The decoded message must still expose JSON null.
    let stored = storage.fetch_messages(conversation_id).unwrap();
    assert!(stored[0].extra_json.is_null());
}
23068
/// A message with non-empty `extra_json` is stored only in the binary
/// `extra_bin` column (the text column stays NULL) and decodes back to the
/// original JSON value.
#[test]
fn message_insert_stores_nonempty_extra_json_as_msgpack_only() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let expected_extra = serde_json::json!({ "idx": 7, "kind": "profile" });
    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "msgpack metadata message".into(),
        extra_json: expected_extra.clone(),
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("msgpack-extra-json".into()),
        title: Some("MessagePack extra_json".into()),
        source_path: PathBuf::from("/tmp/msgpack-extra-json.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    let conversation_id = outcome.conversation_id;

    // Raw column check: text column empty, binary column populated.
    let raw_columns: (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let (text_column, binary_column) = raw_columns;
    assert!(text_column.is_none());
    assert!(binary_column.is_some());

    // Reading through the storage API restores the original JSON.
    let stored = storage.fetch_messages(conversation_id).unwrap();
    assert_eq!(stored[0].extra_json, expected_extra);
}
23128
/// A conversation with JSON-null metadata is stored with the literal text
/// `null` in `metadata_json`, no `metadata_bin`, and lists back as JSON null.
#[test]
fn conversation_insert_preserves_null_metadata_json_as_json_null() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "null conversation metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("null-conversation-metadata".into()),
        title: Some("Null conversation metadata".into()),
        source_path: PathBuf::from("/tmp/null-conversation-metadata.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // Look the row up by its external id and inspect the raw columns.
    let raw_columns: (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
            fparams!["null-conversation-metadata"],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let (text_column, binary_column) = raw_columns;
    assert_eq!(text_column.as_deref(), Some("null"));
    assert!(binary_column.is_none());

    let listed = storage.list_conversations(10, 0).unwrap();
    assert!(listed[0].metadata_json.is_null());
}
23186
/// A conversation with non-empty metadata is stored only in `metadata_bin`
/// (the text column stays NULL) and lists back with the original JSON value.
#[test]
fn conversation_insert_stores_nonempty_metadata_as_msgpack_only() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let expected_metadata = serde_json::json!({ "bench": true, "source": "profile" });
    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "msgpack conversation metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("msgpack-conversation-metadata".into()),
        title: Some("MessagePack conversation metadata".into()),
        source_path: PathBuf::from("/tmp/msgpack-conversation-metadata.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: expected_metadata.clone(),
        messages: vec![only_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // Raw column check: text column empty, binary column populated.
    let raw_columns: (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
            fparams!["msgpack-conversation-metadata"],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let (text_column, binary_column) = raw_columns;
    assert!(text_column.is_none());
    assert!(binary_column.is_some());

    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed[0].metadata_json, expected_metadata);
}
23245
/// An empty JSON object is not serialized: the encoder returns `None`.
#[test]
fn msgpack_returns_none_for_empty_object() {
    let empty_object = serde_json::json!({});
    let encoded = serialize_json_to_msgpack(&empty_object);
    assert!(encoded.is_none());
}
23251
/// A roughly 1 MB JSON column value is kept as its raw text, and the size hint
/// equals the raw byte length.
#[test]
fn parse_historical_json_column_preserves_large_payloads_as_raw_json() {
    let filler = "x".repeat(1_000_000);
    let raw = format!("{{\"blob\":\"{}\"}}", filler);

    let parsed = parse_historical_json_column(Some(raw.clone()));

    assert_eq!(historical_raw_json(&parsed), Some(raw.as_str()));
    assert_eq!(json_value_size_hint(&parsed), raw.len());
}
23261
/// A small JSON column value is also kept as its raw text.
#[test]
fn parse_historical_json_column_preserves_small_payloads_as_raw_json() {
    let raw = r#"{"ok":true,"n":1}"#.to_string();

    let parsed = parse_historical_json_column(Some(raw.clone()));

    assert_eq!(historical_raw_json(&parsed), Some(raw.as_str()));
}
23270
/// A non-empty JSON array is serialized and round-trips unchanged.
#[test]
fn msgpack_serializes_non_empty_array() {
    let original = serde_json::json!([1, 2, 3]);

    let encoded = serialize_json_to_msgpack(&original).expect("should serialize array");
    let decoded = deserialize_msgpack_to_json(&encoded);

    assert_eq!(original, decoded);
}
23278
/// For a representative object, the MessagePack encoding is strictly smaller
/// than the compact JSON encoding.
#[test]
fn msgpack_smaller_than_json() {
    let sample = serde_json::json!({
        "field_name_one": "some_value",
        "field_name_two": 123456,
        "field_name_three": [1, 2, 3, 4, 5],
        "field_name_four": { "nested": true }
    });

    let json_len = serde_json::to_vec(&sample).unwrap().len();
    let msgpack_len = serialize_json_to_msgpack(&sample).unwrap().len();

    assert!(
        msgpack_len < json_len,
        "MessagePack ({} bytes) should be smaller than JSON ({} bytes)",
        msgpack_len,
        json_len
    );
}
23299
/// After opening a fresh database, `conversations` has a `metadata_bin` column
/// and `messages` has an `extra_bin` column.
#[test]
fn migration_v7_adds_binary_columns() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // Column 1 of each `PRAGMA table_info` row is the column name.
    let conversation_columns = storage
        .raw()
        .query("PRAGMA table_info(conversations)")
        .unwrap();
    let has_metadata_bin = conversation_columns
        .iter()
        .any(|row| row.get_typed::<String>(1).unwrap() == "metadata_bin");
    assert!(
        has_metadata_bin,
        "conversations should have metadata_bin column"
    );

    let message_columns = storage
        .raw()
        .query("PRAGMA table_info(messages)")
        .unwrap();
    let has_extra_bin = message_columns
        .iter()
        .any(|row| row.get_typed::<String>(1).unwrap() == "extra_bin");
    assert!(has_extra_bin, "messages should have extra_bin column");
}
23327
/// Appending to a conversation after its `conversation_tail_state` row was
/// deleted by hand must recreate that row.
///
/// The test also overwrites `conversations.ended_at` before the append and
/// expects the recreated tail row to carry that overwritten value together
/// with the index and timestamp of the newest message.
#[test]
fn insert_conversation_tree_rehydrates_append_tail_state_cache_after_manual_clear() {
    // Same lookup is issued before and after the append.
    const TAIL_STATE_SQL: &str = "SELECT ended_at, last_message_idx, last_message_created_at
             FROM conversation_tail_state
             WHERE conversation_id = ?1";

    let tmp = TempDir::new().unwrap();
    let storage =
        SqliteStorage::open(&tmp.path().join("append-tail-state-cache.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-append-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // First insert: five messages.
    let initial = make_profiled_append_remote_merge_conversation(11, 5);
    let conversation_id = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap()
        .conversation_id;

    let tail_before: (Option<i64>, Option<i64>, Option<i64>) = storage
        .raw()
        .query_row_map(TAIL_STATE_SQL, fparams![conversation_id], |row| {
            Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?))
        })
        .unwrap();
    assert_eq!(tail_before, (Some(111_005), Some(4), Some(111_004)));

    // Tamper with the stored end time, then drop the cached tail row.
    storage
        .raw()
        .execute_compat(
            "UPDATE conversations SET ended_at = ?1 WHERE id = ?2",
            fparams![111_999_i64, conversation_id],
        )
        .unwrap();
    storage
        .raw()
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    // Second insert: same conversation grown to ten messages.
    let appended = make_profiled_append_remote_merge_conversation(11, 10);
    let append_outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(append_outcome.inserted_indices, vec![5, 6, 7, 8, 9]);

    let tail_after: (Option<i64>, Option<i64>, Option<i64>) = storage
        .raw()
        .query_row_map(TAIL_STATE_SQL, fparams![conversation_id], |row| {
            Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?))
        })
        .unwrap();
    assert_eq!(tail_after, (Some(111_999), Some(9), Some(111_009)));
}
23396
/// Decoding an empty byte slice yields an empty JSON object.
#[test]
fn msgpack_deserialize_empty_returns_default() {
    let empty_object = serde_json::Value::Object(serde_json::Map::new());
    assert_eq!(deserialize_msgpack_to_json(&[]), empty_object);
}
23402
/// Decoding an undecodable payload falls back to an empty JSON object.
#[test]
fn msgpack_deserialize_garbage_returns_default() {
    // 0x85 is a MessagePack fixmap header announcing five entries; none
    // follow, so the payload is truncated.
    let truncated = [0x85];
    let empty_object = serde_json::Value::Object(serde_json::Map::new());

    assert_eq!(deserialize_msgpack_to_json(&truncated), empty_object);
}
23410
/// Recording three raw entries and expanding them produces the expected
/// number of rows and the expected "all"/"all" rollup for day 100.
#[test]
fn stats_aggregator_collects_and_expands() {
    let mut aggregator = StatsAggregator::new();
    assert!(aggregator.is_empty());

    // Two agents on day 100, one of them again on day 101.
    aggregator.record("claude", "local", 100, 5, 500);
    aggregator.record("codex", "local", 100, 3, 300);
    aggregator.record("claude", "local", 101, 2, 200);

    assert!(!aggregator.is_empty());
    assert_eq!(aggregator.raw_entry_count(), 3);

    // NOTE(review): 10 is consistent with each raw entry fanning out into
    // (agent, source), (agent, all), (all, source) and (all, all) buckets:
    // 6 distinct buckets for day 100 plus 4 for day 101. Confirm against
    // `expand`.
    let expanded = aggregator.expand();
    assert_eq!(expanded.len(), 10);

    // Day-100 grand total: two records, 5 + 3 messages, 500 + 300 chars.
    let rollup = expanded
        .iter()
        .find(|(d, a, s, _)| *d == 100 && a == "all" && s == "all")
        .unwrap();
    let totals = &rollup.3;
    assert_eq!(totals.session_count_delta, 2);
    assert_eq!(totals.message_count_delta, 8);
    assert_eq!(totals.total_chars_delta, 800);
}
23466
/// Constructing a `LazyFrankenDb` must not open the database.
#[test]
fn lazy_franken_db_not_open_before_get() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("lazy_test.db");

    // Create the database file; this handle stays alive until the test ends.
    let _storage = SqliteStorage::open(&db_path).unwrap();

    let lazy_db = LazyFrankenDb::new(db_path);
    assert!(
        !lazy_db.is_open(),
        "LazyFrankenDb must not open on construction"
    );
}
23485
/// The first `get` opens the database; afterwards `is_open` reports true.
#[test]
fn lazy_franken_db_opens_on_first_get() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("lazy_test.db");

    // Create the schema, then release the handle immediately.
    drop(SqliteStorage::open(&db_path).unwrap());

    let lazy_db = LazyFrankenDb::new(db_path);
    assert!(!lazy_db.is_open());

    {
        // The handle is released at the end of this scope, before `is_open`
        // is queried again.
        let conn = lazy_db.get("test").expect("should open successfully");
        let conversation_count: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
                r.get_typed(0)
            })
            .unwrap();
        assert_eq!(conversation_count, 0);
    }

    assert!(lazy_db.is_open(), "LazyFrankenDb must be open after get()");
}
23509
/// A table created through the first `get` handle is visible through a second
/// `get` handle.
#[test]
fn lazy_franken_db_reuses_connection() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("lazy_test.db");
    drop(SqliteStorage::open(&db_path).unwrap());

    let lazy_db = LazyFrankenDb::new(db_path);

    // First handle: create a scratch table, then release the handle.
    let first = lazy_db.get("first").unwrap();
    first
        .execute_batch("CREATE TABLE IF NOT EXISTS test_tbl (id INTEGER)")
        .unwrap();
    drop(first);

    // Second handle: the scratch table must exist and be empty.
    let second = lazy_db.get("second").unwrap();
    let row_count: i64 = second
        .query_row_map("SELECT COUNT(*) FROM test_tbl", fparams![], |r| {
            r.get_typed(0)
        })
        .unwrap();
    assert_eq!(row_count, 0);
    drop(second);
}
23537
/// `get` on a path with no database file fails with `LazyDbError::NotFound`.
#[test]
fn lazy_franken_db_not_found_error() {
    let tmp = TempDir::new().unwrap();
    // This file is never created.
    let missing_path = tmp.path().join("nonexistent.db");

    let outcome = LazyFrankenDb::new(missing_path).get("test");
    assert!(outcome.is_err());
    assert!(
        matches!(outcome.unwrap_err(), LazyDbError::NotFound(_)),
        "should return NotFound for missing DB"
    );
}
23551
/// `path()` returns the path the wrapper was constructed with.
#[test]
fn lazy_franken_db_path_accessor() {
    let expected = PathBuf::from("/tmp/test_lazy.db");
    let lazy_db = LazyFrankenDb::new(expected.clone());

    assert_eq!(lazy_db.path(), expected.as_path());
}
23558
/// Table-driven check of `sql_like_match` against `%` and `_` wildcards, exact
/// patterns, and mixed-case input.
#[test]
fn sql_like_match_basic_patterns() {
    // (text, pattern, expected result)
    let cases = [
        // Trailing `%`: prefix match, including an empty remainder.
        ("claude-opus-4-20250101", "claude-opus-4%", true),
        ("claude-opus-4", "claude-opus-4%", true),
        ("claude-sonnet-4", "claude-opus-4%", false),
        // `%` in the middle and at the end.
        ("gemini-2.0-flash-001", "gemini-2%flash%", true),
        ("gemini-2-flash", "gemini-2%flash%", true),
        ("gemini-2-pro", "gemini-2%flash%", false),
        // No wildcard: the whole text must match.
        ("hello", "hello", true),
        ("hello!", "hello", false),
        // `_` stands for exactly one character.
        ("gpt-4o", "gpt-4_", true),
        ("gpt-4oo", "gpt-4_", false),
        // Differently cased text still matches a lowercase pattern.
        ("Claude-Opus-4", "claude-opus-4%", true),
    ];

    for (text, pattern, expected) in cases {
        assert_eq!(
            sql_like_match(text, pattern),
            expected,
            "text={text:?} pattern={pattern:?}"
        );
    }
}
23585
/// `date_str_to_day_id` maps ISO dates to day ids and rejects malformed input.
#[test]
fn date_str_to_day_id_converts_correctly() {
    // Both expectations equal the day count since 2020-01-01:
    // 2025-10-01 is 2100 days later, 2024-04-01 is 1552 days later.
    let october_2025 = date_str_to_day_id("2025-10-01").unwrap();
    assert_eq!(october_2025, 2100);

    let april_2024 = date_str_to_day_id("2024-04-01").unwrap();
    assert_eq!(april_2024, 1552);

    // Anything that is not a date is an error.
    assert!(date_str_to_day_id("invalid").is_err());
}
23594
/// `lookup` returns the entry whose pattern matches the model name, and `None`
/// when no pattern matches.
#[test]
fn pricing_table_lookup_selects_matching_entry() {
    let effective_day = date_str_to_day_id("2025-10-01").unwrap();
    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();

    let opus_entry = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: effective_day,
    };
    let sonnet_entry = PricingEntry {
        model_pattern: "claude-sonnet-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 3.0,
        output_cost_per_mtok: 15.0,
        cache_read_cost_per_mtok: Some(0.3),
        cache_creation_cost_per_mtok: Some(3.75),
        effective_day_id: effective_day,
    };
    let table = PricingTable {
        entries: vec![opus_entry, sonnet_entry],
    };

    // Each model name resolves to the entry with the matching pattern.
    let opus_input = table
        .lookup("claude-opus-4-20260101", lookup_day)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(opus_input, Some(15.0));

    let sonnet_input = table
        .lookup("claude-sonnet-4-latest", lookup_day)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(sonnet_input, Some(3.0));

    // No pattern covers this model.
    assert!(table.lookup("unknown-model", lookup_day).is_none());
}
23632
/// With two entries for the same pattern, `lookup` picks the one in effect on
/// the requested day, and returns `None` for a day before either applies.
#[test]
fn pricing_table_lookup_respects_effective_date() {
    let first_effective = date_str_to_day_id("2025-10-01").unwrap();
    let second_effective = date_str_to_day_id("2026-01-01").unwrap();

    let older_price = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: first_effective,
    };
    let newer_price = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 12.0,
        output_cost_per_mtok: 60.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: second_effective,
    };
    let table = PricingTable {
        entries: vec![older_price, newer_price],
    };

    // Between the two effective dates only the older price applies.
    let between = date_str_to_day_id("2025-11-01").unwrap();
    let price_between = table
        .lookup("claude-opus-4", between)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(price_between, Some(15.0));

    // After the second effective date the newer price applies.
    let after = date_str_to_day_id("2026-02-01").unwrap();
    let price_after = table
        .lookup("claude-opus-4", after)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(price_after, Some(12.0));

    // Before the first effective date nothing applies.
    let before = date_str_to_day_id("2024-01-01").unwrap();
    assert!(table.lookup("claude-opus-4", before).is_none());
}
23677
/// When two patterns with the same effective day both match, `lookup` returns
/// the `gpt-4-turbo%` entry rather than the broader `gpt-4%` entry.
#[test]
fn pricing_table_lookup_specificity_tiebreak() {
    let effective_day = date_str_to_day_id("2025-01-01").unwrap();
    let lookup_day = date_str_to_day_id("2026-01-01").unwrap();

    let broad = PricingEntry {
        model_pattern: "gpt-4%".into(),
        provider: "openai".into(),
        input_cost_per_mtok: 10.0,
        output_cost_per_mtok: 30.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: effective_day,
    };
    let narrow = PricingEntry {
        model_pattern: "gpt-4-turbo%".into(),
        provider: "openai".into(),
        input_cost_per_mtok: 5.0,
        output_cost_per_mtok: 15.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: effective_day,
    };
    let table = PricingTable {
        entries: vec![broad, narrow],
    };

    // Both patterns match; the turbo-specific price is expected.
    let turbo_input = table
        .lookup("gpt-4-turbo-2025", lookup_day)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(turbo_input, Some(5.0));

    // Only the broad pattern matches here.
    let gpt4o_input = table
        .lookup("gpt-4o", lookup_day)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(gpt4o_input, Some(10.0));
}
23715
/// `compute_cost` prices plain input and output tokens at the per-million
/// rates of the matching entry.
#[test]
fn pricing_table_compute_cost_basic() {
    let opus_entry = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus_entry],
    };

    let usage_day = date_str_to_day_id("2026-02-06").unwrap();
    let cost = table.compute_cost(
        Some("claude-opus-4-latest"),
        usage_day,
        Some(1000),
        Some(500),
        None,
        None,
    );
    assert!(cost.is_some());

    // 1000 * 15 / 1e6 + 500 * 75 / 1e6 = 0.015 + 0.0375 = 0.0525
    let deviation = (cost.unwrap() - 0.0525).abs();
    assert!(deviation < 1e-10);
}
23743
/// `compute_cost` with cache-read and cache-creation token counts supplied.
#[test]
fn pricing_table_compute_cost_with_cache() {
    let opus_entry = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus_entry],
    };

    let usage_day = date_str_to_day_id("2026-02-06").unwrap();
    let cost = table.compute_cost(
        Some("claude-opus-4-latest"),
        usage_day,
        Some(1_000_000),
        Some(100_000),
        Some(500_000),
        Some(200_000),
    );
    assert!(cost.is_some());

    // NOTE(review): 16.5 is consistent with the cached tokens being deducted
    // from the input count before pricing:
    //   (1_000_000 - 500_000 - 200_000) * 15 / 1e6 = 4.5
    //   100_000 * 75 / 1e6                         = 7.5
    //   500_000 * 1.5 / 1e6                        = 0.75
    //   200_000 * 18.75 / 1e6                      = 3.75
    // Confirm against `compute_cost`.
    let deviation = (cost.unwrap() - 16.5).abs();
    assert!(deviation < 1e-10);
}
23775
/// `compute_cost` yields `None` when the model is unknown, when no model name
/// is given, and when no token counts are given.
#[test]
fn pricing_table_compute_cost_returns_none_for_unknown_model() {
    let effective_day = date_str_to_day_id("2025-10-01").unwrap();
    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();

    let opus_entry = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: effective_day,
    };
    let table = PricingTable {
        entries: vec![opus_entry],
    };

    // Model name that matches no pattern.
    let unmatched_model = table.compute_cost(
        Some("unknown-model"),
        lookup_day,
        Some(1000),
        Some(500),
        None,
        None,
    );
    assert!(unmatched_model.is_none());

    // No model name at all.
    let missing_model = table.compute_cost(None, lookup_day, Some(1000), Some(500), None, None);
    assert!(missing_model.is_none());

    // Known model but no token counts.
    let missing_tokens =
        table.compute_cost(Some("claude-opus-4"), lookup_day, None, None, None, None);
    assert!(missing_tokens.is_none());
}
23815
/// A fresh database already contains pricing rows: the loaded table is
/// non-empty and resolves the two sampled models to the expected input prices.
#[test]
fn pricing_table_load_from_db() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let table = PricingTable::load(&storage.conn).unwrap();
    assert!(!table.is_empty());

    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();

    let opus_input = table
        .lookup("claude-opus-4-latest", lookup_day)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(opus_input, Some(15.0));

    let flash_input = table
        .lookup("gemini-2.0-flash-001", lookup_day)
        .map(|entry| entry.input_cost_per_mtok);
    assert_eq!(flash_input, Some(0.075));
}
23835
/// Loading the pricing table fails when a row has an unparseable
/// `effective_date`, and the error message says so.
#[test]
fn pricing_table_load_rejects_invalid_effective_date() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // Plant one row whose effective_date is not a date.
    let no_cache_price = Option::<f64>::None;
    storage
        .conn
        .execute_compat(
            "INSERT INTO model_pricing (
                model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
                cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
             ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
            fparams![
                "broken-model%",
                "test",
                1.0_f64,
                2.0_f64,
                no_cache_price,
                no_cache_price,
                "not-a-date"
            ],
        )
        .unwrap();

    let load_error = PricingTable::load(&storage.conn).unwrap_err();
    let message = load_error.to_string();
    assert!(message.contains("invalid effective_date"));
}
23864
/// `PricingDiagnostics` counts priced and unpriced records and tallies
/// unpriced records per model name, using `(none)` when no name is given.
#[test]
fn pricing_diagnostics_tracks_coverage() {
    let mut diagnostics = PricingDiagnostics::default();

    // Two priced records.
    for _ in 0..2 {
        diagnostics.record_priced();
    }
    // Three unpriced records: the same named model twice, then no model.
    for _ in 0..2 {
        diagnostics.record_unpriced(Some("custom-model-v1"));
    }
    diagnostics.record_unpriced(None);

    assert_eq!(diagnostics.priced_count, 2);
    assert_eq!(diagnostics.unpriced_count, 3);

    let unknown = &diagnostics.unknown_models;
    assert_eq!(unknown.len(), 2);
    assert_eq!(unknown["custom-model-v1"], 2);
    assert_eq!(unknown["(none)"], 1);
}
23880
23881 fn franken_storage_in_memory() -> FrankenStorage {
23891 let conn = FrankenConnection::open(":memory:").unwrap();
23892 let storage = FrankenStorage::new(conn, PathBuf::from(":memory:"));
23893 storage.run_migrations().unwrap();
23894 storage.apply_config().unwrap();
23895 storage
23896 }
23897
/// Running migrations on an empty in-memory database reaches the current
/// schema version, creates every expected table, and records versions 13
/// through `CURRENT_SCHEMA_VERSION` in `_schema_migrations`.
#[test]
fn franken_migrations_create_all_tables() {
    let storage = franken_storage_in_memory();

    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "fresh FrankenStorage should be at current schema version"
    );

    // Collect every table name known to sqlite_master.
    let table_names: Vec<String> = storage
        .raw()
        .query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
        .unwrap()
        .iter()
        .filter_map(|row| row.get_typed::<String>(0).ok())
        .collect();
    let has_table = |name: &str| table_names.iter().any(|existing| existing == name);

    for required in [
        "meta",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
        "tags",
        "conversation_tags",
    ] {
        assert!(has_table(required), "missing table: {required}");
    }

    assert!(has_table("sources"), "missing sources table");
    assert!(has_table("daily_stats"), "missing daily_stats table");
    assert!(has_table("embedding_jobs"), "missing embedding_jobs table");

    for analytics_table in ["message_metrics", "usage_hourly", "usage_daily"] {
        assert!(
            has_table(analytics_table),
            "missing table: {analytics_table}"
        );
    }

    assert!(
        has_table("conversation_tail_state"),
        "missing conversation_tail_state table"
    );
    assert!(
        has_table("conversation_external_lookup"),
        "missing conversation_external_lookup table"
    );
    assert!(
        has_table("conversation_external_tail_lookup"),
        "missing conversation_external_tail_lookup table"
    );

    // Bookkeeping: one row per version from 13 up to the current version.
    let expected_versions: Vec<i64> = (13..=CURRENT_SCHEMA_VERSION).collect();

    let count_rows = storage
        .raw()
        .query("SELECT COUNT(*) FROM _schema_migrations;")
        .unwrap();
    let recorded_count: i64 = count_rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(
        recorded_count,
        expected_versions.len() as i64,
        "_schema_migrations should record the V13 base schema and post-V13 migrations"
    );

    let version_rows = storage
        .raw()
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    let recorded_versions: Vec<i64> = version_rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        recorded_versions, expected_versions,
        "_schema_migrations should contain v13 through current"
    );
}
24002
/// Running migrations again on an up-to-date database succeeds and leaves the
/// schema version unchanged.
#[test]
fn franken_migrations_idempotent() {
    let storage = franken_storage_in_memory();
    let version_before = storage.schema_version().unwrap();
    assert_eq!(version_before, CURRENT_SCHEMA_VERSION);

    // Second run on the already-migrated database.
    storage.run_migrations().unwrap();

    let version_after = storage.schema_version().unwrap();
    assert_eq!(version_after, CURRENT_SCHEMA_VERSION);
}
24012
/// Re-running migrations after rolling the bookkeeping back to version 19
/// repopulates `conversation_external_tail_lookup` for an existing
/// conversation.
///
/// The conversation uses non-ASCII source and external ids.
#[test]
fn migration_v20_backfills_conversation_external_tail_lookup() {
    let storage = franken_storage_in_memory();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_path = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace_path, None).unwrap();

    let mut conv = make_profiled_storage_remote_conversation(1919, 2);
    conv.source_id = "profiled-storage-remote-source-東京".into();
    conv.external_id = Some("profiled-storage-remote-☃-1919".into());
    let outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
        .unwrap();

    let external_id = conv.external_id.as_deref().unwrap();
    let lookup_key = conversation_external_lookup_key(&conv.source_id, agent_id, external_id);

    // Simulate a database that has not yet applied migration 20: empty the
    // lookup table, forget the migration record, and reset the stored version.
    let db = storage.raw();
    db.execute("DELETE FROM conversation_external_tail_lookup")
        .unwrap();
    db.execute("DELETE FROM _schema_migrations WHERE version = 20")
        .unwrap();
    db.execute_compat(
        "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
        fparams!["19"],
    )
    .unwrap();

    storage.run_migrations().unwrap();

    let backfilled: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
        .raw()
        .query_row_map(
            "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
             FROM conversation_external_tail_lookup
             WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
            |row| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                ))
            },
        )
        .unwrap();

    // The row must describe the two-message conversation inserted above.
    let expected_row = (
        outcome.conversation_id,
        conv.ended_at,
        Some(1),
        conv.messages[1].created_at,
    );
    assert_eq!(backfilled, expected_row);
    assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
}
24083
/// The v15 tail-state migration applies once, is a no-op the second time,
/// adds the two tail columns to `conversations`, leaves
/// `conversation_tail_state` empty, and records itself exactly once.
#[test]
fn migration_v15_creates_lazy_tail_state_cache() {
    let conn = FrankenConnection::open(":memory:").unwrap();

    // Minimal pre-v15 schema with two conversations and three messages.
    conn.execute_batch(
        "CREATE TABLE conversations (
            id INTEGER PRIMARY KEY,
            ended_at INTEGER
         );
         CREATE TABLE messages (
            id INTEGER PRIMARY KEY,
            conversation_id INTEGER NOT NULL,
            idx INTEGER NOT NULL,
            created_at INTEGER
         );
         INSERT INTO conversations(id, ended_at) VALUES
            (1, 1710000000300),
            (2, NULL);
         INSERT INTO messages(id, conversation_id, idx, created_at) VALUES
            (10, 1, 0, 1710000000100),
            (11, 1, 1, 1710000000200),
            (12, 2, 0, 1710000000400);",
    )
    .unwrap();

    // Bookkeeping table the migration records itself in.
    conn.execute(
        "CREATE TABLE _schema_migrations (
            version INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
         );",
    )
    .unwrap();

    let first_run = apply_conversation_tail_state_cache_migration(&conn).unwrap();
    assert!(first_run, "v15 migration should apply once");
    let second_run = apply_conversation_tail_state_cache_migration(&conn).unwrap();
    assert!(
        !second_run,
        "v15 migration should be idempotent once recorded"
    );

    // Column 1 of each `PRAGMA table_info` row is the column name.
    let column_rows = conn.query("PRAGMA table_info(conversations);").unwrap();
    let column_names: HashSet<String> = column_rows
        .iter()
        .map(|row| row.get_typed(1))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();
    assert!(column_names.contains("last_message_idx"));
    assert!(column_names.contains("last_message_created_at"));

    let tail_count_rows = conn
        .query("SELECT COUNT(*) FROM conversation_tail_state;")
        .unwrap();
    let tail_rows: i64 = tail_count_rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(
        tail_rows, 0,
        "v15 should create the cache without an open-time message scan"
    );

    let applied_rows = conn
        .query("SELECT COUNT(*) FROM _schema_migrations WHERE version = 15;")
        .unwrap();
    let applied: i64 = applied_rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(applied, 1);
}
24156
/// Starting from a `conversations` table that has only three columns, the
/// token-column repair must succeed on two consecutive calls and leave every
/// column listed in `REQUIRED_CONVERSATION_TOKEN_COLUMNS` present.
#[test]
fn schema_repair_adds_missing_conversations_token_columns() {
    let bare = FrankenConnection::open(":memory:").unwrap();
    bare.execute_batch(
        "CREATE TABLE conversations (
             id INTEGER PRIMARY KEY,
             agent_id INTEGER NOT NULL,
             source_path TEXT NOT NULL
         );",
    )
    .unwrap();
    let storage = FrankenStorage::new(bare, std::path::PathBuf::from(":memory:"));

    // The second pass must also succeed once the columns already exist.
    for _pass in 0..2 {
        storage.repair_missing_conversation_token_columns().unwrap();
    }

    let present = franken_table_column_names(&storage.conn, "conversations").unwrap();
    REQUIRED_CONVERSATION_TOKEN_COLUMNS
        .iter()
        .for_each(|&(column_name, _)| {
            assert!(
                present.contains(column_name),
                "schema repair should add conversations.{column_name}"
            );
        });
}
24181
/// The `schema_version` row in `meta` of an in-memory storage must equal
/// `CURRENT_SCHEMA_VERSION` rendered as a string.
#[test]
fn franken_meta_schema_version_in_sync() {
    let storage = franken_storage_in_memory();

    let stored_version: String = storage
        .raw()
        .query("SELECT value FROM meta WHERE key = 'schema_version';")
        .unwrap()
        .first()
        .unwrap()
        .get_typed(0)
        .unwrap();
    let expected_version = CURRENT_SCHEMA_VERSION.to_string();

    assert_eq!(
        stored_version, expected_version,
        "meta.schema_version should match CURRENT_SCHEMA_VERSION"
    );
}
24198
/// A database whose only version marker is `meta.schema_version = '10'` must
/// get `_schema_migrations` rows for versions 1 through 10 after
/// `transition_from_meta_version` runs.
#[test]
fn franken_transition_from_meta_version() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_transition.db");

    // Build the legacy layout: a `meta` version marker plus a `conversations` table.
    let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
    conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
        .unwrap();
    conn.execute("INSERT INTO meta(key, value) VALUES('schema_version', '10');")
        .unwrap();
    conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
        .unwrap();
    drop(conn);

    // Reopen the file and run the transition on the new connection.
    let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as i64 fails the test here
    // instead of being silently dropped from the comparison below.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();
    assert_eq!(
        versions,
        (1..=10).collect::<Vec<i64>>(),
        "transition should backfill versions 1..=10"
    );
}
24231
/// When `meta.schema_version` already holds `CURRENT_SCHEMA_VERSION`, the
/// transition must record every version from 1 through
/// `CURRENT_SCHEMA_VERSION` in `_schema_migrations`.
#[test]
fn franken_transition_from_current_meta_backfills_current_schema_marker() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_current_transition.db");

    // Legacy layout whose `meta` marker is already at the current version.
    let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
    conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
        .unwrap();
    conn.execute_compat(
        "INSERT INTO meta(key, value) VALUES('schema_version', ?1);",
        &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
    )
    .unwrap();
    conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
        .unwrap();
    drop(conn);

    // Reopen the file and run the transition on the new connection.
    let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as i64 fails the test here
    // instead of being silently dropped from the comparison below.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();
    assert_eq!(
        versions,
        (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
        "current meta schema marker should backfill every known migration"
    );
}
24262
/// If `_schema_migrations` already exists with one row, running the
/// transition must leave the row count at one.
#[test]
fn franken_transition_skips_when_already_done() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_transition_skip.db");
    let db = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();

    // Seed the bookkeeping table with a single recorded migration.
    let seed_statements = [
        "CREATE TABLE _schema_migrations (version INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL DEFAULT 'now');",
        "INSERT INTO _schema_migrations (version, name) VALUES (1, 'test');",
    ];
    for statement in seed_statements {
        db.execute(statement).unwrap();
    }

    transition_from_meta_version(&db).unwrap();

    let recorded: i64 = db
        .query("SELECT COUNT(*) FROM _schema_migrations;")
        .unwrap()
        .first()
        .unwrap()
        .get_typed(0)
        .unwrap();
    assert_eq!(
        recorded, 1,
        "transition should not re-run on already-transitioned DB"
    );
}
24289
/// On a brand-new database file the transition must succeed, and a later
/// query against `_schema_migrations` must return an error.
#[test]
fn franken_transition_fresh_db_is_noop() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_fresh_noop.db");
    let db = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();

    transition_from_meta_version(&db).unwrap();

    // Any error from this probe is accepted as evidence the table is absent.
    let probe = db.query(r#"SELECT * FROM "_schema_migrations";"#);
    assert!(
        probe.is_err(),
        "transition should not create _schema_migrations on fresh DB"
    );
}
24306
/// A legacy database at `meta.schema_version = '13'` that also contains an
/// `fts_messages` FTS5 virtual table must transition cleanly and end up with
/// versions 1 through 13 recorded in `_schema_migrations`.
#[test]
fn franken_transition_with_fts_virtual_table_succeeds() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_transition_with_fts.db");

    // Legacy layout: version marker, `conversations`, and an FTS5 table.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(
        "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
         INSERT INTO meta(key, value) VALUES('schema_version', '13');
         CREATE TABLE conversations (id INTEGER PRIMARY KEY);
         CREATE VIRTUAL TABLE fts_messages USING fts5(
             content,
             title,
             agent,
             workspace,
             source_path,
             created_at,
             content='',
             tokenize='porter unicode61'
         );",
    )
    .unwrap();
    drop(conn);

    // Reopen the file and run the transition on the new connection.
    let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as i64 fails the test here
    // instead of being silently dropped from the comparison below.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();
    assert_eq!(versions, (1..=13).collect::<Vec<i64>>());
}
24340
/// Opening a populated legacy v13 database (including an `fts_messages` FTS5
/// virtual table) through `FrankenStorage::open` must bring it to
/// `CURRENT_SCHEMA_VERSION` and record versions 1 through
/// `CURRENT_SCHEMA_VERSION` in `_schema_migrations`.
#[test]
fn franken_storage_open_legacy_v13_with_fts_virtual_table_succeeds() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_open_legacy_v13_with_fts.db");

    // Hand-built legacy database: core tables, one row in each, and an FTS5 table.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(
        "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
         INSERT INTO meta(key, value) VALUES('schema_version', '13');
         CREATE TABLE agents (
             id INTEGER PRIMARY KEY,
             slug TEXT NOT NULL
         );
         CREATE TABLE workspaces (
             id INTEGER PRIMARY KEY,
             path TEXT NOT NULL
         );
         CREATE TABLE sources (
             id TEXT PRIMARY KEY,
             kind TEXT NOT NULL,
             host_label TEXT,
             machine_id TEXT,
             platform TEXT,
             config_json TEXT,
             created_at INTEGER NOT NULL,
             updated_at INTEGER NOT NULL
         );
         CREATE TABLE conversations (
             id INTEGER PRIMARY KEY,
             agent_id INTEGER NOT NULL,
             workspace_id INTEGER,
             source_id TEXT NOT NULL DEFAULT 'local',
             external_id TEXT,
             title TEXT,
             source_path TEXT NOT NULL,
             started_at INTEGER,
             ended_at INTEGER
         );
         CREATE TABLE messages (
             id INTEGER PRIMARY KEY,
             conversation_id INTEGER NOT NULL,
             idx INTEGER NOT NULL,
             role TEXT NOT NULL,
             author TEXT,
             created_at INTEGER,
             content TEXT NOT NULL,
             extra_json TEXT,
             extra_bin BLOB
         );
         INSERT INTO agents(id, slug) VALUES (1, 'codex');
         INSERT INTO workspaces(id, path) VALUES (1, '/data/projects/coding_agent_session_search');
         INSERT INTO sources(id, kind, host_label, created_at, updated_at)
         VALUES ('local', 'local', NULL, 1710000000000, 1710000000000);
         INSERT INTO conversations(
             id,
             agent_id,
             workspace_id,
             source_id,
             external_id,
             title,
             source_path,
             started_at
         )
         VALUES (
             1,
             1,
             1,
             'local',
             'legacy-session',
             'legacy session',
             '/tmp/legacy.jsonl',
             1710000000000
         );
         INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content)
         VALUES (1, 1, 0, 'user', 'tester', 1710000000000, 'legacy content');
         CREATE VIRTUAL TABLE fts_messages USING fts5(
             content,
             title,
             agent,
             workspace,
             source_path,
             created_at,
             message_id,
             content='',
             tokenize='porter unicode61'
         );",
    )
    .unwrap();
    drop(conn);

    let storage = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let rows = storage
        .raw()
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly: a row that cannot be read as i64 fails the test here
    // instead of being silently dropped from the comparison below.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();
    assert_eq!(versions, (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>());
}
24441
/// Forges a second `fts_messages` row in `sqlite_master` and removes the FTS
/// rebuild meta key. It then checks that reopening keeps the schema at
/// `CURRENT_SCHEMA_VERSION` without writing that meta key, and that
/// `ensure_search_fallback_fts_consistency` leaves one FTS schema row with one
/// FTS row per message.
#[test]
fn franken_storage_open_repairs_duplicate_fts_messages_schema_rows() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_open_repairs_duplicate_fts_schema.db");

    // Seed one conversation holding one message through the storage API.
    let storage = FrankenStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        })
        .unwrap();
    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: Some("user".into()),
        created_at: Some(1_700_000_000_050),
        content: "message that should remain queryable".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("dup-fts-schema".into()),
        title: Some("Duplicate FTS schema".into()),
        source_path: PathBuf::from("/tmp/dup-fts-schema.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    drop(storage);
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    // Insert a duplicate schema row and delete the rebuild marker while
    // `writable_schema` is switched on.
    let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
    let fixture = rusqlite_test_fixture_conn(&db_path);
    fixture
        .execute_batch("PRAGMA writable_schema = ON;")
        .unwrap();
    fixture
        .execute(
            "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
             VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
            [duplicate_legacy_fts_sql],
        )
        .unwrap();
    fixture
        .execute(
            "DELETE FROM meta WHERE key = ?1",
            [FTS_FRANKEN_REBUILD_META_KEY],
        )
        .unwrap();
    fixture
        .execute_batch("PRAGMA writable_schema = OFF;")
        .unwrap();

    let duplicate_rows: i64 = fixture
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(duplicate_rows, 2);
    drop(fixture);

    let reopened = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    let generation_rows: Vec<String> = reopened
        .raw()
        .query_map_collect(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![FTS_FRANKEN_REBUILD_META_KEY],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        generation_rows.len(),
        0,
        "canonical open should not eagerly rewrite FTS repair metadata"
    );

    reopened.ensure_search_fallback_fts_consistency().unwrap();
    let repaired = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    assert_eq!(franken_fts_schema_rows(&repaired).unwrap(), 1);

    // Single-value COUNT(*) helper run against the reopened storage.
    let count_rows = |sql: &str| -> i64 {
        reopened
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let total_messages = count_rows("SELECT COUNT(*) FROM messages");
    let total_fts_rows = count_rows("SELECT COUNT(*) FROM fts_messages");
    assert_eq!(total_fts_rows, total_messages);
}
24547
/// After a fresh open followed by `ensure_search_fallback_fts_consistency`,
/// exactly one `fts_messages` schema row must be reported, and `fts_messages`
/// must be queryable once the storage is opened again.
#[test]
fn franken_storage_open_fresh_db_keeps_single_franken_fts_schema_row() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("fresh-franken-storage-open.db");

    let first_open = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(first_open.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    first_open
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency after fresh open");
    drop(first_open);

    // Inspect the schema through a plain connection, closed again before reopening.
    let inspector = FrankenConnection::open(db_path.to_string_lossy().into_owned())
        .expect("open DB via frankensqlite for sqlite_master inspection");
    let schema_rows = franken_fts_schema_rows(&inspector).unwrap();
    assert_eq!(
        schema_rows, 1,
        "exactly one fts_messages schema row should exist after ensure_search_fallback_fts_consistency"
    );
    drop(inspector);

    let second_open = FrankenStorage::open(&db_path).unwrap();
    let fts_probe = second_open
        .raw()
        .query("SELECT rowid FROM fts_messages LIMIT 1");
    assert!(
        fts_probe.is_ok(),
        "fts_messages must be queryable through frankensqlite after open"
    );
}
24585
/// Drops nine analytics tables while leaving `meta.schema_version` at the
/// current value, then checks that `FrankenStorage::open` brings all nine back.
#[test]
fn franken_storage_open_repairs_missing_analytics_tables_when_version_markers_lie() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_repair_missing_analytics.db");

    // Create the database at the current schema, then close it.
    let initial = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(initial.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(initial);

    // Remove the tables behind storage's back and restate the version marker.
    let saboteur = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let dropped_tables = [
        "usage_models_daily",
        "usage_daily",
        "usage_hourly",
        "message_metrics",
        "token_daily_stats",
        "token_usage",
        "model_pricing",
        "embedding_jobs",
        "daily_stats",
    ];
    for table in dropped_tables.iter() {
        let drop_sql = format!("DROP TABLE IF EXISTS {table}");
        saboteur.execute(&drop_sql).unwrap();
    }
    saboteur
        .execute_compat(
            "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
            fparams![CURRENT_SCHEMA_VERSION.to_string()],
        )
        .unwrap();
    drop(saboteur);

    let repaired = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(repaired.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let analytics_count: i64 = repaired
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master
             WHERE type='table'
               AND name IN (
                 'daily_stats',
                 'embedding_jobs',
                 'token_usage',
                 'token_daily_stats',
                 'model_pricing',
                 'message_metrics',
                 'usage_hourly',
                 'usage_daily',
                 'usage_models_daily'
               )",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        analytics_count, 9,
        "open() should recreate the missing analytics tables even when schema_version already says current"
    );
}
24647
/// Treats every table named in `REQUIRED_CURRENT_SCHEMA_TABLE_PROBES` as
/// missing and checks that the selected repair batches mention each of them.
#[test]
fn current_schema_repair_batches_cover_every_required_probe() {
    let missing_tables: Vec<&'static str> = REQUIRED_CURRENT_SCHEMA_TABLE_PROBES
        .iter()
        .map(|probe| probe.0)
        .collect();

    let batches = current_schema_repair_batches_for_missing_tables(&missing_tables).unwrap();

    // Union of the tables listed by every returned batch.
    let mut covered_tables: HashSet<&'static str> = HashSet::new();
    for batch in batches.iter() {
        covered_tables.extend(batch.tables.iter().copied());
    }

    for table_name in missing_tables {
        assert!(
            covered_tables.contains(table_name),
            "missing repair coverage for {table_name}"
        );
    }
}
24668
/// No repair batch may contain SQL that recreates the core tables, recreates
/// the FTS virtual table, or drops any table.
#[test]
fn current_schema_repair_batches_do_not_replay_core_schema_bootstrap() {
    // Forbidden SQL fragment paired with the tail of its failure message.
    let forbidden_fragments: [(&str, &str); 8] = [
        ("CREATE TABLE IF NOT EXISTS meta", "should not recreate meta"),
        (
            "CREATE TABLE IF NOT EXISTS agents",
            "should not recreate agents",
        ),
        (
            "CREATE TABLE IF NOT EXISTS workspaces",
            "should not recreate workspaces",
        ),
        (
            "CREATE TABLE IF NOT EXISTS conversations",
            "should not recreate conversations",
        ),
        (
            "CREATE TABLE IF NOT EXISTS messages",
            "should not recreate messages",
        ),
        (
            "CREATE TABLE IF NOT EXISTS snippets",
            "should not recreate snippets",
        ),
        (
            "CREATE VIRTUAL TABLE fts_messages",
            "should not recreate FTS tables",
        ),
        ("DROP TABLE", "should never drop tables"),
    ];

    for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
        for (fragment, complaint) in forbidden_fragments.iter() {
            assert!(
                !batch.sql.contains(fragment),
                "repair batch {} {}",
                batch.name,
                complaint
            );
        }
    }
}
24716
/// Runs the pre-tail-cache migrations, the v15 tail-cache migration, and the
/// post-tail-cache migrations on an empty in-memory database, then checks that
/// together they applied versions 13 through `CURRENT_SCHEMA_VERSION`.
#[test]
fn build_cass_migrations_applies_combined_v13() {
    let db = FrankenConnection::open(":memory:").unwrap();

    let before_tail_cache = build_cass_migrations_before_tail_cache()
        .run(&db)
        .unwrap();
    let tail_cache_applied = apply_conversation_tail_state_cache_migration(&db).unwrap();
    assert!(tail_cache_applied);
    let after_tail_cache = build_cass_migrations_after_tail_cache().run(&db).unwrap();

    assert!(before_tail_cache.was_fresh);

    // Version 15 is applied outside both runners, so it is added by hand.
    let mut applied = before_tail_cache.applied;
    applied.push(15);
    applied.extend(after_tail_cache.applied);
    let expected: Vec<i64> = (13..=CURRENT_SCHEMA_VERSION).collect();
    assert_eq!(
        applied, expected,
        "should apply combined V13 plus additive post-V13 migrations"
    );

    let max_rows = db
        .query("SELECT MAX(version) FROM _schema_migrations;")
        .unwrap();
    let current: i64 = max_rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(current, CURRENT_SCHEMA_VERSION);
}
24744
/// Inserts one two-message conversation through `insert_conversations_batched`
/// (the second message carries token-usage JSON) and checks that the four
/// rollup tables each contain at least one row afterwards.
#[test]
fn franken_insert_conversations_batched_populates_analytics_rollups() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use frankensqlite::compat::{ConnectionExt, RowExt};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("franken-index.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let ts_ms = 1_770_551_400_000_i64;

    // Usage payload attached to the agent reply below.
    let usage_json = serde_json::json!({
        "message": {
            "model": "claude-opus-4-6",
            "usage": {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_read_input_tokens": 25,
                "cache_creation_input_tokens": 10,
                "service_tier": "standard"
            }
        }
    });

    let user_turn = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(ts_ms),
        content: "Please make a plan.".into(),
        extra_json: serde_json::Value::Null,
        snippets: vec![],
    };
    let agent_turn = Message {
        id: None,
        idx: 1,
        role: MessageRole::Agent,
        author: None,
        created_at: Some(ts_ms + 30_000),
        content: "## Plan\n\n1. Reproduce\n2. Patch\n3. Verify".into(),
        extra_json: usage_json,
        snippets: vec![],
    };
    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("franken-batch-upsert".into()),
        title: Some("Franken batch upsert".into()),
        source_path: PathBuf::from("/tmp/franken.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![user_turn, agent_turn],
        source_id: "local".into(),
        origin_host: None,
    };

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);

    // Single-value COUNT(*) helper run on the storage's raw connection.
    let conn = storage.raw();
    let count_rows = |sql: &str| -> i64 {
        conn.query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let daily_stats_rows = count_rows("SELECT COUNT(*) FROM daily_stats");
    let token_daily_rows = count_rows("SELECT COUNT(*) FROM token_daily_stats");
    let usage_daily_rows = count_rows("SELECT COUNT(*) FROM usage_daily");
    let model_daily_rows = count_rows("SELECT COUNT(*) FROM usage_models_daily");

    assert!(daily_stats_rows > 0, "daily_stats should be populated");
    assert!(
        token_daily_rows > 0,
        "token_daily_stats should be populated"
    );
    assert!(usage_daily_rows > 0, "usage_daily should be populated");
    assert!(
        model_daily_rows > 0,
        "usage_models_daily should be populated"
    );
}
24858
/// A manager built with three readers and two writers must report exactly
/// those counts.
#[test]
fn connection_manager_creates_readers() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("cm.db");

    // Open and immediately close a storage handle before building the manager.
    drop(FrankenStorage::open(&db_path).unwrap());

    let manager = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 3,
            max_writers: 2,
        },
    )
    .unwrap();

    assert_eq!(manager.reader_count(), 3);
    assert_eq!(manager.max_writers(), 2);
}
24880
/// A config of zero readers and zero writers must be reported as one of each,
/// and a writer requested on another thread must be obtained within ten seconds.
#[test]
fn connection_manager_clamps_zero_writer_limit_to_prevent_deadlock() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("cm.db");

    // Open and immediately close a storage handle before building the manager.
    drop(FrankenStorage::open(&db_path).unwrap());

    let zeroed = ConnectionManagerConfig {
        reader_count: 0,
        max_writers: 0,
    };
    let manager = std::sync::Arc::new(FrankenConnectionManager::new(&db_path, zeroed).unwrap());
    assert_eq!(manager.reader_count(), 1);
    assert_eq!(manager.max_writers(), 1);

    // Acquire the writer on a worker thread so this thread can apply a timeout.
    let (outcome_tx, outcome_rx) = std::sync::mpsc::channel();
    let worker_manager = std::sync::Arc::clone(&manager);
    std::thread::spawn(move || {
        let acquired = match worker_manager.writer() {
            Ok(mut guard) => {
                guard.mark_committed();
                true
            }
            Err(_) => false,
        };
        outcome_tx.send(acquired).expect("writer result send");
    });

    let writer_acquired = outcome_rx.recv_timeout(Duration::from_secs(10)).unwrap();
    assert!(
        writer_acquired,
        "writer acquisition should not block forever when configured with zero writer slots"
    );
}
24916
/// Taking one reader from the manager must advance `reader_idx` by exactly one.
#[test]
fn connection_manager_reader_round_robin() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("cm.db");

    // Open and immediately close a storage handle before building the manager.
    drop(FrankenStorage::open(&db_path).unwrap());

    let manager = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 2,
            max_writers: 1,
        },
    )
    .unwrap();

    // Snapshot the counter on both sides of a single `reader()` call.
    let read_index = || manager.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
    let idx_before = read_index();
    let _held_reader = manager.reader();
    let idx_after = read_index();

    assert_eq!(idx_after, idx_before + 1, "reader index should advance");
}
24937
/// A row written through a writer guard (and marked committed) must be visible
/// to a reader obtained from the same manager.
#[test]
fn connection_manager_writer_reads_and_writes() {
    use frankensqlite::compat::RowExt;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("cm.db");

    // Open and immediately close a storage handle before building the manager.
    drop(FrankenStorage::open(&db_path).unwrap());

    let manager = FrankenConnectionManager::new(&db_path, Default::default()).unwrap();

    // Write phase: create the table, insert one row, then release the guard.
    let mut writer = manager.writer().unwrap();
    let write_statements = [
        "CREATE TABLE IF NOT EXISTS cm_test (id INTEGER PRIMARY KEY, val TEXT)",
        "INSERT INTO cm_test (val) VALUES ('hello')",
    ];
    for statement in write_statements {
        writer.storage().raw().execute(statement).unwrap();
    }
    writer.mark_committed();
    drop(writer);

    // Read phase: the inserted value must come back through a reader.
    let reader = manager.reader();
    let rows = reader.query("SELECT val FROM cm_test").unwrap();
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "hello");
}
24972
/// With a single writer slot, a second writer must be obtainable after the
/// first guard has been dropped.
#[test]
fn connection_manager_writer_guard_drops_releases_slot() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("cm.db");

    // Open and immediately close a storage handle before building the manager.
    drop(FrankenStorage::open(&db_path).unwrap());

    let manager = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 1,
            max_writers: 1,
        },
    )
    .unwrap();

    // First guard: mark committed and drop it before asking again.
    let mut first_guard = manager.writer().unwrap();
    first_guard.mark_committed();
    drop(first_guard);

    // Second guard: this `unwrap` is the actual check of the test.
    let mut second_guard = manager.writer().unwrap();
    second_guard.mark_committed();
}
24997
/// A row written through a `concurrent_writer` guard (and marked committed)
/// must be visible to a reader obtained from the same manager.
#[test]
fn connection_manager_concurrent_writer_works() {
    use frankensqlite::compat::RowExt;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("cm.db");

    // Open and immediately close a storage handle before building the manager.
    drop(FrankenStorage::open(&db_path).unwrap());

    let manager = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 1,
            max_writers: 2,
        },
    )
    .unwrap();

    // Write phase: create the table, insert one row, then release the guard.
    let mut writer = manager.concurrent_writer().unwrap();
    let write_statements = [
        "CREATE TABLE IF NOT EXISTS cm_conc (id INTEGER PRIMARY KEY, val TEXT)",
        "INSERT INTO cm_conc (val) VALUES ('concurrent')",
    ];
    for statement in write_statements {
        writer.storage().raw().execute(statement).unwrap();
    }
    writer.mark_committed();
    drop(writer);

    // Read phase: the inserted value must come back through a reader.
    let reader = manager.reader();
    let rows = reader.query("SELECT val FROM cm_conc").unwrap();
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "concurrent");
}
25034
/// The default config must ask for four readers and at least one writer.
#[test]
fn connection_manager_default_config() {
    let defaults = ConnectionManagerConfig::default();

    assert_eq!(defaults.reader_count, 4);
    assert!(defaults.max_writers > 0);
}
25041
/// Seeds one conversation each for `openclaw` and `codex`, purges `openclaw`,
/// rebuilds the derived tables, and checks that only the `codex` data is left
/// in the agent list, the totals, the FTS table, and the rollups.
#[test]
fn purge_agent_archive_data_removes_only_target_agent_and_rebuilds_derived_tables() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Registers `agent_slug` and stores one conversation for it containing a
    /// user message and an assistant message, all labelled with `marker`.
    fn seed_conversation(storage: &FrankenStorage, agent_slug: &str, marker: &str) {
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: agent_slug.into(),
                name: agent_slug.into(),
                version: None,
                kind: AgentKind::Cli,
            })
            .unwrap();

        let user_turn = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_010),
            content: format!("{agent_slug} {marker} user"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let assistant_turn = Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_020),
            content: format!("{agent_slug} {marker} assistant"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let conversation = Conversation {
            id: None,
            agent_slug: agent_slug.into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("{agent_slug}-{marker}")),
            title: Some(format!("{agent_slug} {marker}")),
            source_path: PathBuf::from(format!("/tmp/{agent_slug}-{marker}.jsonl")),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![user_turn, assistant_turn],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };

        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    seed_conversation(&storage, "openclaw", "purge-target");
    seed_conversation(&storage, "codex", "keep-target");

    let purge = storage.purge_agent_archive_data("openclaw").unwrap();
    assert_eq!(purge.conversations_deleted, 1);
    assert_eq!(purge.messages_deleted, 2);

    // Rebuild every derived table before inspecting it.
    storage.rebuild_fts().unwrap();
    storage.rebuild_analytics().unwrap();
    storage.rebuild_daily_stats().unwrap();
    storage.rebuild_token_daily_stats().unwrap();

    let remaining_agents = storage.list_agents().unwrap();
    assert_eq!(remaining_agents.len(), 1);
    assert_eq!(remaining_agents[0].slug, "codex");
    assert_eq!(storage.total_conversation_count().unwrap(), 1);
    assert_eq!(storage.total_message_count().unwrap(), 2);

    // Single-value integer query helper run on the storage's raw connection.
    let scalar_i64 = |sql: &str| -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    let fts_rows = scalar_i64("SELECT COUNT(*) FROM fts_messages");
    assert_eq!(fts_rows, 2);

    let total_daily_sessions = scalar_i64(
        "SELECT COALESCE(SUM(session_count), 0)
         FROM daily_stats
         WHERE agent_slug = 'all' AND source_id = 'all'",
    );
    assert_eq!(total_daily_sessions, 1);

    let openclaw_token_rows =
        scalar_i64("SELECT COUNT(*) FROM token_daily_stats WHERE agent_slug = 'openclaw'");
    assert_eq!(openclaw_token_rows, 0);
}
25149
25150 #[test]
25157 fn cleanup_orphan_fk_rows_removes_orphans_and_is_noop_on_clean_db() {
25158 let dir = TempDir::new().unwrap();
25159 let db_path = dir.path().join("orphan_fk_self_heal.db");
25160 let storage = FrankenStorage::open(&db_path).unwrap();
25161
25162 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25165
25166 storage
25169 .raw()
25170 .execute_compat(
25171 "INSERT INTO agents(id, slug, name, kind, created_at, updated_at) \
25172 VALUES(1, 'test-agent', 'Test Agent', 'cli', 0, 0)",
25173 fparams![],
25174 )
25175 .unwrap();
25176 storage
25177 .raw()
25178 .execute_compat(
25179 "INSERT INTO conversations(id, agent_id, source_id, source_path, started_at) \
25180 VALUES(1, 1, 'local', '/tmp/real.jsonl', 0)",
25181 fparams![],
25182 )
25183 .unwrap();
25184 storage
25185 .raw()
25186 .execute_compat(
25187 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25188 VALUES(1, 1, 0, 'user', 'real message')",
25189 fparams![],
25190 )
25191 .unwrap();
25192
25193 for (mid, cid, idx) in [(101_i64, 99_999_i64, 0_i64), (102, 0, 0), (103, 0, 1)] {
25197 storage
25198 .raw()
25199 .execute_compat(
25200 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25201 VALUES(?1, ?2, ?3, 'user', 'orphan message')",
25202 fparams![mid, cid, idx],
25203 )
25204 .unwrap();
25205 }
25206
25207 for message_id in [1_i64, 101_i64, 102_i64] {
25212 storage
25213 .raw()
25214 .execute_compat(
25215 "INSERT INTO message_metrics(
25216 message_id, created_at_ms, hour_id, day_id, agent_slug,
25217 role, content_chars, content_tokens_est
25218 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 13, 2)",
25219 fparams![message_id],
25220 )
25221 .unwrap();
25222 storage
25223 .raw()
25224 .execute_compat(
25225 "INSERT INTO token_usage(
25226 message_id, conversation_id, agent_id, timestamp_ms, day_id,
25227 role, content_chars
25228 ) VALUES(?1, 1, 1, 0, 0, 'user', 13)",
25229 fparams![message_id],
25230 )
25231 .unwrap();
25232 }
25233
25234 storage
25238 .raw()
25239 .execute_compat(
25240 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text) \
25241 VALUES(99999, '/tmp/orphan-snippet.rs', 1, 2, 'rust', 'fn main() {}')",
25242 fparams![],
25243 )
25244 .unwrap();
25245
25246 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25247
25248 let messages_before: i64 = storage
25250 .raw()
25251 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25252 row.get_typed(0)
25253 })
25254 .unwrap();
25255 assert_eq!(messages_before, 4); let snippets_before: i64 = storage
25257 .raw()
25258 .query_row_map("SELECT COUNT(*) FROM snippets", fparams![], |row| {
25259 row.get_typed(0)
25260 })
25261 .unwrap();
25262 assert_eq!(snippets_before, 1);
25263 let metrics_before: i64 = storage
25264 .raw()
25265 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25266 row.get_typed(0)
25267 })
25268 .unwrap();
25269 assert_eq!(metrics_before, 3);
25270 let token_usage_before: i64 = storage
25271 .raw()
25272 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
25273 row.get_typed(0)
25274 })
25275 .unwrap();
25276 assert_eq!(token_usage_before, 3);
25277
25278 let report = storage.cleanup_orphan_fk_rows().unwrap();
25280
25281 let messages_after: i64 = storage
25286 .raw()
25287 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25288 row.get_typed(0)
25289 })
25290 .unwrap();
25291 assert_eq!(messages_after, 1, "real message must be preserved");
25292 let snippets_after: i64 = storage
25293 .raw()
25294 .query_row_map("SELECT COUNT(*) FROM snippets", fparams![], |row| {
25295 row.get_typed(0)
25296 })
25297 .unwrap();
25298 assert_eq!(snippets_after, 0);
25299 let metrics_after: i64 = storage
25300 .raw()
25301 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25302 row.get_typed(0)
25303 })
25304 .unwrap();
25305 assert_eq!(metrics_after, 1, "real message metric must be preserved");
25306 let token_usage_after: i64 = storage
25307 .raw()
25308 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
25309 row.get_typed(0)
25310 })
25311 .unwrap();
25312 assert_eq!(token_usage_after, 1, "real token row must be preserved");
25313
25314 assert_eq!(report.total, 4, "report total: {:?}", report);
25315 let messages_count = report
25316 .per_table
25317 .iter()
25318 .find(|(t, _)| *t == "messages")
25319 .map(|(_, c)| *c);
25320 assert_eq!(messages_count, Some(3));
25321 let snippets_count = report
25322 .per_table
25323 .iter()
25324 .find(|(t, _)| *t == "snippets")
25325 .map(|(_, c)| *c);
25326 assert_eq!(snippets_count, Some(1));
25327
25328 let second = storage.cleanup_orphan_fk_rows().unwrap();
25330 assert_eq!(second.total, 0);
25331 assert!(second.per_table.is_empty());
25332 }
25333
25334 #[test]
25335 fn cleanup_orphan_fk_rows_handles_more_than_one_delete_chunk() {
25336 let dir = TempDir::new().unwrap();
25337 let db_path = dir.path().join("orphan_fk_chunked_self_heal.db");
25338 let storage = FrankenStorage::open(&db_path).unwrap();
25339 let orphan_count = ORPHAN_FK_ID_CHUNK_SIZE + 3;
25340
25341 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25342 {
25343 let mut tx = storage.raw().transaction().unwrap();
25344 for idx in 0..orphan_count {
25345 let message_id = 10_000_i64 + i64::try_from(idx).unwrap();
25346 let conversation_id = 20_000_i64 + i64::try_from(idx).unwrap();
25347 tx.execute_compat(
25348 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25349 VALUES(?1, ?2, 0, 'user', 'orphan message')",
25350 fparams![message_id, conversation_id],
25351 )
25352 .unwrap();
25353 tx.execute_compat(
25354 "INSERT INTO message_metrics(
25355 message_id, created_at_ms, hour_id, day_id, agent_slug,
25356 role, content_chars, content_tokens_est
25357 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 14, 2)",
25358 fparams![message_id],
25359 )
25360 .unwrap();
25361 }
25362 tx.commit().unwrap();
25363 }
25364 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25365
25366 let report = storage.cleanup_orphan_fk_rows().unwrap();
25367
25368 assert_eq!(report.total, i64::try_from(orphan_count).unwrap());
25369 let messages_count = report
25370 .per_table
25371 .iter()
25372 .find(|(table, _)| *table == "messages")
25373 .map(|(_, count)| *count);
25374 assert_eq!(messages_count, Some(i64::try_from(orphan_count).unwrap()));
25375 let messages_after: i64 = storage
25376 .raw()
25377 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25378 row.get_typed(0)
25379 })
25380 .unwrap();
25381 assert_eq!(messages_after, 0);
25382 let metrics_after: i64 = storage
25383 .raw()
25384 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25385 row.get_typed(0)
25386 })
25387 .unwrap();
25388 assert_eq!(metrics_after, 0);
25389 }
25390
25391 #[test]
25392 fn cleanup_orphan_fk_rows_pages_direct_child_orphans() {
25393 let dir = TempDir::new().unwrap();
25394 let db_path = dir.path().join("direct_orphan_fk_paged_self_heal.db");
25395 let storage = FrankenStorage::open(&db_path).unwrap();
25396 let orphan_count = (ORPHAN_FK_ID_CHUNK_SIZE * 2) + 5;
25397
25398 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25399 {
25400 let mut tx = storage.raw().transaction().unwrap();
25401 for idx in 0..orphan_count {
25402 let message_id = 50_000_i64 + i64::try_from(idx).unwrap();
25403 tx.execute_compat(
25404 "INSERT INTO message_metrics(
25405 message_id, created_at_ms, hour_id, day_id, agent_slug,
25406 role, content_chars, content_tokens_est
25407 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 21, 3)",
25408 fparams![message_id],
25409 )
25410 .unwrap();
25411 }
25412 tx.commit().unwrap();
25413 }
25414 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25415
25416 let report = storage.cleanup_orphan_fk_rows().unwrap();
25417
25418 assert_eq!(report.total, i64::try_from(orphan_count).unwrap());
25419 let metrics_count = report
25420 .per_table
25421 .iter()
25422 .filter(|(table, _)| *table == "message_metrics")
25423 .map(|(_, count)| *count)
25424 .sum::<i64>();
25425 assert_eq!(metrics_count, i64::try_from(orphan_count).unwrap());
25426 assert_eq!(
25427 report
25428 .per_table
25429 .iter()
25430 .filter(|(table, _)| *table == "message_metrics")
25431 .count(),
25432 1,
25433 "paged cleanup should aggregate report entries by table: {report:?}"
25434 );
25435 let metrics_after: i64 = storage
25436 .raw()
25437 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25438 row.get_typed(0)
25439 })
25440 .unwrap();
25441 assert_eq!(metrics_after, 0);
25442 }
25443}