1use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole, Snippet};
4use crate::sources::provenance::{LOCAL_SOURCE_ID, Source, SourceKind};
5use anyhow::{Context, Result, anyhow, bail};
6use frankensqlite::{
7 Connection as FrankenConnection, Row as FrankenRow, SqliteValue,
8 compat::{
9 ConnectionExt as FrankenConnectionExt, OpenFlags as FrankenOpenFlags,
10 OptionalExtension as FrankenOptionalExtension, ParamValue, RowExt as FrankenRowExt,
11 Transaction as FrankenTransaction, TransactionExt as FrankenTransactionExt,
12 open_with_flags as open_franken_with_flags, param_slice_to_values, params_from_iter,
13 },
14 migrate::MigrationRunner,
15};
16use serde::{Deserialize, Serialize};
17use smallvec::SmallVec;
18use std::borrow::Cow;
19use std::collections::{HashMap, HashSet};
20use std::fs;
21use std::io::{BufRead, BufReader, Write};
22use std::process::{Command, Stdio};
23use std::sync::{
24 Arc,
25 atomic::{AtomicBool, AtomicI8, AtomicI64, AtomicU64, AtomicUsize, Ordering},
26};
27
/// Builds a `&[ParamValue]` slice from a comma-separated list of expressions.
///
/// Every expression is converted with `ParamValue::from`. An empty invocation
/// yields an empty slice, and a trailing comma is accepted.
macro_rules! fparams {
    // No arguments: an empty parameter slice.
    () => {
        &[] as &[ParamValue]
    };
    // One or more expressions, optionally followed by a trailing comma.
    ($($val:expr),+ $(,)?) => {
        &[$(ParamValue::from($val)),+] as &[ParamValue]
    };
}
37use std::path::{Path, PathBuf};
38use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
39use thiserror::Error;
40use tracing::info;
41
/// Timeout `LazyFrankenDb::get` passes to `acquire_doctor_mutation_db_open_guard`.
const DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
/// Maximum number of bytes read from the doctor lock file when looking for its `pid=` entry.
const DOCTOR_MUTATION_LOCK_MAX_METADATA_READ: u64 = 64 * 1024;
44
/// Errors returned when `LazyFrankenDb` opens its connection on demand.
#[derive(Debug, Error)]
pub enum LazyDbError {
    /// The database path did not exist when the open was attempted.
    #[error("Database not found at {0}")]
    NotFound(PathBuf),
    /// Opening the database failed; also used when acquiring the doctor
    /// mutation lock fails or when a timed open does not complete in time.
    #[error("Failed to open FrankenSQLite database at {path}: {source}")]
    FrankenOpenFailed {
        /// Path of the database that could not be opened.
        path: PathBuf,
        /// Underlying FrankenSQLite error.
        source: frankensqlite::FrankenError,
    },
}
63
/// A `FrankenConnection` bundled with two index-writer settings.
///
/// Field order is `(connection, checkpoint_pages, busy_timeout_ms)`, matching
/// `new_with_index_writer_state` and `into_parts`.
pub struct SendFrankenConnection(FrankenConnection, i64, u64);

// SAFETY: NOTE(review): the justification for sending the wrapped connection
// across threads is not visible in this part of the file — confirm that
// `FrankenConnection` may be moved between threads when access is serialized.
unsafe impl Send for SendFrankenConnection {}
79
80impl SendFrankenConnection {
81 pub(crate) fn new(conn: FrankenConnection) -> Self {
82 Self(
83 conn,
84 UNSET_INDEX_WRITER_CHECKPOINT_PAGES,
85 UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS,
86 )
87 }
88
89 pub(crate) fn new_with_index_writer_state(
90 conn: FrankenConnection,
91 checkpoint_pages: i64,
92 busy_timeout_ms: u64,
93 ) -> Self {
94 Self(conn, checkpoint_pages, busy_timeout_ms)
95 }
96
97 pub(crate) fn into_parts(self) -> (FrankenConnection, i64, u64) {
98 (self.0, self.1, self.2)
99 }
100}
101
102impl std::ops::Deref for SendFrankenConnection {
103 type Target = FrankenConnection;
104 fn deref(&self) -> &FrankenConnection {
105 &self.0
106 }
107}
108
/// A database handle that opens its connection on first use.
pub struct LazyFrankenDb {
    /// Filesystem path of the database.
    path: PathBuf,
    /// The connection once opened; `None` until the first successful open.
    conn: parking_lot::Mutex<Option<SendFrankenConnection>>,
}
118
/// Mutex guard over the lazily opened connection; dereferences to the `FrankenConnection`.
pub struct LazyFrankenDbGuard<'a>(parking_lot::MutexGuard<'a, Option<SendFrankenConnection>>);
121
122impl std::fmt::Debug for LazyFrankenDbGuard<'_> {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 f.debug_tuple("LazyFrankenDbGuard")
125 .field(&self.0.is_some())
126 .finish()
127 }
128}
129
130impl std::ops::Deref for LazyFrankenDbGuard<'_> {
131 type Target = FrankenConnection;
132 fn deref(&self) -> &FrankenConnection {
133 self.0
134 .as_ref()
135 .expect("LazyFrankenDb connection must be initialized before access")
136 }
137}
138
139impl LazyFrankenDb {
140 pub fn new(path: PathBuf) -> Self {
142 Self {
143 path,
144 conn: parking_lot::Mutex::new(None),
145 }
146 }
147
148 pub fn from_overrides(data_dir: &Option<PathBuf>, db_override: Option<PathBuf>) -> Self {
152 let data_dir = data_dir.clone().unwrap_or_else(crate::default_data_dir);
153 let path = db_override.unwrap_or_else(|| data_dir.join("agent_search.db"));
154 Self::new(path)
155 }
156
157 pub fn get(&self, reason: &str) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
162 let mut guard = self.conn.lock();
163 if guard.is_none() {
164 if !self.path.exists() {
165 return Err(LazyDbError::NotFound(self.path.clone()));
166 }
167 let start = Instant::now();
168 let _doctor_guard = acquire_doctor_mutation_db_open_guard(
169 &self.path,
170 DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT,
171 )
172 .map_err(|err| LazyDbError::FrankenOpenFailed {
173 path: self.path.clone(),
174 source: frankensqlite::FrankenError::Internal(err.to_string()),
175 })?;
176 let conn =
177 FrankenConnection::open(self.path.to_string_lossy().into_owned()).map_err(|e| {
178 LazyDbError::FrankenOpenFailed {
179 path: self.path.clone(),
180 source: e,
181 }
182 })?;
183 let elapsed_ms = start.elapsed().as_millis();
184 info!(
185 path = %self.path.display(),
186 elapsed_ms = elapsed_ms,
187 reason = reason,
188 "lazily opened FrankenSQLite database"
189 );
190 *guard = Some(SendFrankenConnection::new(conn));
191 }
192 Ok(LazyFrankenDbGuard(guard))
193 }
194
195 pub fn get_with_timeout(
201 &self,
202 reason: &str,
203 timeout: Duration,
204 ) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
205 let mut guard = self.conn.lock();
206 if guard.is_none() {
207 if !self.path.exists() {
208 return Err(LazyDbError::NotFound(self.path.clone()));
209 }
210 let start = Instant::now();
211 let path_owned = self.path.to_string_lossy().into_owned();
212 let path_for_guard = self.path.clone();
213 let (tx, rx) = std::sync::mpsc::channel();
214 std::thread::spawn(move || {
215 let _doctor_guard =
216 match acquire_doctor_mutation_db_open_guard(&path_for_guard, timeout) {
217 Ok(guard) => guard,
218 Err(err) => {
219 let _ = tx
220 .send(Err(frankensqlite::FrankenError::Internal(err.to_string())));
221 return;
222 }
223 };
224 let _ =
225 tx.send(FrankenConnection::open(path_owned).map(SendFrankenConnection::new));
226 });
227 let conn = rx
228 .recv_timeout(timeout)
229 .map_err(|_| LazyDbError::FrankenOpenFailed {
230 path: self.path.clone(),
231 source: frankensqlite::FrankenError::Internal(format!(
232 "database open timed out after {}s (possible corruption or lock contention)",
233 timeout.as_secs()
234 )),
235 })?
236 .map_err(|e| LazyDbError::FrankenOpenFailed {
237 path: self.path.clone(),
238 source: e,
239 })?;
240 let elapsed_ms = start.elapsed().as_millis();
241 info!(
242 path = %self.path.display(),
243 elapsed_ms = elapsed_ms,
244 reason = reason,
245 "lazily opened FrankenSQLite database (with timeout)"
246 );
247 *guard = Some(conn);
248 }
249 Ok(LazyFrankenDbGuard(guard))
250 }
251
252 pub fn path(&self) -> &Path {
254 &self.path
255 }
256
257 pub fn is_open(&self) -> bool {
259 self.conn.lock().is_some()
260 }
261}
262
/// State advanced by `next_franken_retry_jitter_ms` on every call.
static FRANKEN_RETRY_JITTER_STATE: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
/// Number of live `DoctorMutationDbOpenBypassGuard`s created through
/// `enter_doctor_mutation_db_open_bypass`.
static DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH: AtomicUsize = AtomicUsize::new(0);
/// When `false`, the `record_message_lookup_*` helpers do nothing.
static MESSAGE_LOOKUP_TRACE_ENABLED: AtomicBool = AtomicBool::new(false);
/// Incremented by `record_message_lookup_exact_idx_probe`.
static MESSAGE_LOOKUP_EXACT_IDX_PROBES: AtomicU64 = AtomicU64::new(0);
/// Incremented by `record_message_lookup_bounded_queries`.
static MESSAGE_LOOKUP_BOUNDED_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Incremented by `record_message_lookup_full_scan_query`.
static MESSAGE_LOOKUP_FULL_SCAN_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Row totals added by the bounded-query and full-scan recorders.
static MESSAGE_LOOKUP_ROWS_MATERIALIZED: AtomicU64 = AtomicU64::new(0);
270
/// Point-in-time copy of the message-lookup trace counters.
#[derive(Debug, Clone, Copy, Default, Serialize)]
pub(crate) struct MessageLookupTraceCounters {
    /// Value of `MESSAGE_LOOKUP_EXACT_IDX_PROBES`.
    pub exact_idx_probes: u64,
    /// Value of `MESSAGE_LOOKUP_BOUNDED_QUERIES`.
    pub bounded_lookup_queries: u64,
    /// Value of `MESSAGE_LOOKUP_FULL_SCAN_QUERIES`.
    pub full_scan_queries: u64,
    /// Value of `MESSAGE_LOOKUP_ROWS_MATERIALIZED`.
    pub rows_materialized: u64,
}
278
279impl MessageLookupTraceCounters {
280 pub(crate) fn saturating_sub(self, before: Self) -> Self {
281 Self {
282 exact_idx_probes: self
283 .exact_idx_probes
284 .saturating_sub(before.exact_idx_probes),
285 bounded_lookup_queries: self
286 .bounded_lookup_queries
287 .saturating_sub(before.bounded_lookup_queries),
288 full_scan_queries: self
289 .full_scan_queries
290 .saturating_sub(before.full_scan_queries),
291 rows_materialized: self
292 .rows_materialized
293 .saturating_sub(before.rows_materialized),
294 }
295 }
296
297 pub(crate) fn lookups_against_global(self) -> u64 {
298 self.exact_idx_probes.saturating_add(self.rows_materialized)
299 }
300}
301
302pub(crate) fn set_message_lookup_trace_enabled(enabled: bool) -> bool {
303 MESSAGE_LOOKUP_TRACE_ENABLED.swap(enabled, Ordering::Relaxed)
304}
305
306pub(crate) fn message_lookup_trace_snapshot() -> MessageLookupTraceCounters {
307 MessageLookupTraceCounters {
308 exact_idx_probes: MESSAGE_LOOKUP_EXACT_IDX_PROBES.load(Ordering::Relaxed),
309 bounded_lookup_queries: MESSAGE_LOOKUP_BOUNDED_QUERIES.load(Ordering::Relaxed),
310 full_scan_queries: MESSAGE_LOOKUP_FULL_SCAN_QUERIES.load(Ordering::Relaxed),
311 rows_materialized: MESSAGE_LOOKUP_ROWS_MATERIALIZED.load(Ordering::Relaxed),
312 }
313}
314
315fn record_message_lookup_exact_idx_probe() {
316 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
317 MESSAGE_LOOKUP_EXACT_IDX_PROBES.fetch_add(1, Ordering::Relaxed);
318 }
319}
320
321fn record_message_lookup_bounded_queries(query_count: u64, rows: usize) {
322 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
323 MESSAGE_LOOKUP_BOUNDED_QUERIES.fetch_add(query_count, Ordering::Relaxed);
324 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
325 }
326}
327
328fn record_message_lookup_full_scan_query(rows: usize) {
329 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
330 MESSAGE_LOOKUP_FULL_SCAN_QUERIES.fetch_add(1, Ordering::Relaxed);
331 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
332 }
333}
334
/// Guard returned by `enter_doctor_mutation_db_open_bypass`; dropping it
/// decrements `DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH` again.
pub(crate) struct DoctorMutationDbOpenBypassGuard;

impl Drop for DoctorMutationDbOpenBypassGuard {
    fn drop(&mut self) {
        // NOTE(review): this assumes every guard came from
        // `enter_doctor_mutation_db_open_bypass`; a guard constructed directly
        // would decrement without a matching increment — confirm no caller does so.
        DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_sub(1, Ordering::SeqCst);
    }
}
342
343pub(crate) fn enter_doctor_mutation_db_open_bypass() -> DoctorMutationDbOpenBypassGuard {
344 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_add(1, Ordering::SeqCst);
345 DoctorMutationDbOpenBypassGuard
346}
347
348fn doctor_mutation_db_open_bypass_active() -> bool {
349 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.load(Ordering::SeqCst) > 0
350}
351
352fn next_franken_retry_jitter_ms(max_inclusive: u64) -> u64 {
353 let mut value = FRANKEN_RETRY_JITTER_STATE.fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed);
354 value ^= value >> 30;
355 value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9);
356 value ^= value >> 27;
357 value = value.wrapping_mul(0x94d0_49bb_1331_11eb);
358 value ^= value >> 31;
359 value % max_inclusive.saturating_add(1)
360}
361
362pub(crate) fn sleep_with_franken_retry_backoff(
365 backoff: &mut Duration,
366 remaining: Duration,
367 max_backoff: Duration,
368) {
369 let capped = (*backoff).min(remaining);
370 let extra_budget = remaining.saturating_sub(capped).min(capped);
371 let extra_ms = extra_budget.as_millis().min(u128::from(u64::MAX)) as u64;
372 let sleep_for = if extra_ms == 0 {
373 capped
374 } else {
375 capped
376 .saturating_add(Duration::from_millis(next_franken_retry_jitter_ms(
377 extra_ms,
378 )))
379 .min(remaining)
380 };
381 std::thread::sleep(sleep_for);
382 *backoff = backoff.saturating_mul(2).min(max_backoff);
383}
384
385struct DoctorMutationDbOpenGuard(Option<fs::File>);
386
387impl Drop for DoctorMutationDbOpenGuard {
388 fn drop(&mut self) {
389 if let Some(file) = self.0.as_ref() {
390 let _ = fs2::FileExt::unlock(file);
391 }
392 }
393}
394
/// Returns the doctor repair lock path that guards opening `db_path`.
///
/// Only databases whose file name is exactly `agent_search.db` are guarded;
/// any other name, or a path without a parent, yields `None`. The lock lives
/// at `<parent>/doctor/locks/doctor-repair.lock`.
fn doctor_mutation_lock_path_for_db_open(db_path: &Path) -> Option<PathBuf> {
    let is_guarded_db = matches!(
        db_path.file_name().and_then(|name| name.to_str()),
        Some("agent_search.db")
    );
    if !is_guarded_db {
        return None;
    }

    let mut lock_path = db_path.parent()?.to_path_buf();
    lock_path.push("doctor");
    lock_path.push("locks");
    lock_path.push("doctor-repair.lock");
    Some(lock_path)
}
408
/// Returns `true` if any `key=value` line in `raw` has key `pid` and a value
/// equal to this process's id.
///
/// Keys and values are trimmed. Lines without `=` and values that do not
/// parse as `u32` are ignored.
fn doctor_lock_metadata_pid_is_current_process(raw: &str) -> bool {
    let current_pid = std::process::id();
    raw.lines()
        .filter_map(|line| line.split_once('='))
        .filter(|(key, _)| key.trim() == "pid")
        .any(|(_, value)| value.trim().parse::<u32>() == Ok(current_pid))
}
421
422fn doctor_lock_file_pid_is_current_process(file: &fs::File) -> bool {
423 use std::io::Read as _;
424
425 let Ok(mut file) = file.try_clone() else {
426 return false;
427 };
428 let mut raw = String::new();
429 let _ = std::io::Read::take(&mut file, DOCTOR_MUTATION_LOCK_MAX_METADATA_READ)
430 .read_to_string(&mut raw);
431 doctor_lock_metadata_pid_is_current_process(&raw)
432}
433
434fn acquire_doctor_mutation_db_open_guard(
435 db_path: &Path,
436 timeout: Duration,
437) -> Result<DoctorMutationDbOpenGuard> {
438 let Some(lock_path) = doctor_mutation_lock_path_for_db_open(db_path) else {
439 return Ok(DoctorMutationDbOpenGuard(None));
440 };
441 if doctor_mutation_db_open_bypass_active() {
442 return Ok(DoctorMutationDbOpenGuard(None));
443 }
444
445 if let Some(parent) = lock_path.parent() {
446 fs::create_dir_all(parent).with_context(|| {
447 format!(
448 "creating doctor mutation lock directory {} before opening {}",
449 parent.display(),
450 db_path.display()
451 )
452 })?;
453 }
454
455 let deadline = Instant::now() + timeout;
456 let mut backoff = Duration::from_millis(4);
457 loop {
458 let file = fs::OpenOptions::new()
459 .create(true)
460 .truncate(false)
461 .read(true)
462 .write(true)
463 .open(&lock_path)
464 .with_context(|| {
465 format!(
466 "opening doctor mutation lock {} before opening {}",
467 lock_path.display(),
468 db_path.display()
469 )
470 })?;
471
472 if doctor_lock_file_pid_is_current_process(&file) {
473 return Ok(DoctorMutationDbOpenGuard(None));
474 }
475
476 match fs2::FileExt::try_lock_shared(&file) {
477 Ok(()) => return Ok(DoctorMutationDbOpenGuard(Some(file))),
478 Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => {
479 let now = Instant::now();
480 if now >= deadline {
481 return Err(anyhow!(
482 "doctor mutation lock {} is active while opening {}; refusing to open during repair after waiting {}ms",
483 lock_path.display(),
484 db_path.display(),
485 timeout.as_millis()
486 ));
487 }
488 let remaining = deadline.saturating_duration_since(now);
489 sleep_with_franken_retry_backoff(
490 &mut backoff,
491 remaining,
492 Duration::from_millis(128),
493 );
494 }
495 Err(err) => {
496 return Err(anyhow!(
497 "failed to acquire shared doctor mutation lock {} before opening {}: {}",
498 lock_path.display(),
499 db_path.display(),
500 err
501 ));
502 }
503 }
504 }
505}
506
507pub(crate) fn open_franken_storage_with_timeout(
508 path: &Path,
509 timeout: Duration,
510) -> Result<FrankenStorage> {
511 if !path.exists() {
512 return Err(anyhow!("Database not found at {}", path.display()));
513 }
514
515 let deadline = Instant::now() + timeout;
516 let mut backoff = Duration::from_millis(4);
517 loop {
518 match FrankenStorage::open(path) {
519 Ok(storage) => return Ok(storage),
520 Err(err) if retryable_franken_anyhow(&err) => {
521 let now = Instant::now();
522 if now >= deadline {
523 return Err(err);
524 }
525 let remaining = deadline.saturating_duration_since(now);
526 sleep_with_franken_retry_backoff(
527 &mut backoff,
528 remaining,
529 Duration::from_millis(128),
530 );
531 }
532 Err(err) => return Err(err),
533 }
534 }
535}
536
537pub(crate) fn open_current_schema_storage_with_timeout(
538 path: &Path,
539 timeout: Duration,
540) -> Result<Option<FrankenStorage>> {
541 if !path.exists() {
542 return Ok(None);
543 }
544
545 let mut storage = FrankenStorage::new(
546 open_franken_raw_connection_with_timeout(path, timeout)?,
547 path.to_path_buf(),
548 );
549 storage.apply_open_stage_busy_timeout();
550
551 let version = storage
552 .raw()
553 .query("SELECT value FROM meta WHERE key = 'schema_version';")
554 .ok()
555 .and_then(|rows| rows.first().cloned())
556 .and_then(|row| row.get_typed::<String>(0).ok())
557 .and_then(|raw| raw.parse::<i64>().ok());
558
559 if version != Some(CURRENT_SCHEMA_VERSION) {
560 if let Err(close_err) = storage.close_without_checkpoint_in_place() {
561 tracing::debug!(
562 error = %close_err,
563 db_path = %path.display(),
564 "open_current_schema_storage_with_timeout: close_without_checkpoint_in_place failed; falling back to best-effort close"
565 );
566 storage.close_best_effort_in_place();
567 }
568 return Ok(None);
569 }
570
571 transition_from_meta_version(&storage.conn)?;
572 storage.repair_missing_current_schema_objects()?;
573 storage.apply_config()?;
574 Ok(Some(storage))
575}
576
577pub(crate) fn open_franken_readonly_storage_with_timeout(
578 path: &Path,
579 timeout: Duration,
580) -> Result<FrankenStorage> {
581 if !path.exists() {
582 return Err(anyhow!("Database not found at {}", path.display()));
583 }
584
585 let deadline = Instant::now() + timeout;
586 let mut backoff = Duration::from_millis(4);
587 loop {
588 match FrankenStorage::open_readonly(path) {
589 Ok(storage) => return Ok(storage),
590 Err(err) if retryable_franken_anyhow(&err) => {
591 let now = Instant::now();
592 if now >= deadline {
593 return Err(err);
594 }
595 let remaining = deadline.saturating_duration_since(now);
596 sleep_with_franken_retry_backoff(
597 &mut backoff,
598 remaining,
599 Duration::from_millis(128),
600 );
601 }
602 Err(err) => return Err(err),
603 }
604 }
605}
606
607pub(crate) fn open_franken_raw_connection_with_timeout(
608 path: &Path,
609 timeout: Duration,
610) -> Result<FrankenConnection> {
611 if !path.exists() {
612 return Err(anyhow!("Database not found at {}", path.display()));
613 }
614
615 let path_str = path.to_string_lossy().to_string();
616 let deadline = Instant::now() + timeout;
617 let mut backoff = Duration::from_millis(4);
618 loop {
619 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
620 match FrankenConnection::open(&path_str)
621 .with_context(|| format!("opening raw frankensqlite db at {}", path.display()))
622 {
623 Ok(conn) => return Ok(conn),
624 Err(err) if retryable_franken_anyhow(&err) => {
625 let now = Instant::now();
626 if now >= deadline {
627 return Err(err);
628 }
629 let remaining = deadline.saturating_duration_since(now);
630 sleep_with_franken_retry_backoff(
631 &mut backoff,
632 remaining,
633 Duration::from_millis(128),
634 );
635 }
636 Err(err) => return Err(err),
637 }
638 }
639}
640
641pub(crate) fn open_franken_raw_readonly_connection_with_timeout(
642 path: &Path,
643 timeout: Duration,
644) -> Result<FrankenConnection> {
645 if !path.exists() {
646 return Err(anyhow!("Database not found at {}", path.display()));
647 }
648
649 let path_str = path.to_string_lossy().to_string();
650 let deadline = Instant::now() + timeout;
651 let mut backoff = Duration::from_millis(4);
652 loop {
653 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
654 match open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
655 .with_context(|| {
656 format!(
657 "opening raw frankensqlite db readonly at {}",
658 path.display()
659 )
660 }) {
661 Ok(conn) => return Ok(conn),
662 Err(err) if retryable_franken_anyhow(&err) => {
663 let now = Instant::now();
664 if now >= deadline {
665 return Err(err);
666 }
667 let remaining = deadline.saturating_duration_since(now);
668 sleep_with_franken_retry_backoff(
669 &mut backoff,
670 remaining,
671 Duration::from_millis(128),
672 );
673 }
674 Err(err) => return Err(err),
675 }
676 }
677}
678
679pub(crate) fn retryable_franken_error(err: &frankensqlite::FrankenError) -> bool {
680 matches!(
681 err,
682 frankensqlite::FrankenError::Busy
683 | frankensqlite::FrankenError::BusyRecovery
684 | frankensqlite::FrankenError::BusySnapshot { .. }
685 | frankensqlite::FrankenError::DatabaseLocked { .. }
686 | frankensqlite::FrankenError::LockFailed { .. }
687 | frankensqlite::FrankenError::WriteConflict { .. }
688 | frankensqlite::FrankenError::SerializationFailure { .. }
689 ) || retryable_storage_error_message(&err.to_string())
690}
691
/// Returns `true` if `message` contains, ignoring ASCII case, any substring
/// that marks a retryable contention error.
pub(crate) fn retryable_storage_error_message(message: &str) -> bool {
    const RETRYABLE_MARKERS: [&str; 6] = [
        "busy",
        "locked",
        "locking",
        "contention",
        "temporarily unavailable",
        "would block",
    ];

    let lowered = message.to_ascii_lowercase();
    RETRYABLE_MARKERS
        .iter()
        .any(|&marker| lowered.contains(marker))
}
701
702pub(crate) fn retryable_franken_anyhow(err: &anyhow::Error) -> bool {
703 err.chain().any(|cause| {
704 cause
705 .downcast_ref::<frankensqlite::FrankenError>()
706 .is_some_and(retryable_franken_error)
707 || retryable_storage_error_message(&cause.to_string())
708 })
709}
710
711impl Drop for LazyFrankenDb {
712 fn drop(&mut self) {
713 let Some(mut conn) = self.conn.get_mut().take() else {
714 return;
715 };
716 conn.0.close_best_effort_in_place();
717 }
718}
719
/// Sizing options for `FrankenConnectionManager`.
#[derive(Debug, Clone)]
pub struct ConnectionManagerConfig {
    /// Number of reader connections opened up front (values below 1 are raised to 1).
    pub reader_count: usize,
    /// Maximum number of writers checked out at once (values below 1 are raised to 1).
    pub max_writers: usize,
}
735
736impl Default for ConnectionManagerConfig {
737 fn default() -> Self {
738 let cpus = std::thread::available_parallelism()
739 .map(|n| n.get())
740 .unwrap_or(4);
741 Self {
742 reader_count: 4,
743 max_writers: cpus,
744 }
745 }
746}
747
/// Owns a fixed set of reader connections and limits concurrent writers.
pub struct FrankenConnectionManager {
    /// Path every reader and writer connection is opened against.
    db_path: PathBuf,
    /// Reader connections opened up front, each behind its own mutex.
    readers: Vec<parking_lot::Mutex<SendFrankenConnection>>,
    /// Counter used by `reader()` to pick the next reader slot.
    reader_idx: std::sync::atomic::AtomicUsize,
    /// Bounded channel pre-filled with one token per allowed writer; a writer
    /// takes a token to start and sends it back when it is dropped.
    writer_tokens: (
        crossbeam_channel::Sender<()>,
        crossbeam_channel::Receiver<()>,
    ),
    /// Effective configuration after clamping both counts to at least 1.
    config: ConnectionManagerConfig,
}

// SAFETY: NOTE(review): the soundness argument is not visible in this part of
// the file. Readers are only handed out through their mutexes — confirm that
// this is sufficient for the underlying connection type.
unsafe impl Send for FrankenConnectionManager {}
unsafe impl Sync for FrankenConnectionManager {}
776
777impl FrankenConnectionManager {
778 pub fn new(db_path: impl Into<PathBuf>, config: ConnectionManagerConfig) -> Result<Self> {
783 let db_path = db_path.into();
784 let path_str = db_path.to_string_lossy().to_string();
785
786 let reader_count = config.reader_count.max(1);
787 let mut readers = Vec::with_capacity(reader_count);
788 for _ in 0..reader_count {
789 let conn = FrankenConnection::open(&path_str)
790 .with_context(|| format!("opening reader connection at {}", db_path.display()))?;
791 let _ = conn.execute("PRAGMA busy_timeout = 5000;"); let _ = conn.execute("PRAGMA cache_size = -16384;"); readers.push(parking_lot::Mutex::new(SendFrankenConnection::new(conn)));
795 }
796
797 let max_writers = config.max_writers.max(1);
798
799 let (tx, rx) = crossbeam_channel::bounded(max_writers);
803 for _ in 0..max_writers {
804 tx.send(())
805 .map_err(|_| anyhow!("writer token channel closed during initialization"))?;
806 }
807
808 Ok(Self {
809 db_path,
810 readers,
811 reader_idx: std::sync::atomic::AtomicUsize::new(0),
812 writer_tokens: (tx, rx),
813 config: ConnectionManagerConfig {
814 reader_count,
815 max_writers,
816 },
817 })
818 }
819
820 pub fn reader(&self) -> parking_lot::MutexGuard<'_, SendFrankenConnection> {
825 let idx = self
826 .reader_idx
827 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
828 self.readers[idx % self.readers.len()].lock()
829 }
830
831 pub fn writer(&self) -> Result<WriterGuard<'_>> {
837 self.writer_tokens
838 .1
839 .recv()
840 .map_err(|_| anyhow!("writer token channel closed"))?;
841 let path_str = self.db_path.to_string_lossy().to_string();
842 let conn = match FrankenConnection::open(&path_str) {
843 Ok(c) => c,
844 Err(e) => {
845 let _ = self.writer_tokens.0.send(());
846 return Err(anyhow::Error::from(e).context(format!(
847 "opening writer connection at {}",
848 self.db_path.display()
849 )));
850 }
851 };
852 let storage = FrankenStorage::new(conn, self.db_path.clone());
853 if let Err(e) = storage.apply_config() {
854 let _ = self.writer_tokens.0.send(());
855 return Err(e);
856 }
857 Ok(WriterGuard {
858 storage,
859 mgr: self,
860 committed: false,
861 })
862 }
863
864 pub fn concurrent_writer(&self) -> Result<WriterGuard<'_>> {
869 self.writer_tokens
870 .1
871 .recv()
872 .map_err(|_| anyhow!("writer token channel closed"))?;
873 let path_str = self.db_path.to_string_lossy().to_string();
874 let conn = match FrankenConnection::open(&path_str) {
875 Ok(c) => c,
876 Err(e) => {
877 let _ = self.writer_tokens.0.send(());
878 return Err(anyhow::Error::from(e).context(format!(
879 "opening concurrent writer at {}",
880 self.db_path.display()
881 )));
882 }
883 };
884 let storage = FrankenStorage::new(conn, self.db_path.clone());
885 if let Err(e) = storage.apply_config() {
886 let _ = self.writer_tokens.0.send(());
887 return Err(e);
888 }
889 let _ = storage.raw().execute("PRAGMA cache_size = -4096;");
891 Ok(WriterGuard {
892 storage,
893 mgr: self,
894 committed: false,
895 })
896 }
897
898 pub fn db_path(&self) -> &Path {
900 &self.db_path
901 }
902
903 pub fn reader_count(&self) -> usize {
905 self.readers.len()
906 }
907
908 pub fn max_writers(&self) -> usize {
910 self.config.max_writers
911 }
912}
913
914impl Drop for FrankenConnectionManager {
915 fn drop(&mut self) {
916 for reader in &mut self.readers {
917 reader.get_mut().0.close_best_effort_in_place();
918 }
919 }
920}
921
/// A checked-out writer; dropping it returns its token to the manager.
pub struct WriterGuard<'a> {
    /// Storage handle on the writer's own connection.
    storage: FrankenStorage,
    /// Manager whose token pool this writer belongs to.
    mgr: &'a FrankenConnectionManager,
    /// When `false`, `Drop` issues a `ROLLBACK` before closing.
    committed: bool,
}
931
932impl<'a> WriterGuard<'a> {
933 pub fn storage(&self) -> &FrankenStorage {
935 &self.storage
936 }
937
938 pub fn mark_committed(&mut self) {
943 self.committed = true;
944 }
945}
946
947impl Drop for WriterGuard<'_> {
948 fn drop(&mut self) {
949 if !self.committed {
950 let _ = self.storage.raw().execute("ROLLBACK;");
952 }
953 self.storage.close_best_effort_in_place();
954 let _ = self.mgr.writer_tokens.0.send(());
956 }
957}
958
959fn serialize_json_to_msgpack(value: &serde_json::Value) -> Option<Vec<u8>> {
968 if value.is_null() || value.as_object().is_some_and(|o| o.is_empty()) {
969 return None;
970 }
971 rmp_serde::to_vec(value).ok()
972}
973
974fn deserialize_msgpack_to_json(bytes: &[u8]) -> serde_json::Value {
977 if bytes.is_empty() {
978 return serde_json::Value::Object(serde_json::Map::new());
979 }
980 rmp_serde::from_slice(bytes).unwrap_or_else(|e| {
981 tracing::debug!(
982 error = %e,
983 bytes_len = bytes.len(),
984 "Failed to deserialize metadata - returning empty object"
985 );
986 serde_json::Value::Object(serde_json::Map::new())
987 })
988}
989
990fn franken_read_metadata_compat(
992 row: &FrankenRow,
993 json_idx: usize,
994 bin_idx: usize,
995) -> serde_json::Value {
996 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
998 && !bytes.is_empty()
999 {
1000 return deserialize_msgpack_to_json(&bytes);
1001 }
1002
1003 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1005 return serde_json::from_str(&json_str)
1006 .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
1007 }
1008
1009 serde_json::Value::Object(serde_json::Map::new())
1010}
1011
1012fn franken_read_message_extra_compat(
1013 row: &FrankenRow,
1014 json_idx: usize,
1015 bin_idx: usize,
1016) -> serde_json::Value {
1017 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1018 && !bytes.is_empty()
1019 {
1020 return deserialize_msgpack_to_json(&bytes);
1021 }
1022
1023 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1024 return serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null);
1025 }
1026
1027 serde_json::Value::Null
1028}
1029
/// Errors surfaced by migration and backup routines.
#[derive(Debug, Error)]
pub enum MigrationError {
    /// The database has to be rebuilt.
    #[error("Rebuild required: {reason}")]
    RebuildRequired {
        /// Human-readable explanation.
        reason: String,
        /// Location of a backup, if there is one.
        backup_path: Option<std::path::PathBuf>,
    },

    /// A FrankenSQLite error.
    #[error("Database error: {0}")]
    Database(#[from] frankensqlite::FrankenError),

    /// A filesystem or other I/O error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Any other failure, carried as its message text.
    #[error("{0}")]
    Other(String),
}
1056
1057impl From<anyhow::Error> for MigrationError {
1058 fn from(e: anyhow::Error) -> Self {
1059 MigrationError::Other(e.to_string())
1060 }
1061}
1062
// NOTE(review): presumably the number of backups retained; its use is not
// visible in this part of the file — confirm.
const MAX_BACKUPS: usize = 3;
/// Pragma run on the read-only connection before `VACUUM INTO` in
/// `vacuum_into_backup_stage`.
const BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA: &str = "PRAGMA busy_timeout = 30000;";
1066
/// File names that `is_user_data_file` treats as user data.
const USER_DATA_FILES: &[&str] = &["bookmarks.db", "tui_state.json", "sources.toml", ".env"];

/// Returns `true` if the final component of `path` is one of `USER_DATA_FILES`.
///
/// Paths with no file name, or a file name that is not valid UTF-8, return `false`.
pub fn is_user_data_file(path: &Path) -> bool {
    let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    USER_DATA_FILES.iter().any(|known| *known == name)
}
1077
/// SQL that creates the `fts_messages` FTS5 virtual table if it is missing.
///
/// The table is declared with `content=''`, the `porter` tokenizer, and an
/// unindexed `created_at` column.
pub const FTS5_REGISTER_SQL: &str = "\
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(\
    content, title, agent, workspace, source_path, \
    created_at UNINDEXED, \
    content='', tokenize='porter'\
    )";
1090
// Keys and generation numbers for FTS rebuild and daily-stats health
// bookkeeping.
// NOTE(review): the code that reads and writes these is outside this part of
// the file; the names suggest they are `meta` table keys — confirm.
const FTS_FRANKEN_REBUILD_META_KEY: &str = "fts_frankensqlite_rebuild_generation";
const FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY: &str = "fts_frankensqlite_archive_fingerprint";
const FTS_FRANKEN_REBUILD_GENERATION: i64 = 1;
const DAILY_STATS_HEALTH_META_KEY: &str = "daily_stats_archive_fingerprint";
const DAILY_STATS_HEALTH_GENERATION_META_KEY: &str = "daily_stats_health_generation";
const DAILY_STATS_HEALTH_GENERATION: i64 = 1;
1097
/// SQL that issues the FTS5 `delete-all` command against `fts_messages`.
pub const FTS5_DELETE_ALL_SQL: &str =
    "INSERT INTO fts_messages(fts_messages) VALUES('delete-all');";
1103
/// Test helper: opens the database at `db_path` and rebuilds its FTS data,
/// discarding the inserted-row count.
///
/// Despite the name, the work goes through `FrankenStorage`.
#[cfg(test)]
pub(crate) fn materialize_fresh_fts_schema_via_rusqlite(db_path: &Path) -> Result<()> {
    let open_context = || {
        format!(
            "opening frankensqlite db at {} for FTS materialization",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(open_context)?;
    storage.rebuild_fts_via_frankensqlite()?;
    Ok(())
}
1119
/// Test helper: rebuilds the FTS data at `db_path`, records the rebuild
/// generation, and returns the count reported by the rebuild.
///
/// Despite the name, the work goes through `FrankenStorage`.
#[cfg(test)]
pub(crate) fn rebuild_fts_via_rusqlite(db_path: &Path) -> Result<usize> {
    let open_context = || {
        format!(
            "opening frankensqlite db at {} for FTS rebuild",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(open_context)?;
    let inserted_rows = storage.rebuild_fts_via_frankensqlite()?;
    storage.record_fts_franken_rebuild_generation()?;
    Ok(inserted_rows)
}
1132
1133pub(crate) fn ensure_fts_consistency_via_rusqlite(db_path: &Path) -> Result<FtsConsistencyRepair> {
1134 let storage = FrankenStorage::open(db_path).with_context(|| {
1138 format!(
1139 "opening frankensqlite db at {} for FTS consistency check",
1140 db_path.display()
1141 )
1142 })?;
1143 storage.ensure_search_fallback_fts_consistency()
1144}
1145
/// Creates a backup of the database at `db_path` and returns its path.
///
/// Returns `Ok(None)` when there is nothing at `db_path` or when
/// `copyable_bundle_file_exists` reports no copyable file. The preferred
/// route is `VACUUM INTO` a staging file that is then renamed into place. If
/// that fails with a non-retryable error, the bundle is copied with
/// `copy_database_bundle` instead.
///
/// # Errors
///
/// Returns `MigrationError::Database` when `VACUUM INTO` fails with a
/// retryable (contention) error — no raw copy is attempted in that case — and
/// propagates I/O and helper failures.
pub fn create_backup(db_path: &Path) -> Result<Option<std::path::PathBuf>, MigrationError> {
    if !bundle_path_exists(db_path)? {
        return Ok(None);
    }

    if !copyable_bundle_file_exists(db_path)? {
        return Ok(None);
    }
    // The result is discarded; the call is kept for its error.
    // NOTE(review): presumably this validates the sidecar files up front — confirm.
    let _ = copyable_bundle_sidecar_sources(db_path)?;

    let backup_path = unique_backup_path(db_path);
    let vacuum_stage_path = vacuum_stage_backup_path(&backup_path);

    match vacuum_into_backup_stage(db_path, &vacuum_stage_path) {
        Ok(()) => {
            // Move the finished staging file to the final backup name.
            fs::rename(&vacuum_stage_path, &backup_path)?;
        }
        Err(err) if backup_vacuum_error_requires_consistent_retry(&err) => {
            tracing::warn!(
                db_path = %db_path.display(),
                error = %err,
                "create_backup: VACUUM INTO hit transient contention; refusing raw WAL bundle copy"
            );
            return Err(MigrationError::Database(err));
        }
        Err(err) => {
            // Non-retryable failure: fall through to the raw copy below.
            tracing::warn!(
                db_path = %db_path.display(),
                error = %err,
                "create_backup: VACUUM INTO failed; falling back to raw evidence copy"
            );
        }
    }

    // The VACUUM route produced the backup: sync it and its directory, then finish.
    if backup_path.exists() {
        sync_file_if_exists(&backup_path)?;
        if let Some(parent) = backup_path.parent() {
            sync_parent_directory(parent)?;
        }
        return Ok(Some(backup_path));
    }

    copy_database_bundle(db_path, &backup_path)?;

    Ok(Some(backup_path))
}
1201
1202fn vacuum_into_backup_stage(
1203 db_path: &Path,
1204 stage_path: &Path,
1205) -> std::result::Result<(), frankensqlite::FrankenError> {
1206 let mut conn = open_franken_with_flags(
1207 &db_path.to_string_lossy(),
1208 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
1209 )?;
1210 let result = (|| {
1211 conn.execute(BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA)?;
1212 let path_str = stage_path.to_string_lossy();
1213 conn.execute_compat("VACUUM INTO ?", fparams![path_str.as_ref()])?;
1214 Ok(())
1215 })();
1216 if let Err(close_err) = conn.close_in_place() {
1217 tracing::warn!(
1218 error = %close_err,
1219 db_path = %db_path.display(),
1220 "create_backup: close_in_place failed after VACUUM INTO; falling back to best-effort close"
1221 );
1222 conn.close_best_effort_in_place();
1223 }
1224 result
1225}
1226
/// Returns `true` when a `VACUUM INTO` failure is a retryable (contention)
/// error, in which case `create_backup` returns the error instead of falling
/// back to a raw copy.
fn backup_vacuum_error_requires_consistent_retry(err: &frankensqlite::FrankenError) -> bool {
    retryable_franken_error(err)
}
1230
/// Records which members of a database bundle (main file, `-wal`, `-shm`) were moved.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DatabaseBundleMoveResult {
    /// `true` when the main database file was moved.
    pub database: bool,
    /// `true` when the `-wal` sidecar was moved.
    pub wal: bool,
    /// `true` when the `-shm` sidecar was moved.
    pub shm: bool,
}

impl DatabaseBundleMoveResult {
    /// Reports whether at least one bundle member was moved.
    pub fn moved_any(&self) -> bool {
        [self.database, self.wal, self.shm].contains(&true)
    }
}
1243
/// Builds the path of a database sidecar by appending `suffix` (for example `-wal` or `-shm`)
/// to the full `path`, including any existing extension.
///
/// The suffix is appended to the underlying `OsString`, so paths that are not valid UTF-8 are
/// preserved byte-for-byte instead of being rewritten with replacement characters.
fn database_sidecar_path(path: &Path, suffix: &str) -> PathBuf {
    let mut raw = path.as_os_str().to_os_string();
    raw.push(suffix);
    PathBuf::from(raw)
}
1247
1248pub(crate) fn move_database_bundle(
1255 source_root: &Path,
1256 destination_root: &Path,
1257) -> std::io::Result<DatabaseBundleMoveResult> {
1258 let mut moved = DatabaseBundleMoveResult::default();
1259 if let Some(parent) = destination_root.parent() {
1260 fs::create_dir_all(parent)?;
1261 sync_parent_directory(parent)?;
1262 }
1263
1264 if bundle_path_exists(source_root)? {
1265 fs::rename(source_root, destination_root)?;
1266 moved.database = true;
1267 }
1268
1269 let wal_source = database_sidecar_path(source_root, "-wal");
1270 if bundle_path_exists(&wal_source)? {
1271 fs::rename(&wal_source, database_sidecar_path(destination_root, "-wal"))?;
1272 moved.wal = true;
1273 }
1274
1275 let shm_source = database_sidecar_path(source_root, "-shm");
1276 if bundle_path_exists(&shm_source)? {
1277 fs::rename(&shm_source, database_sidecar_path(destination_root, "-shm"))?;
1278 moved.shm = true;
1279 }
1280
1281 if moved.moved_any() {
1282 if let Some(parent) = source_root.parent() {
1283 sync_parent_directory(parent)?;
1284 }
1285 if let Some(parent) = destination_root.parent() {
1286 sync_parent_directory(parent)?;
1287 }
1288 }
1289
1290 Ok(moved)
1291}
1292
/// Reports whether anything exists at `path`, without following symlinks.
///
/// A `NotFound` error maps to `Ok(false)`; every other metadata error is returned.
fn bundle_path_exists(path: &Path) -> std::io::Result<bool> {
    fs::symlink_metadata(path).map(|_| true).or_else(|err| {
        if err.kind() == std::io::ErrorKind::NotFound {
            Ok(false)
        } else {
            Err(err)
        }
    })
}
1300
1301fn copy_database_bundle(source_root: &Path, destination_root: &Path) -> Result<()> {
1302 if let Some(parent) = destination_root.parent() {
1303 fs::create_dir_all(parent).with_context(|| {
1304 format!(
1305 "creating destination directory for database bundle copy: {}",
1306 parent.display()
1307 )
1308 })?;
1309 sync_parent_directory(parent)
1310 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1311 }
1312
1313 if !copyable_bundle_file_exists(source_root)? {
1314 bail!(
1315 "database bundle root is missing before copy: {}",
1316 source_root.display()
1317 );
1318 }
1319
1320 let sidecars = copyable_bundle_sidecar_sources(source_root)?;
1321
1322 fs::copy(source_root, destination_root).with_context(|| {
1323 format!(
1324 "copying database bundle {} -> {}",
1325 source_root.display(),
1326 destination_root.display()
1327 )
1328 })?;
1329 sync_file_if_exists(destination_root).with_context(|| {
1330 format!(
1331 "syncing copied database bundle {}",
1332 destination_root.display()
1333 )
1334 })?;
1335
1336 for (source_sidecar, suffix) in sidecars {
1337 let destination_sidecar = database_sidecar_path(destination_root, suffix);
1338 fs::copy(&source_sidecar, &destination_sidecar).with_context(|| {
1339 format!(
1340 "copying database bundle sidecar {} -> {}",
1341 source_sidecar.display(),
1342 destination_sidecar.display()
1343 )
1344 })?;
1345 sync_file_if_exists(&destination_sidecar).with_context(|| {
1346 format!(
1347 "syncing copied database bundle sidecar {}",
1348 destination_sidecar.display()
1349 )
1350 })?;
1351 }
1352
1353 if let Some(parent) = destination_root.parent() {
1354 sync_parent_directory(parent)
1355 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1356 }
1357
1358 Ok(())
1359}
1360
1361fn copyable_bundle_sidecar_sources(source_root: &Path) -> Result<Vec<(PathBuf, &'static str)>> {
1362 let mut sidecars = Vec::new();
1363 for suffix in ["-wal", "-shm"] {
1364 let source_sidecar = database_sidecar_path(source_root, suffix);
1365 if copyable_bundle_file_exists(&source_sidecar)? {
1366 sidecars.push((source_sidecar, suffix));
1367 }
1368 }
1369 Ok(sidecars)
1370}
1371
1372fn copyable_bundle_file_exists(path: &Path) -> Result<bool> {
1373 match fs::symlink_metadata(path) {
1374 Ok(metadata) => {
1375 let file_type = metadata.file_type();
1376 if file_type.is_symlink() {
1377 bail!(
1378 "refusing to copy database bundle symlink: {}",
1379 path.display()
1380 );
1381 }
1382 if !file_type.is_file() {
1383 bail!(
1384 "refusing to copy non-file database bundle path: {}",
1385 path.display()
1386 );
1387 }
1388 Ok(true)
1389 }
1390 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1391 Err(err) => Err(err).with_context(|| {
1392 format!(
1393 "checking database bundle path before copy: {}",
1394 path.display()
1395 )
1396 }),
1397 }
1398}
1399
1400pub(crate) fn remove_database_files(path: &Path) -> std::io::Result<()> {
1402 let mut removed_any = false;
1403
1404 match fs::remove_file(path) {
1405 Ok(()) => removed_any = true,
1406 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1407 Err(err) => return Err(err),
1408 }
1409
1410 for suffix in ["-wal", "-shm"] {
1412 match fs::remove_file(database_sidecar_path(path, suffix)) {
1413 Ok(()) => removed_any = true,
1414 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1415 Err(err) => return Err(err),
1416 }
1417 }
1418
1419 if removed_any && let Some(parent) = path.parent() {
1420 sync_parent_directory(parent)?;
1421 }
1422
1423 Ok(())
1424}
1425
/// Opens the directory at `path` and calls `sync_all` on it.
#[cfg(not(windows))]
fn sync_parent_directory(path: &Path) -> std::io::Result<()> {
    let directory = fs::File::open(path)?;
    directory.sync_all()
}

/// Windows variant: does nothing and always reports success.
#[cfg(windows)]
fn sync_parent_directory(_path: &Path) -> std::io::Result<()> {
    Ok(())
}
1435
/// Flushes the file at `path` to stable storage, treating a missing file as success.
///
/// The file is opened directly instead of being checked with `Path::exists` first, so a file
/// removed between a check and the open cannot surface as a spurious `NotFound` error.
///
/// # Errors
/// Returns any open error other than `NotFound`, or the error from `sync_all`.
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    match fs::File::open(path) {
        Ok(file) => file.sync_all(),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(err) => Err(err),
    }
}
1442
1443pub fn cleanup_old_backups(db_path: &Path, keep_count: usize) -> Result<(), std::io::Error> {
1445 let parent = match db_path.parent() {
1446 Some(p) => p,
1447 None => return Ok(()),
1448 };
1449
1450 let db_name = db_path.file_name().and_then(|n| n.to_str()).unwrap_or("db");
1451
1452 let prefix = format!("{}.backup.", db_name);
1453
1454 let mut backups: Vec<(std::path::PathBuf, SystemTime)> = Vec::new();
1456
1457 if let Ok(entries) = fs::read_dir(parent) {
1458 for entry in entries.flatten() {
1459 let path = entry.path();
1460 if let Some(name) = path.file_name().and_then(|n| n.to_str())
1461 && is_backup_root_name(name, &prefix)
1462 && let Ok(meta) = fs::metadata(&path)
1463 && meta.is_file()
1464 && let Ok(mtime) = meta.modified()
1465 {
1466 backups.push((path, mtime));
1467 }
1468 }
1469 }
1470
1471 backups.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1473
1474 for (path, _) in backups.into_iter().skip(keep_count) {
1476 let _ = fs::remove_file(&path);
1477
1478 let _ = fs::remove_file(database_sidecar_path(&path, "-wal"));
1480 let _ = fs::remove_file(database_sidecar_path(&path, "-shm"));
1481 }
1482
1483 Ok(())
1484}
1485
/// A historical copy of the database (backup, snapshot, quarantined file, ...) found on disk,
/// together with the facts used to rank it as a salvage source.
#[derive(Debug, Clone)]
pub(crate) struct HistoricalDatabaseBundle {
    /// Path of the bundle's main database file.
    root_path: PathBuf,
    /// Combined size of the main file and its `-wal` / `-shm` sidecars, in bytes.
    total_bytes: u64,
    /// Modification time of the main file in milliseconds since the Unix epoch (0 if unknown).
    modified_at_ms: i64,
    /// Whether the bundle opened read-only and its core tables could be queried.
    supports_direct_readonly: bool,
    /// Schema / FTS / message-id facts gathered while probing the bundle.
    probe: HistoricalBundleProbe,
}
1494
/// Lightweight facts read from a historical bundle; every field falls back to its default
/// when the corresponding query fails.
#[derive(Debug, Clone, Copy, Default)]
struct HistoricalBundleProbe {
    /// `schema_version` value from the `meta` table, when present and numeric.
    schema_version: Option<i64>,
    /// Number of `sqlite_master` rows named `fts_messages`, when the count could be read.
    fts_schema_rows: Option<i64>,
    /// Whether a query against `fts_messages` succeeded.
    fts_queryable: bool,
    /// Largest `messages.id`, or 0 when unknown or the table is empty.
    max_message_id: i64,
}
1502
/// Test-only summary of a database health probe.
#[cfg(test)]
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SqliteDatabaseHealthProbe {
    /// `schema_version` value from the `meta` table, when present and numeric.
    pub schema_version: Option<i64>,
    /// Whether `PRAGMA quick_check(1)` reported `ok`.
    pub quick_check_ok: bool,
    /// Number of `sqlite_master` rows named `fts_messages`.
    pub fts_schema_rows: i64,
    /// Whether exactly one FTS schema row exists and `fts_messages` could be queried.
    pub fts_queryable: bool,
    /// Row count of `messages` (reported as 0 when the quick check failed).
    pub message_count: i64,
    /// Largest `messages.id` (reported as 0 when the quick check failed).
    pub max_message_id: i64,
}
1514
/// Outcome of an FTS consistency check/repair.
///
/// NOTE(review): the producer of these values is outside this section; the variant
/// descriptions below are inferred from the names and should be confirmed against it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FtsConsistencyRepair {
    /// Presumably: no repair was needed; `rows` is the row count observed.
    AlreadyHealthy {
        rows: usize,
    },
    /// Presumably: only missing rows were added; `inserted_rows` of `total_rows` were new.
    IncrementalCatchUp {
        inserted_rows: usize,
        total_rows: usize,
    },
    /// Presumably: the index was rebuilt from scratch with `inserted_rows` rows.
    Rebuilt {
        inserted_rows: usize,
    },
}
1528
/// Running totals for a historical salvage pass.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct HistoricalSalvageOutcome {
    /// Number of bundles that were examined.
    pub bundles_considered: usize,
    /// Number of bundles that contributed imported data.
    pub bundles_imported: usize,
    /// Number of conversations imported.
    pub conversations_imported: usize,
    /// Number of messages imported.
    pub messages_imported: usize,
}

impl HistoricalSalvageOutcome {
    /// Adds every counter of `other` onto the matching counter of `self`.
    pub(crate) fn accumulate(&mut self, other: Self) {
        // Destructuring makes a newly added field a compile error here until it is handled.
        let Self {
            bundles_considered,
            bundles_imported,
            conversations_imported,
            messages_imported,
        } = other;
        self.bundles_considered += bundles_considered;
        self.bundles_imported += bundles_imported;
        self.conversations_imported += conversations_imported;
        self.messages_imported += messages_imported;
    }
}
1545
/// An open read connection to a historical bundle plus a record of how it was obtained.
#[derive(Debug)]
struct HistoricalReadConnection {
    /// Connection used to read the historical data.
    conn: FrankenConnection,
    /// How the connection was obtained, e.g. `"direct-readonly"` or `"sqlite3-recover"`.
    method: &'static str,
    /// Path of the database file actually opened (the bundle itself or a recovered copy).
    root_path: PathBuf,
    /// Holds the temporary directory of a recovered copy for as long as this value lives;
    /// `None` when the bundle was opened directly.
    _tempdir: Option<tempfile::TempDir>,
}
1553
/// Schema applied to the scratch database that receives rows replayed from
/// `sqlite3 .recover`.
///
/// It declares only the six core tables (`sources`, `agents`, `workspaces`, `conversations`,
/// `messages`, `snippets`) with plain column types: apart from primary keys there are no
/// constraints, indexes, or foreign keys.
const HISTORICAL_RECOVERY_CORE_SCHEMA: &str = r"
CREATE TABLE sources (
    id TEXT PRIMARY KEY,
    kind TEXT,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE agents (
    id INTEGER PRIMARY KEY,
    slug TEXT,
    name TEXT,
    version TEXT,
    kind TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT,
    display_name TEXT
);
CREATE TABLE conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER,
    workspace_id INTEGER,
    source_id TEXT,
    external_id TEXT,
    title TEXT,
    source_path TEXT,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
CREATE TABLE messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER,
    idx INTEGER,
    role TEXT,
    author TEXT,
    created_at INTEGER,
    content TEXT,
    extra_json TEXT,
    extra_bin BLOB
);
CREATE TABLE snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);
";
/// Version stamped as `salvage_version` into the per-bundle import record stored in `meta`.
const HISTORICAL_SALVAGE_LEDGER_VERSION: u32 = 2;
/// Version of the salvage progress record format.
/// NOTE(review): presumably matches `HistoricalBundleProgress::progress_version` — confirm.
const HISTORICAL_SALVAGE_PROGRESS_VERSION: u32 = 1;
/// Five minutes, in milliseconds.
/// NOTE(review): judging by the name, this bounds how far apart two start times may be when
/// merging conversations by source path — confirm at the use site.
const SOURCE_PATH_MERGE_START_TOLERANCE_MS: i64 = 5 * 60 * 1000;
1631
/// Serializable progress record for the salvage of one historical bundle.
///
/// NOTE(review): the code that reads and writes this record is outside this section; the
/// field descriptions below are inferred from the names and should be confirmed there.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct HistoricalBundleProgress {
    /// Format version of this record.
    progress_version: u32,
    /// Path of the bundle being salvaged.
    path: String,
    /// Size of the bundle in bytes.
    bytes: u64,
    /// Bundle modification time in milliseconds since the Unix epoch.
    modified_at_ms: i64,
    /// How the bundle was opened for reading.
    method: String,
    /// Presumably the id of the last source row that was fully imported.
    last_completed_source_row_id: i64,
    /// Conversations imported so far.
    conversations_imported: usize,
    /// Messages imported so far.
    messages_imported: usize,
    /// Time of the last update to this record, in milliseconds since the Unix epoch.
    updated_at_ms: i64,
}
1644
/// One conversation queued for a batched historical import.
///
/// NOTE(review): the batch importer is outside this section; the field descriptions are
/// inferred from the names and should be confirmed there.
#[derive(Debug, Clone)]
struct HistoricalBatchEntry {
    /// Row id of the conversation in the historical source database.
    source_row_id: i64,
    /// Agent id the conversation is attributed to.
    agent_id: i64,
    /// Workspace id for the conversation, if it has one.
    workspace_id: Option<i64>,
    /// The conversation payload to import.
    conversation: Conversation,
}
1652
/// Counters reported for one batched historical import.
/// NOTE(review): produced outside this section; meanings are inferred from the field names.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct HistoricalBatchImportTotals {
    /// Number of source rows that resulted in an insert.
    inserted_source_rows: usize,
    /// Number of messages inserted.
    inserted_messages: usize,
}
1658
1659fn historical_bundle_root_paths(db_path: &Path) -> Vec<PathBuf> {
1660 let mut roots = Vec::new();
1661 let Some(parent) = db_path.parent() else {
1662 return roots;
1663 };
1664 let db_name = db_path
1665 .file_name()
1666 .and_then(|n| n.to_str())
1667 .unwrap_or("agent_search.db");
1668 let db_stem = db_path
1669 .file_stem()
1670 .and_then(|n| n.to_str())
1671 .unwrap_or("agent_search");
1672
1673 let mut push_root = |path: PathBuf| {
1674 if path == db_path {
1675 return;
1676 }
1677 if !roots.iter().any(|existing| existing == &path) {
1678 roots.push(path);
1679 }
1680 };
1681
1682 if let Ok(entries) = fs::read_dir(parent) {
1683 for entry in entries.flatten() {
1684 let path = entry.path();
1685 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1686 continue;
1687 };
1688 if has_db_sidecar_suffix(name) {
1689 continue;
1690 }
1691 if name.starts_with(&format!("{db_name}.backup."))
1692 || name.starts_with(&format!("{db_stem}.corrupt."))
1693 {
1694 push_root(path);
1695 }
1696 }
1697 }
1698
1699 let backups_dir = parent.join("backups");
1700 if let Ok(entries) = fs::read_dir(backups_dir) {
1701 for entry in entries.flatten() {
1702 let path = entry.path();
1703 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1704 continue;
1705 };
1706 if has_db_sidecar_suffix(name) {
1707 continue;
1708 }
1709 if name.starts_with(&format!("{db_name}.")) && name.ends_with(".bak") {
1710 push_root(path);
1711 }
1712 }
1713 }
1714
1715 push_named_database_children(&mut roots, db_path, &parent.join("repair-lab"), db_name);
1716 push_named_database_children(&mut roots, db_path, &parent.join("snapshots"), db_name);
1717
1718 roots
1719}
1720
/// Appends `<child>/<db_name>` to `roots` for every child entry of `dir` where that path
/// exists, is not `canonical_db_path`, and is not already listed.
///
/// An unreadable `dir` and unreadable entries are ignored.
fn push_named_database_children(
    roots: &mut Vec<PathBuf>,
    canonical_db_path: &Path,
    dir: &Path,
    db_name: &str,
) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };
    for candidate in entries.flatten().map(|entry| entry.path().join(db_name)) {
        if candidate == canonical_db_path {
            continue;
        }
        if !candidate.exists() || roots.contains(&candidate) {
            continue;
        }
        roots.push(candidate);
    }
}
1739
/// Returns the modification time of `path` in milliseconds since the Unix epoch.
///
/// Yields 0 when the metadata or modification time cannot be read, or when the timestamp
/// lies before the epoch.
fn file_mtime_ms(path: &Path) -> i64 {
    let Ok(modified) = fs::metadata(path).and_then(|meta| meta.modified()) else {
        return 0;
    };
    match modified.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as i64,
        Err(_) => 0,
    }
}
1748
1749fn bundle_total_bytes(root_path: &Path) -> u64 {
1750 let mut total = fs::metadata(root_path).map(|meta| meta.len()).unwrap_or(0);
1751 for suffix in ["-wal", "-shm"] {
1752 let sidecar = database_sidecar_path(root_path, suffix);
1753 total = total.saturating_add(fs::metadata(sidecar).map(|meta| meta.len()).unwrap_or(0));
1754 }
1755 total
1756}
1757
1758pub(crate) fn discover_historical_database_bundles(
1759 db_path: &Path,
1760) -> Vec<HistoricalDatabaseBundle> {
1761 let mut bundles: Vec<_> = historical_bundle_root_paths(db_path)
1762 .into_iter()
1763 .filter(|root| root.exists())
1764 .map(|root_path| {
1765 let modified_at_ms = file_mtime_ms(&root_path);
1766 let total_bytes = bundle_total_bytes(&root_path);
1767 let supports_direct_readonly = historical_bundle_supports_direct_readonly(&root_path);
1768 let probe = probe_historical_bundle(&root_path);
1769 HistoricalDatabaseBundle {
1770 modified_at_ms,
1771 total_bytes,
1772 supports_direct_readonly,
1773 root_path,
1774 probe,
1775 }
1776 })
1777 .filter(|bundle| bundle.total_bytes > 0)
1778 .collect();
1779
1780 fn bundle_priority(path: &Path) -> i32 {
1781 let path_str = path.to_string_lossy();
1782 if path_str.contains("/repair-lab/replay-") {
1783 return 5;
1784 }
1785 if path_str.contains("/repair-lab/") {
1786 return 4;
1787 }
1788 if path_str.contains("/snapshots/") {
1789 return 3;
1790 }
1791 if path_str.contains(".corrupt.") || path_str.contains("failed-baseline-seed") {
1792 return 0;
1793 }
1794 1
1795 }
1796
1797 fn bundle_health_rank(bundle: &HistoricalDatabaseBundle) -> i32 {
1798 let fts_clean = match bundle.probe.fts_schema_rows {
1821 Some(1) => bundle.probe.fts_queryable,
1822 Some(0) => bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION),
1823 _ => false,
1824 };
1825
1826 let clean_schema14_fts =
1827 bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION) && fts_clean;
1828 if clean_schema14_fts {
1829 return 5;
1830 }
1831
1832 if fts_clean {
1833 return 4;
1834 }
1835
1836 if bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION)
1837 && bundle.supports_direct_readonly
1838 {
1839 return 3;
1840 }
1841
1842 if bundle.supports_direct_readonly {
1843 return 2;
1844 }
1845
1846 1
1847 }
1848
1849 bundles.sort_by(|left, right| {
1850 bundle_health_rank(right)
1851 .cmp(&bundle_health_rank(left))
1852 .then_with(|| right.probe.max_message_id.cmp(&left.probe.max_message_id))
1853 .then_with(|| bundle_priority(&right.root_path).cmp(&bundle_priority(&left.root_path)))
1854 .then_with(|| {
1855 right
1856 .supports_direct_readonly
1857 .cmp(&left.supports_direct_readonly)
1858 })
1859 .then_with(|| right.total_bytes.cmp(&left.total_bytes))
1860 .then_with(|| right.modified_at_ms.cmp(&left.modified_at_ms))
1861 .then_with(|| right.root_path.cmp(&left.root_path))
1862 });
1863 bundles
1864}
1865
1866fn probe_historical_bundle(root_path: &Path) -> HistoricalBundleProbe {
1867 let Ok(conn) = open_historical_bundle_readonly(root_path) else {
1868 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or_default();
1869 };
1870
1871 let schema_version = read_meta_schema_version(&conn).ok().flatten();
1872 let fts_schema_rows: Option<i64> = conn
1873 .query_row_map(
1874 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
1875 fparams![],
1876 |row| row.get_typed(0),
1877 )
1878 .ok();
1879 let fts_queryable =
1880 historical_bundle_fts_queryable_via_frankensqlite(root_path, fts_schema_rows);
1881 let max_message_id: i64 = conn
1882 .query_row_map(
1883 "SELECT COALESCE(MAX(id), 0) FROM messages",
1884 fparams![],
1885 |row| row.get_typed(0),
1886 )
1887 .unwrap_or(0);
1888
1889 let probe = HistoricalBundleProbe {
1890 schema_version,
1891 fts_schema_rows,
1892 fts_queryable,
1893 max_message_id,
1894 };
1895
1896 if probe.schema_version.is_none()
1897 && probe.fts_schema_rows.is_none()
1898 && probe.max_message_id == 0
1899 {
1900 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or(probe);
1901 }
1902
1903 probe
1904}
1905
1906fn probe_historical_bundle_via_sqlite3_metadata(root_path: &Path) -> Option<HistoricalBundleProbe> {
1907 let bundle_uri = format!("file:{}?immutable=1", root_path.to_string_lossy());
1908 let output = Command::new("sqlite3")
1909 .arg("-batch")
1910 .arg("-noheader")
1911 .arg(&bundle_uri)
1912 .arg(
1913 "PRAGMA writable_schema=ON;
1914 SELECT COALESCE((SELECT value FROM meta WHERE key = 'schema_version'), '');
1915 SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages';
1916 SELECT COALESCE(MAX(id), 0) FROM messages;",
1917 )
1918 .output()
1919 .ok()?;
1920 if !output.status.success() {
1921 return None;
1922 }
1923
1924 let stdout = String::from_utf8(output.stdout).ok()?;
1925 let mut lines = stdout.lines();
1926 let schema_version = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
1927 let fts_schema_rows = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
1928 let max_message_id = lines
1929 .next()
1930 .and_then(|raw| raw.trim().parse::<i64>().ok())
1931 .unwrap_or(0);
1932
1933 Some(HistoricalBundleProbe {
1934 schema_version,
1935 fts_schema_rows,
1936 fts_queryable: false,
1937 max_message_id,
1938 })
1939}
1940
1941fn historical_bundle_fts_queryable_via_frankensqlite(
1942 root_path: &Path,
1943 fts_schema_rows: Option<i64>,
1944) -> bool {
1945 matches!(fts_schema_rows, Some(1))
1946 && FrankenStorage::open_readonly(root_path)
1947 .map(|storage| {
1948 storage
1949 .raw()
1950 .query("SELECT COUNT(*) FROM fts_messages")
1951 .is_ok()
1952 })
1953 .unwrap_or(false)
1954}
1955
1956fn historical_bundle_supports_direct_readonly(root_path: &Path) -> bool {
1957 open_historical_bundle_readonly(root_path)
1958 .and_then(|conn| historical_bundle_has_queryable_core_tables(&conn))
1959 .is_ok()
1960}
1961
/// Reports whether `sqlite_master` lists a table named `table`.
///
/// # Errors
/// Fails when the lookup query itself fails; a missing table is `Ok(false)`.
fn historical_table_exists(conn: &FrankenConnection, table: &str) -> Result<bool> {
    let found: Option<i64> = conn
        .query_row_map(
            "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1 LIMIT 1",
            fparams![table],
            |row| row.get_typed(0),
        )
        // Turns "no row" into `None` so that only real query failures are errors.
        .optional()
        .with_context(|| format!("checking for historical table {table}"))?;
    Ok(found.is_some())
}
1973
1974fn probe_historical_table_reads(conn: &FrankenConnection, table: &str) -> Result<()> {
1975 if !historical_table_exists(conn, table)? {
1976 return Err(anyhow!(
1977 "historical database missing required table {table}"
1978 ));
1979 }
1980
1981 let sql = format!("SELECT rowid FROM {table} LIMIT 1");
1982 let _: Option<i64> = conn
1983 .query_row_map(&sql, fparams![], |row| row.get_typed(0))
1984 .optional()
1985 .with_context(|| format!("probing rows from historical table {table}"))?;
1986 Ok(())
1987}
1988
1989fn historical_bundle_has_queryable_core_tables(conn: &FrankenConnection) -> Result<()> {
1990 probe_historical_table_reads(conn, "conversations")?;
1991 probe_historical_table_reads(conn, "messages")?;
1992 Ok(())
1993}
1994
1995fn open_historical_bundle_readonly(root_path: &Path) -> Result<FrankenConnection> {
1996 let path_str = root_path.to_string_lossy();
1997 let flags = FrankenOpenFlags::SQLITE_OPEN_READ_ONLY;
1998 let conn = open_franken_with_flags(&path_str, flags)
1999 .with_context(|| format!("opening historical database {}", root_path.display()))?;
2000 Ok(conn)
2001}
2002
/// Reports whether `line` is an `INSERT` statement targeting one of the core tables that the
/// recovery import replays.
///
/// A line qualifies when it starts with `INSERT INTO ` or `INSERT OR IGNORE INTO `, followed
/// by a core table name wrapped in matching single or double quotes. Matching works on
/// string slices only, so scanning a large `.recover` dump does not allocate per line.
fn is_recoverable_insert_line(line: &str) -> bool {
    const RECOVERABLE_TABLES: [&str; 6] = [
        "sources",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
    ];

    let Some(target) = line
        .strip_prefix("INSERT OR IGNORE INTO ")
        .or_else(|| line.strip_prefix("INSERT INTO "))
    else {
        return false;
    };
    let Some(quote) = target.chars().next().filter(|c| matches!(c, '\'' | '"')) else {
        return false;
    };
    // Both accepted quote characters are one byte long, so this slice is on a char boundary.
    let quoted_name = &target[quote.len_utf8()..];

    // The closing quote must match the opening one and directly follow the table name,
    // which keeps names like `messages_fts` from matching `messages`.
    RECOVERABLE_TABLES.iter().any(|table| {
        quoted_name
            .strip_prefix(*table)
            .map_or(false, |rest| rest.starts_with(quote))
    })
}
2020
2021fn recover_historical_bundle_via_sqlite3(
2022 bundle: &HistoricalDatabaseBundle,
2023) -> Result<HistoricalReadConnection> {
2024 let tempdir = tempfile::TempDir::new().context("creating temporary salvage directory")?;
2025 let recovered_db = tempdir.path().join("historical-recovered.db");
2026 let temp_conn = FrankenConnection::open(recovered_db.to_string_lossy().as_ref())
2027 .with_context(|| format!("creating recovered database {}", recovered_db.display()))?;
2028 temp_conn
2029 .execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA)
2030 .with_context(|| format!("initializing recovered schema {}", recovered_db.display()))?;
2031 drop(temp_conn);
2032
2033 let bundle_uri = format!("file:{}?immutable=1", bundle.root_path.to_string_lossy());
2034 let mut recover = Command::new("sqlite3")
2035 .arg(&bundle_uri)
2036 .arg(".recover")
2037 .stdout(Stdio::piped())
2038 .spawn()
2039 .with_context(|| {
2040 format!(
2041 "launching sqlite3 .recover for historical bundle {}",
2042 bundle.root_path.display()
2043 )
2044 })?;
2045 let recover_stdout = recover
2046 .stdout
2047 .take()
2048 .context("capturing sqlite3 .recover stdout")?;
2049
2050 let mut importer = Command::new("sqlite3")
2051 .arg(&recovered_db)
2052 .stdin(Stdio::piped())
2053 .spawn()
2054 .with_context(|| {
2055 format!(
2056 "launching sqlite3 importer for recovered bundle {}",
2057 recovered_db.display()
2058 )
2059 })?;
2060
2061 {
2062 let importer_stdin = importer
2063 .stdin
2064 .as_mut()
2065 .context("opening sqlite3 importer stdin")?;
2066 importer_stdin
2067 .write_all(b"BEGIN;\n")
2068 .context("starting recovery import transaction")?;
2069
2070 let reader = BufReader::new(recover_stdout);
2071 for line in reader.lines() {
2072 let line = line.context("reading sqlite3 .recover output")?;
2073 if is_recoverable_insert_line(&line) {
2074 importer_stdin
2075 .write_all(line.as_bytes())
2076 .context("writing recovered INSERT")?;
2077 importer_stdin
2078 .write_all(b"\n")
2079 .context("writing recovered INSERT newline")?;
2080 }
2081 }
2082
2083 importer_stdin
2084 .write_all(b"COMMIT;\n")
2085 .context("committing recovery import transaction")?;
2086 }
2087
2088 let recover_status = recover
2089 .wait()
2090 .context("waiting for sqlite3 .recover process")?;
2091 if !recover_status.success() {
2092 anyhow::bail!(
2093 "sqlite3 .recover exited with status {} for {}",
2094 recover_status,
2095 bundle.root_path.display()
2096 );
2097 }
2098
2099 let importer_status = importer
2100 .wait()
2101 .context("waiting for sqlite3 recovery importer")?;
2102 if !importer_status.success() {
2103 anyhow::bail!(
2104 "sqlite3 recovery importer exited with status {} for {}",
2105 importer_status,
2106 recovered_db.display()
2107 );
2108 }
2109
2110 let conn = open_historical_bundle_readonly(&recovered_db)?;
2111 historical_bundle_has_queryable_core_tables(&conn)?;
2112 Ok(HistoricalReadConnection {
2113 conn,
2114 method: "sqlite3-recover",
2115 root_path: recovered_db,
2116 _tempdir: Some(tempdir),
2117 })
2118}
2119
2120fn open_historical_bundle_for_salvage(
2121 bundle: &HistoricalDatabaseBundle,
2122) -> Result<HistoricalReadConnection> {
2123 match open_historical_bundle_readonly(&bundle.root_path) {
2124 Ok(conn) => {
2125 if historical_bundle_has_queryable_core_tables(&conn).is_ok() {
2126 return Ok(HistoricalReadConnection {
2127 conn,
2128 method: "direct-readonly",
2129 root_path: bundle.root_path.clone(),
2130 _tempdir: None,
2131 });
2132 }
2133 }
2134 Err(err) => {
2135 tracing::warn!(
2136 path = %bundle.root_path.display(),
2137 error = %err,
2138 "historical bundle direct open failed; falling back to sqlite3 .recover"
2139 );
2140 }
2141 }
2142
2143 recover_historical_bundle_via_sqlite3(bundle)
2144}
2145
2146fn historical_bundle_counts(conn: &FrankenConnection) -> Result<(usize, usize)> {
2147 let conversations: i64 =
2148 conn.query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
2149 row.get_typed(0)
2150 })?;
2151 let messages: i64 = conn.query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
2152 row.get_typed(0)
2153 })?;
2154 Ok((
2155 usize::try_from(conversations.max(0)).unwrap_or(usize::MAX),
2156 usize::try_from(messages.max(0)).unwrap_or(usize::MAX),
2157 ))
2158}
2159
/// Deletes `meta` rows carried over from a seeded database that describe runtime state:
/// every `historical_bundle_salvaged:*` entry plus `last_scan_ts`, `last_indexed_at`, and
/// `last_embedded_message_id`.
///
/// # Errors
/// Fails when the `DELETE` statement fails.
fn clear_seeded_runtime_meta(conn: &FrankenConnection) -> Result<()> {
    conn.execute(
        "DELETE FROM meta
         WHERE key LIKE 'historical_bundle_salvaged:%'
            OR key IN ('last_scan_ts', 'last_indexed_at', 'last_embedded_message_id')",
    )?;
    Ok(())
}
2168
/// Stores a JSON record in `meta` describing the import of `bundle`.
///
/// The row key comes from `FrankenStorage::historical_bundle_meta_key`; an existing row with
/// the same key is replaced.
///
/// # Errors
/// Fails when the record cannot be serialized or the `meta` write fails.
fn record_historical_bundle_import(
    conn: &FrankenConnection,
    bundle: &HistoricalDatabaseBundle,
    method: &str,
    conversations_imported: usize,
    messages_imported: usize,
) -> Result<()> {
    let key = FrankenStorage::historical_bundle_meta_key(bundle);
    // Bundle identity (path, size, mtime) is stored next to the import totals.
    let value = serde_json::json!({
        "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
        "path": bundle.root_path.display().to_string(),
        "bytes": bundle.total_bytes,
        "modified_at_ms": bundle.modified_at_ms,
        "method": method,
        "conversations_imported": conversations_imported,
        "messages_imported": messages_imported,
        "recorded_at_ms": FrankenStorage::now_millis(),
    });
    let value_str = serde_json::to_string(&value)?;
    conn.execute_compat(
        "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
        fparams![key, value_str],
    )?;
    Ok(())
}
2194
/// Finalizes a database that was seeded from a historical bundle so it can serve as the
/// canonical database.
///
/// Steps, in order: repair FTS consistency, open the database, reject schema versions that
/// are older than current (other than 13), clear seeded runtime `meta` rows, stamp the
/// current schema version, register the `fts_contentless` migration, and record the bundle
/// import under the method `"baseline-bulk-sql-copy"`.
///
/// NOTE(review): despite the `_via_rusqlite` suffix, the connection opened here is a
/// `FrankenConnection`. The `meta` writes are issued as separate statements with no visible
/// transaction, so a failure part-way may leave them partially applied — confirm whether
/// that is acceptable for callers.
///
/// # Errors
/// Fails when any of the steps above fails, or when the seeded schema version is too old.
fn finalize_seeded_canonical_bundle_via_rusqlite(
    canonical_db_path: &Path,
    bundle: &HistoricalDatabaseBundle,
    conversations_imported: usize,
    messages_imported: usize,
) -> Result<()> {
    // Runs before the connection below is opened; the repair result itself is not needed.
    let _fts_repair =
        ensure_fts_consistency_via_rusqlite(canonical_db_path).with_context(|| {
            format!(
                "repairing staged canonical FTS consistency before finalization: {}",
                canonical_db_path.display()
            )
        })?;

    let path_str = canonical_db_path.to_string_lossy();
    let conn = FrankenConnection::open(path_str.as_ref()).with_context(|| {
        format!(
            "opening seeded canonical database for post-seed finalization: {}",
            canonical_db_path.display()
        )
    })?;
    conn.execute("PRAGMA busy_timeout = 30000;")
        .with_context(|| {
            format!(
                "configuring busy timeout for seeded canonical database {}",
                canonical_db_path.display()
            )
        })?;
    let schema_version = read_meta_schema_version(&conn)?;

    // A missing version, version 13, or a version at or above current may proceed; any other
    // older version is rejected.
    if let Some(version) = schema_version
        && version < CURRENT_SCHEMA_VERSION
        && version != 13
    {
        anyhow::bail!(
            "seeded canonical bundle schema_version {version} is too old for baseline import and cannot be finalized automatically"
        );
    }

    clear_seeded_runtime_meta(&conn)?;

    conn.execute_compat(
        "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1)",
        fparams![CURRENT_SCHEMA_VERSION.to_string()],
    )?;

    // `OR IGNORE` keeps an existing migration row for this version untouched.
    conn.execute_compat(
        "INSERT OR IGNORE INTO _schema_migrations(version, name) VALUES(?1, 'fts_contentless')",
        fparams![CURRENT_SCHEMA_VERSION],
    )?;
    record_historical_bundle_import(
        &conn,
        bundle,
        "baseline-bulk-sql-copy",
        conversations_imported,
        messages_imported,
    )?;
    Ok(())
}
2254
2255fn read_meta_schema_version(conn: &FrankenConnection) -> Result<Option<i64>> {
2256 let version: Option<String> = conn
2257 .query_row_map(
2258 "SELECT value FROM meta WHERE key = 'schema_version'",
2259 fparams![],
2260 |row| row.get_typed(0),
2261 )
2262 .optional()?;
2263 Ok(version.and_then(|raw| raw.parse::<i64>().ok()))
2264}
2265
/// Test-only: counts the `sqlite_master` rows named `fts_messages`.
///
/// # Errors
/// Fails when the count query fails.
#[cfg(test)]
fn franken_fts_schema_rows(conn: &FrankenConnection) -> Result<i64> {
    conn.query_row_map(
        "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
        fparams![],
        |row| row.get_typed(0),
    )
    .context("counting sqlite_master rows for fts_messages via frankensqlite")
}
2275
/// Test-only: reports whether a `COUNT(*)` query against `fts_messages` succeeds.
#[cfg(test)]
fn franken_fts_limit_probe(conn: &FrankenConnection) -> bool {
    conn.query("SELECT COUNT(*) FROM fts_messages").is_ok()
}
2280
/// Test-only health probe of the database at `db_path`.
///
/// Collects the schema version, the `PRAGMA quick_check(1)` verdict, and the FTS state.
/// Message statistics are only queried when the quick check reports `ok`; otherwise both
/// are reported as 0.
///
/// # Errors
/// Fails when the database cannot be opened or configured, or when any query that is
/// actually issued fails.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn probe_database_health_via_frankensqlite(
    db_path: &Path,
) -> Result<SqliteDatabaseHealthProbe> {
    let path_str = db_path.to_string_lossy();
    let conn = FrankenConnection::open(path_str.as_ref()).with_context(|| {
        format!(
            "opening frankensqlite db at {} for database health probe",
            db_path.display()
        )
    })?;
    conn.execute_batch("PRAGMA busy_timeout = 30000;")
        .with_context(|| {
            format!(
                "configuring busy timeout for database health probe at {}",
                db_path.display()
            )
        })?;

    let schema_version = read_meta_schema_version(&conn)?;
    let quick_check_status: String = conn
        .query_row_map("PRAGMA quick_check(1)", fparams![], |row| row.get_typed(0))
        .with_context(|| format!("running PRAGMA quick_check(1) for {}", db_path.display()))?;
    let quick_check_ok = quick_check_status.trim().eq_ignore_ascii_case("ok");
    let fts_schema_rows = franken_fts_schema_rows(&conn)?;
    let fts_queryable = fts_schema_rows == 1 && franken_fts_limit_probe(&conn);

    // Leave the message tables alone when the quick check already flagged a problem.
    let (message_count, max_message_id) = if quick_check_ok {
        let count: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .context("counting messages during frankensqlite database health probe")?;
        let max_id: i64 = conn
            .query_row_map(
                "SELECT COALESCE(MAX(id), 0) FROM messages",
                fparams![],
                |row| row.get_typed(0),
            )
            .context("reading max message id during frankensqlite database health probe")?;
        (count, max_id)
    } else {
        (0, 0)
    };

    Ok(SqliteDatabaseHealthProbe {
        schema_version,
        quick_check_ok,
        fts_schema_rows,
        fts_queryable,
        message_count,
        max_message_id,
    })
}
2342
/// A historical bundle copied into a temporary staging directory, ready to be promoted.
struct StagedHistoricalSeed {
    /// Staging directory that contains the copy; it is created next to the canonical database.
    tempdir: tempfile::TempDir,
    /// Path of the staged database file inside `tempdir`.
    db_path: PathBuf,
}
2347
2348fn stage_historical_bundle_for_seed(
2349 canonical_db_path: &Path,
2350 source_root_path: &Path,
2351) -> Result<StagedHistoricalSeed> {
2352 let canonical_parent = canonical_db_path.parent().unwrap_or_else(|| Path::new("."));
2353 fs::create_dir_all(canonical_parent).with_context(|| {
2354 format!(
2355 "creating canonical database directory before bulk historical seed import: {}",
2356 canonical_parent.display()
2357 )
2358 })?;
2359 let tempdir = tempfile::TempDir::new_in(canonical_parent)
2360 .context("creating temporary baseline seed directory")?;
2361 let staged_seed_db = tempdir.path().join("baseline-seed-output.db");
2362 copy_database_bundle(source_root_path, &staged_seed_db)?;
2363
2364 Ok(StagedHistoricalSeed {
2365 tempdir,
2366 db_path: staged_seed_db,
2367 })
2368}
2369
2370fn promote_staged_historical_seed(
2371 canonical_db_path: &Path,
2372 staged_seed: &StagedHistoricalSeed,
2373) -> Result<()> {
2374 let canonical_backup = staged_seed
2375 .tempdir
2376 .path()
2377 .join("pre-seed-canonical-backup.db");
2378 let had_canonical = canonical_db_path.exists()
2379 || database_sidecar_path(canonical_db_path, "-wal").exists()
2380 || database_sidecar_path(canonical_db_path, "-shm").exists();
2381
2382 if had_canonical {
2383 move_database_bundle(canonical_db_path, &canonical_backup).with_context(|| {
2384 format!(
2385 "backing up canonical database before promoting staged historical seed import: {}",
2386 canonical_db_path.display()
2387 )
2388 })?;
2389 }
2390
2391 if let Err(err) =
2392 move_database_bundle(&staged_seed.db_path, canonical_db_path).with_context(|| {
2393 format!(
2394 "promoting staged historical seed database bundle {} into canonical path {}",
2395 staged_seed.db_path.display(),
2396 canonical_db_path.display()
2397 )
2398 })
2399 {
2400 if had_canonical {
2401 let _ = move_database_bundle(&canonical_backup, canonical_db_path);
2402 }
2403 return Err(err);
2404 }
2405
2406 Ok(())
2407}
2408
2409pub(crate) fn seed_canonical_from_best_historical_bundle(
2410 canonical_db_path: &Path,
2411) -> Result<Option<HistoricalSalvageOutcome>> {
2412 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
2413 let mut last_seed_error: Option<anyhow::Error> = None;
2414 for bundle in ordered_bundles {
2415 if let Some(version) = bundle.probe.schema_version
2416 && version < 13
2417 {
2418 let err = anyhow!(
2419 "historical bundle {} schema_version {version} is too old for baseline import",
2420 bundle.root_path.display()
2421 );
2422 tracing::warn!(
2423 path = %bundle.root_path.display(),
2424 schema_version = version,
2425 "historical bundle is too old for baseline seed import"
2426 );
2427 last_seed_error = Some(err);
2428 continue;
2429 }
2430
2431 let source = open_historical_bundle_for_salvage(&bundle).with_context(|| {
2432 format!(
2433 "opening historical seed bundle {} for baseline import",
2434 bundle.root_path.display()
2435 )
2436 })?;
2437 let (conversations_imported, messages_imported) = historical_bundle_counts(&source.conn)?;
2438
2439 let staged_seed = match stage_historical_bundle_for_seed(
2440 canonical_db_path,
2441 &source.root_path,
2442 ) {
2443 Ok(staged_seed) => staged_seed,
2444 Err(err) => {
2445 tracing::warn!(
2446 path = %bundle.root_path.display(),
2447 error = %err,
2448 "bulk baseline seed staging from historical bundle failed; trying next candidate"
2449 );
2450 last_seed_error = Some(err);
2451 continue;
2452 }
2453 };
2454
2455 if let Err(err) = finalize_seeded_canonical_bundle_via_rusqlite(
2456 &staged_seed.db_path,
2457 &bundle,
2458 conversations_imported,
2459 messages_imported,
2460 ) {
2461 tracing::warn!(
2462 path = %bundle.root_path.display(),
2463 error = %err,
2464 "finalizing staged historical seed import failed; trying next candidate"
2465 );
2466 last_seed_error = Some(err);
2467 continue;
2468 }
2469
2470 if let Err(err) = promote_staged_historical_seed(canonical_db_path, &staged_seed) {
2471 tracing::warn!(
2472 path = %bundle.root_path.display(),
2473 error = %err,
2474 "promoting staged historical seed import failed; trying next candidate"
2475 );
2476 last_seed_error = Some(err);
2477 continue;
2478 }
2479
2480 tracing::info!(
2481 path = %bundle.root_path.display(),
2482 conversations_imported,
2483 messages_imported,
2484 "seeded empty canonical database from largest healthy historical bundle"
2485 );
2486
2487 return Ok(Some(HistoricalSalvageOutcome {
2488 bundles_considered: 0,
2489 bundles_imported: 1,
2490 conversations_imported,
2491 messages_imported,
2492 }));
2493 }
2494 if let Some(err) = last_seed_error {
2495 return Err(err);
2496 }
2497 Ok(None)
2498}
2499
2500fn parse_json_column(value: Option<String>) -> serde_json::Value {
2501 value
2502 .and_then(|raw| serde_json::from_str(&raw).ok())
2503 .unwrap_or(serde_json::Value::Null)
2504}
2505
/// Object key under which `wrap_historical_raw_json` stores unparsed JSON
/// text; `historical_raw_json` recognises the wrapper by this key.
const HISTORICAL_RAW_JSON_SENTINEL_KEY: &str = "__cass_historical_raw_json__";
2507
2508fn wrap_historical_raw_json(raw: String) -> serde_json::Value {
2509 serde_json::json!({ HISTORICAL_RAW_JSON_SENTINEL_KEY: raw })
2510}
2511
2512fn historical_raw_json(value: &serde_json::Value) -> Option<&str> {
2513 match value {
2514 serde_json::Value::Object(map) if map.len() == 1 => map
2515 .get(HISTORICAL_RAW_JSON_SENTINEL_KEY)
2516 .and_then(serde_json::Value::as_str),
2517 _ => None,
2518 }
2519}
2520
2521fn parse_historical_json_column(value: Option<String>) -> serde_json::Value {
2522 match value {
2523 Some(raw) if raw.trim().is_empty() => serde_json::Value::Null,
2524 Some(raw) => wrap_historical_raw_json(raw),
2525 None => serde_json::Value::Null,
2526 }
2527}
2528
/// Reports whether the `CASS_DEBUG_HISTORICAL_SALVAGE` environment variable
/// is set, regardless of its value.
fn historical_salvage_debug_enabled() -> bool {
    matches!(
        std::env::var_os("CASS_DEBUG_HISTORICAL_SALVAGE"),
        Some(_)
    )
}
2532
/// Upper bounds for one historical import batch, as produced by
/// `historical_import_batch_limits`.
#[derive(Debug, Clone, Copy)]
struct HistoricalImportBatchLimits {
    /// Limit on conversations (overridable via
    /// `CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS`).
    conversations: usize,
    /// Limit on messages (overridable via `CASS_HISTORICAL_IMPORT_BATCH_MESSAGES`).
    messages: usize,
    /// Limit on payload size (overridable via `CASS_HISTORICAL_IMPORT_BATCH_CHARS`).
    // NOTE(review): presumably compared against `message_payload_size_hint`,
    // which counts bytes rather than characters — confirm at the call site.
    payload_chars: usize,
}
2539
2540fn env_positive_usize(key: &str) -> Option<usize> {
2541 dotenvy::var(key)
2542 .ok()
2543 .and_then(|value| value.parse::<usize>().ok())
2544 .filter(|value| *value > 0)
2545}
2546
2547fn historical_import_batch_limits() -> HistoricalImportBatchLimits {
2548 let cpu_count = std::thread::available_parallelism()
2549 .map(std::num::NonZeroUsize::get)
2550 .unwrap_or(1);
2551
2552 let default_limits = if cpu_count >= 32 {
2553 HistoricalImportBatchLimits {
2554 conversations: 128,
2555 messages: 16_384,
2556 payload_chars: 12_000_000,
2557 }
2558 } else {
2559 HistoricalImportBatchLimits {
2560 conversations: 32,
2561 messages: 4_096,
2562 payload_chars: 3_000_000,
2563 }
2564 };
2565
2566 HistoricalImportBatchLimits {
2567 conversations: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS")
2568 .unwrap_or(default_limits.conversations),
2569 messages: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_MESSAGES")
2570 .unwrap_or(default_limits.messages),
2571 payload_chars: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CHARS")
2572 .unwrap_or(default_limits.payload_chars),
2573 }
2574}
2575
2576fn json_value_size_hint(value: &serde_json::Value) -> usize {
2577 if let Some(raw) = historical_raw_json(value) {
2578 return raw.len();
2579 }
2580 match value {
2581 serde_json::Value::Null => 0,
2582 other => serde_json::to_string(other)
2583 .map(|raw| raw.len())
2584 .unwrap_or(0),
2585 }
2586}
2587
2588fn message_payload_size_hint(message: &Message) -> usize {
2589 message
2590 .content
2591 .len()
2592 .saturating_add(json_value_size_hint(&message.extra_json))
2593}
2594
/// Returns `true` when `name` starts with `prefix` and is not a `-wal` or
/// `-shm` sidecar file name.
fn is_backup_root_name(name: &str, prefix: &str) -> bool {
    if !name.starts_with(prefix) {
        return false;
    }
    let is_wal_or_shm = name.ends_with("-wal") || name.ends_with("-shm");
    !is_wal_or_shm
}
2598
/// Returns `true` when `name` ends with one of the database sidecar
/// suffixes: `-wal`, `-shm`, `-lock-shared`, `-lock-reserved`, or
/// `-lock-pending`.
fn has_db_sidecar_suffix(name: &str) -> bool {
    const KNOWN_SUFFIXES: [&str; 5] = [
        "-wal",
        "-shm",
        "-lock-shared",
        "-lock-reserved",
        "-lock-pending",
    ];
    for suffix in &KNOWN_SUFFIXES {
        if name.ends_with(*suffix) {
            return true;
        }
    }
    false
}
2615
/// Schema version this build treats as current; `check_schema_compatibility`
/// reports a database at this version as `SchemaCheck::Compatible`.
pub const CURRENT_SCHEMA_VERSION: i64 = 20;
/// Oldest schema version that `check_schema_compatibility` reports as
/// `SchemaCheck::NeedsMigration`; positive versions below it are reported as
/// `SchemaCheck::NeedsRebuild`.
const MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION: i64 = 13;
2619
/// Outcome of `check_schema_compatibility` for an existing database file.
#[derive(Debug, Clone)]
pub enum SchemaCheck {
    /// The recorded schema version equals the current schema version.
    Compatible,
    /// The database has no tables yet, or its schema version is within the
    /// supported in-place upgrade range.
    NeedsMigration,
    /// The database cannot be used as-is; the string is a human-readable reason.
    NeedsRebuild(String),
}
2630
2631fn schema_check_error_requires_rebuild(err: &frankensqlite::FrankenError) -> bool {
2632 matches!(
2636 err,
2637 frankensqlite::FrankenError::DatabaseCorrupt { .. }
2638 | frankensqlite::FrankenError::WalCorrupt { .. }
2639 | frankensqlite::FrankenError::NotADatabase { .. }
2640 | frankensqlite::FrankenError::ShortRead { .. }
2641 )
2642}
2643
/// Builds a sibling path of `path` named
/// `<file name>.backup.<pid>.<nanoseconds since the Unix epoch>.<nonce>`.
///
/// The nonce is a process-wide counter, so repeated calls within one process
/// produce distinct paths. A missing or non-UTF-8 file name falls back to
/// `db`, and a clock before the epoch falls back to a timestamp of `0`.
fn unique_backup_path(path: &Path) -> PathBuf {
    use std::sync::atomic::{AtomicU64, Ordering};

    static NEXT_NONCE: AtomicU64 = AtomicU64::new(0);

    let nanos_since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let sequence = NEXT_NONCE.fetch_add(1, Ordering::Relaxed);
    let file_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db",
    };

    let backup_name = format!(
        "{file_name}.backup.{}.{}.{}",
        std::process::id(),
        nanos_since_epoch,
        sequence
    );
    path.with_file_name(backup_name)
}
2661
/// Derives the hidden sibling path `.<file name>.vacuum-in-progress` for
/// `backup_path`.
///
/// A missing or non-UTF-8 file name falls back to `db.backup`.
fn vacuum_stage_backup_path(backup_path: &Path) -> PathBuf {
    let file_name = match backup_path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db.backup",
    };
    let staged_name = format!(".{file_name}.vacuum-in-progress");
    backup_path.with_file_name(staged_name)
}
2669
2670fn check_schema_compatibility(
2674 path: &Path,
2675) -> std::result::Result<SchemaCheck, frankensqlite::FrankenError> {
2676 let mut conn = open_franken_with_flags(
2677 &path.to_string_lossy(),
2678 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
2679 )?;
2680
2681 let result = (|| {
2682 let meta_exists: i32 = conn.query_row_map(
2684 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='meta'",
2685 fparams![],
2686 |row| row.get_typed(0),
2687 )?;
2688
2689 if meta_exists == 0 {
2690 let table_count: i32 = conn.query_row_map(
2693 "SELECT COUNT(*) FROM sqlite_master WHERE type='table'",
2694 fparams![],
2695 |row| row.get_typed(0),
2696 )?;
2697
2698 if table_count == 0 {
2699 return Ok(SchemaCheck::NeedsMigration);
2701 }
2702
2703 return Ok(SchemaCheck::NeedsRebuild(
2705 "Database missing schema version metadata".to_string(),
2706 ));
2707 }
2708
2709 let version: Option<i64> = conn
2711 .query_row_map(
2712 "SELECT value FROM meta WHERE key = 'schema_version'",
2713 fparams![],
2714 |row| Ok(row.get_typed::<String>(0)?.parse().ok()),
2715 )
2716 .ok()
2717 .flatten();
2718
2719 match version {
2720 Some(v) if v == SCHEMA_VERSION => Ok(SchemaCheck::Compatible),
2721 Some(v) if (MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION).contains(&v) => {
2722 Ok(SchemaCheck::NeedsMigration)
2723 }
2724 Some(v) if v > 0 && v < MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION => {
2725 Ok(SchemaCheck::NeedsRebuild(format!(
2726 "Schema version {} is too old for in-place migration; supported upgrade path starts at version {}",
2727 v, MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION
2728 )))
2729 }
2730 Some(v) => {
2731 Ok(SchemaCheck::NeedsRebuild(format!(
2733 "Schema version {} is newer than supported version {}",
2734 v, SCHEMA_VERSION
2735 )))
2736 }
2737 None => Ok(SchemaCheck::NeedsRebuild(
2738 "Schema version not found or invalid".to_string(),
2739 )),
2740 }
2741 })();
2742
2743 if let Err(close_err) = conn.close_in_place() {
2744 tracing::warn!(
2745 error = %close_err,
2746 db_path = %path.display(),
2747 "check_schema_compatibility: close_in_place failed; falling back to best-effort close"
2748 );
2749 conn.close_best_effort_in_place();
2750 }
2751
2752 result
2753}
2754
/// File-local alias for `CURRENT_SCHEMA_VERSION`.
const SCHEMA_VERSION: i64 = CURRENT_SCHEMA_VERSION;
2756
/// Test-only SQL for schema V1: creates the `meta`, `agents`, `workspaces`,
/// `conversations`, `messages`, `snippets`, `tags`, and `conversation_tags`
/// tables plus two lookup indexes.
#[cfg(test)]
const MIGRATION_V1: &str = r"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
    id INTEGER PRIMARY KEY,
    slug TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    version TEXT,
    kind TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    display_name TEXT
);

CREATE TABLE IF NOT EXISTS conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    UNIQUE(agent_id, external_id)
);

CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    idx INTEGER NOT NULL,
    role TEXT NOT NULL,
    author TEXT,
    created_at INTEGER,
    content TEXT NOT NULL,
    extra_json TEXT,
    UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
    PRIMARY KEY (conversation_id, tag_id)
);

CREATE INDEX IF NOT EXISTS idx_conversations_agent_started
    ON conversations(agent_id, started_at DESC);

CREATE INDEX IF NOT EXISTS idx_messages_conv_idx
    ON messages(conversation_id, idx);

";
2836
/// Test-only SQL for schema V2: creates the `fts_messages` FTS5 table (porter
/// tokenizer) if absent and populates it from the existing messages joined
/// with their conversation, agent, and workspace.
#[cfg(test)]
const MIGRATION_V2: &str = r"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(
    content,
    title,
    agent,
    workspace,
    source_path,
    created_at UNINDEXED,
    message_id UNINDEXED,
    tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
    m.content,
    c.title,
    a.slug,
    w.path,
    c.source_path,
    m.created_at,
    m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
2863
/// Test-only SQL for schema V3: drops `fts_messages`, recreates it with the
/// same columns, and repopulates it from the relational tables.
// NOTE(review): `dead_code` is allowed here, presumably because no test
// currently references this constant — confirm before removing it.
#[cfg(test)]
#[allow(dead_code)]
const MIGRATION_V3: &str = r"
DROP TABLE IF EXISTS fts_messages;
CREATE VIRTUAL TABLE fts_messages USING fts5(
    content,
    title,
    agent,
    workspace,
    source_path,
    created_at UNINDEXED,
    message_id UNINDEXED,
    tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
    m.content,
    c.title,
    a.slug,
    w.path,
    c.source_path,
    m.created_at,
    m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
2892
/// Test-only SQL for schema V4: creates the `sources` table and inserts the
/// default `local` source row if it is not already present.
#[cfg(test)]
const MIGRATION_V4: &str = r"
-- Sources table for tracking where conversations come from
CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY, -- source_id (e.g., 'local', 'work-laptop')
    kind TEXT NOT NULL, -- 'local', 'ssh', etc.
    host_label TEXT, -- display label
    machine_id TEXT, -- optional stable machine id
    platform TEXT, -- 'macos', 'linux', 'windows'
    config_json TEXT, -- JSON blob for extra config (SSH params, path rewrites)
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

-- Bootstrap: Insert the default 'local' source
INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
2911
/// Test-only SQL for schema V5: rebuilds `conversations` with the `source_id`
/// and `origin_host` columns and a `(source_id, agent_id, external_id)`
/// unique constraint, copying existing rows with `source_id = 'local'`.
#[cfg(test)]
const MIGRATION_V5: &str = r"
-- Add provenance columns to conversations table
-- SQLite cannot alter unique constraints, so we need to recreate the table

-- Create new table with provenance columns and updated unique constraint
CREATE TABLE conversations_new (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    UNIQUE(source_id, agent_id, external_id)
);

-- Copy data from old table (all existing conversations get source_id='local')
INSERT INTO conversations_new (id, agent_id, workspace_id, source_id, external_id, title,
    source_path, started_at, ended_at, approx_tokens, metadata_json, origin_host)
SELECT id, agent_id, workspace_id, 'local', external_id, title,
    source_path, started_at, ended_at, approx_tokens, metadata_json, NULL
FROM conversations;

-- Drop old table and rename new
DROP TABLE conversations;
ALTER TABLE conversations_new RENAME TO conversations;

-- Recreate indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
";
2949
/// Test-only SQL for schema V6: adds an index on `conversations(source_path)`.
#[cfg(test)]
const MIGRATION_V6: &str = r"
-- Optimize lookup by source_path (used by TUI detail view)
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
";
2955
/// Test-only SQL for schema V7: adds the `metadata_bin` BLOB column to
/// `conversations` and the `extra_bin` BLOB column to `messages`.
#[cfg(test)]
const MIGRATION_V7: &str = r"
-- Add binary columns for MessagePack serialization (Opt 3.1)
-- Binary format is 50-70% smaller than JSON and faster to parse
ALTER TABLE conversations ADD COLUMN metadata_bin BLOB;
ALTER TABLE messages ADD COLUMN extra_bin BLOB;
";
2963
/// Test-only SQL for schema V8: creates the `daily_stats` aggregate table
/// keyed by `(day_id, agent_slug, source_id)` and two supporting indexes.
#[cfg(test)]
const MIGRATION_V8: &str = r"
-- Opt 3.2: Daily stats materialized table for O(1) time-range histograms
-- Provides fast aggregated queries for stats/dashboard without full table scans

CREATE TABLE IF NOT EXISTS daily_stats (
    day_id INTEGER NOT NULL, -- Days since 2020-01-01 (Unix epoch + offset)
    agent_slug TEXT NOT NULL, -- 'all' for totals, or specific agent slug
    source_id TEXT NOT NULL DEFAULT 'all', -- 'all' for totals, or specific source
    session_count INTEGER NOT NULL DEFAULT 0,
    message_count INTEGER NOT NULL DEFAULT 0,
    total_chars INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
2983
/// Test-only SQL for schema V9: creates the `embedding_jobs` table and a
/// partial unique index allowing at most one `pending`/`running` job per
/// `(db_path, model_id)`.
#[cfg(test)]
const MIGRATION_V9: &str = r"
-- Background embedding jobs tracking table
CREATE TABLE IF NOT EXISTS embedding_jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    db_path TEXT NOT NULL,
    model_id TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    total_docs INTEGER NOT NULL DEFAULT 0,
    completed_docs INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT
);

-- Only one pending or running job per (db_path, model_id) at a time.
-- Multiple completed/failed/cancelled jobs are allowed for history.
CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
3006
/// Test-only SQL for schema V10: creates the `token_usage` ledger, the
/// `token_daily_stats` rollup table, and the `model_pricing` table (seeded
/// with pricing rows), and adds token summary columns to `conversations`.
#[cfg(test)]
const MIGRATION_V10: &str = r"
-- Token analytics: per-message token usage ledger
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',

    -- Timing
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,

    -- Model identification
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,

    -- Token counts (nullable — not all agents provide all fields)
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,

    -- Cost estimation
    estimated_cost_usd REAL,

    -- Message context
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,

    -- Data quality
    data_source TEXT NOT NULL DEFAULT 'api',

    UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

-- Token analytics: pre-aggregated daily rollups
CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',

    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,

    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,

    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,

    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,

    session_count INTEGER NOT NULL DEFAULT 0,

    last_updated INTEGER NOT NULL,

    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

-- Model pricing lookup table
CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

-- Seed with current pricing (as of 2026-02)
INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Extend conversations table with token summary columns
ALTER TABLE conversations ADD COLUMN total_input_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_output_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_read_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_creation_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN grand_total_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN estimated_cost_usd REAL;
ALTER TABLE conversations ADD COLUMN primary_model TEXT;
ALTER TABLE conversations ADD COLUMN api_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN tool_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN user_message_count INTEGER;
ALTER TABLE conversations ADD COLUMN assistant_message_count INTEGER;
";
3130
/// SQL for schema V14: drops the `fts_messages` table if it exists. The
/// embedded SQL comments describe the rationale.
const MIGRATION_V14: &str = r"
-- Switch FTS5 from internal-content to contentless mode (CASS #163).
-- Drop the old V13 internal-content fts_messages first so that
-- sqlite_schema does not contain two conflicting CREATE VIRTUAL TABLE
-- entries, which makes the database completely unreadable.
-- The current contentless table is recreated lazily after open() only when the
-- frankensqlite FTS consistency check finds it missing or malformed.
DROP TABLE IF EXISTS fts_messages;
";
3140
/// SQL that creates the `conversation_tail_state` table if it does not
/// exist. The same table definition also appears in `MIGRATION_V18`.
const MIGRATION_V15_TAIL_STATE_TABLE: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
    -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
";
3151
/// SQL for schema V16: drops the `idx_messages_conv_idx` index if it exists.
const MIGRATION_V16: &str = r"
-- UNIQUE(conversation_id, idx) already creates sqlite_autoindex_messages_1,
-- which covers the same lookup/order key as idx_messages_conv_idx. Keeping both
-- doubles message insert index maintenance on the hot indexing path.
DROP INDEX IF EXISTS idx_messages_conv_idx;
";
3158
/// SQL for schema V17: drops the `idx_messages_created` index if it exists.
const MIGRATION_V17: &str = r"
-- Drop the global messages(created_at) secondary index from the ingest hot
-- path. Search/time filters are served by the derived search layer and
-- conversation/analytics indexes, while this index is maintained on every
-- message insert.
DROP INDEX IF EXISTS idx_messages_created;
";
3166
/// SQL for schema V18: creates `conversation_tail_state` if needed and
/// backfills it from every `conversations` row that has at least one
/// non-NULL tail column.
const MIGRATION_V18: &str = r"
-- Move append-tail state out of the wide, indexed conversations row. The hot
-- append path updates this cache for every appended conversation; keeping it in
-- a tiny rowid table avoids rewriting the large conversation record.
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
    -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_tail_state (
    conversation_id, ended_at, last_message_idx, last_message_created_at
)
SELECT id, ended_at, last_message_idx, last_message_created_at
FROM conversations
WHERE ended_at IS NOT NULL
    OR last_message_idx IS NOT NULL
    OR last_message_created_at IS NOT NULL;
";
3189
/// SQL for schema V19: creates `conversation_external_lookup` and backfills
/// it for conversations with a non-NULL `external_id`. The lookup key is
/// `<len(source_id)>:<source_id>:<agent_id>:<len(external_id)>:<external_id>`.
const MIGRATION_V19: &str = r"
-- Materialize external conversation provenance into one compact lookup key.
-- This keeps the hot append/new-conversation probe on a single primary-key
-- lookup instead of a composite conversations-table predicate.
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
    CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
    CAST(agent_id AS TEXT) || ':' ||
    CAST(length(external_id) AS TEXT) || ':' || external_id,
    id
FROM conversations
WHERE external_id IS NOT NULL;
";
3208
/// SQL for schema V20: creates `conversation_external_tail_lookup` and
/// backfills it for conversations with a non-NULL `external_id`, combining
/// the V19-style lookup key with the tail columns read from
/// `conversation_tail_state`.
const MIGRATION_V20: &str = r"
-- Fuse external conversation lookup with append-tail state. Append-heavy
-- workloads can resolve both the conversation id and tail plan from one
-- primary-key probe.
CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
    lookup_key,
    conversation_id,
    ended_at,
    last_message_idx,
    last_message_created_at
)
SELECT
    CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
    CAST(c.agent_id AS TEXT) || ':' ||
    CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
    c.id,
    (SELECT ts.ended_at
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id),
    (SELECT ts.last_message_idx
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id),
    (SELECT ts.last_message_created_at
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id)
FROM conversations c
WHERE c.external_id IS NOT NULL;
";
3245
/// One embedding job record. The field names match the columns of the
/// `embedding_jobs` table declared in `MIGRATION_V9`.
#[derive(Debug, Clone)]
pub struct EmbeddingJobRow {
    /// Row id of the job.
    pub id: i64,
    /// Database path the job belongs to.
    pub db_path: String,
    /// Identifier of the embedding model.
    pub model_id: String,
    /// Job status text (the V9 schema uses values such as `pending` and `running`).
    pub status: String,
    /// Total number of documents in the job.
    pub total_docs: i64,
    /// Number of documents completed so far.
    pub completed_docs: i64,
    /// Error message, if one was recorded.
    pub error_message: Option<String>,
    /// Creation timestamp, stored as text.
    pub created_at: String,
    /// Start timestamp as text, if set.
    pub started_at: Option<String>,
    /// Completion timestamp as text, if set.
    pub completed_at: Option<String>,
}
3260
/// Conversation-level fields carried through a lexical rebuild.
// NOTE(review): presumably loaded from `conversations` joined with agents,
// workspaces, and sources — confirm against the query that builds it.
#[derive(Debug, Clone)]
pub struct LexicalRebuildConversationRow {
    /// Conversation row id, when known.
    pub id: Option<i64>,
    /// Slug of the agent the conversation belongs to.
    pub agent_slug: String,
    /// Workspace path, if the conversation has one.
    pub workspace: Option<PathBuf>,
    /// External identifier of the conversation, if any.
    pub external_id: Option<String>,
    /// Conversation title, if any.
    pub title: Option<String>,
    /// Path of the conversation's source.
    pub source_path: PathBuf,
    /// Start timestamp, if recorded.
    pub started_at: Option<i64>,
    /// End timestamp, if recorded.
    pub ended_at: Option<i64>,
    /// Identifier of the source the conversation came from.
    pub source_id: String,
    /// Originating host, if recorded.
    pub origin_host: Option<String>,
}
3280
/// Per-conversation size figures used when planning a lexical rebuild.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LexicalRebuildConversationFootprintRow {
    /// Id of the conversation the figures describe.
    pub conversation_id: i64,
    /// Number of messages in the conversation.
    pub message_count: usize,
    /// Message payload size in bytes; an estimate when the row is built by
    /// `lexical_rebuild_conversation_footprint_from_count`.
    pub message_bytes: usize,
}
3289
/// Bytes assumed per message (4 KiB) when a conversation's footprint is
/// estimated from its message count alone.
pub(crate) const LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE: usize = 4 * 1024;
/// Largest number of conversations without tail metadata for which
/// `lexical_rebuild_tail_metadata_coverage_is_sufficient` still returns `true`.
const LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT: usize = 64;
3292
3293fn lexical_rebuild_tail_metadata_coverage_is_sufficient(
3294 total_conversations: usize,
3295 covered_conversations: usize,
3296) -> bool {
3297 total_conversations == 0
3298 || total_conversations.saturating_sub(covered_conversations.min(total_conversations))
3299 <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT
3300}
3301
/// Derives a message count from the last message index as `index + 1`.
///
/// Returns `None` when the index is absent or negative, or when the count
/// does not fit the integer types involved.
fn lexical_rebuild_message_count_from_tail_idx(last_message_idx: Option<i64>) -> Option<usize> {
    last_message_idx
        .and_then(|idx| u64::try_from(idx).ok())
        .and_then(|idx| idx.checked_add(1))
        .and_then(|count| usize::try_from(count).ok())
}
3307
3308fn lexical_rebuild_conversation_footprint_from_count(
3309 conversation_id: i64,
3310 message_count: usize,
3311) -> LexicalRebuildConversationFootprintRow {
3312 LexicalRebuildConversationFootprintRow {
3313 conversation_id,
3314 message_count,
3315 message_bytes: message_count
3316 .saturating_mul(LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE),
3317 }
3318}
3319
/// Message-level fields carried through a lexical rebuild. The field names
/// match columns of the `messages` table.
#[derive(Debug, Clone)]
pub struct LexicalRebuildMessageRow {
    /// Id of the owning conversation.
    pub conversation_id: i64,
    /// Message row id.
    pub id: i64,
    /// Index of the message within its conversation.
    pub idx: i64,
    /// Role text of the message.
    pub role: String,
    /// Author, if recorded.
    pub author: Option<String>,
    /// Creation timestamp, if recorded.
    pub created_at: Option<i64>,
    /// Message content.
    pub content: String,
}
3331
/// Slimmed-down message row for messages that are already grouped, so it
/// carries no conversation or message id.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexicalRebuildGroupedMessageRow {
    /// Position of the message within its conversation.
    pub idx: i64,
    /// Whether the message role is a tool role.
    pub is_tool_role: bool,
    /// Creation timestamp, if recorded (unit not visible here — TODO confirm).
    pub created_at: Option<i64>,
    /// Message text.
    pub content: String,
}
3342
3343pub type LexicalRebuildGroupedMessageRows = SmallVec<[LexicalRebuildGroupedMessageRow; 32]>;
3344
3345pub type SqliteStorage = FrankenStorage;
3347
3348pub struct FrankenStorage {
3350 conn: FrankenConnection,
3351 db_path: PathBuf,
3352 ephemeral_writer_preflight_verified: AtomicBool,
3353 index_writer_checkpoint_pages: AtomicI64,
3354 index_writer_busy_timeout_ms: AtomicU64,
3355 cached_ephemeral_writer: parking_lot::Mutex<CachedEphemeralWriter>,
3356 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3357 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3358 ensured_conversation_sources: Arc<parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>>,
3359 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3360 fts_messages_present_cache: AtomicI8,
3361}
3362
/// Page count passed to `PRAGMA wal_autocheckpoint` by `apply_config`.
const DEFAULT_WAL_AUTOCHECKPOINT_PAGES: i64 = 4096;

/// Sentinel meaning "no checkpoint page count recorded".
const UNSET_INDEX_WRITER_CHECKPOINT_PAGES: i64 = i64::MIN;

/// Sentinel meaning "no busy timeout recorded".
const UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS: u64 = 0;

/// `fts_messages` presence has not been probed yet (or was invalidated).
const FTS_MESSAGES_PRESENT_UNKNOWN: i8 = 0;

/// The last probe concluded `fts_messages` is absent.
const FTS_MESSAGES_PRESENT_ABSENT: i8 = 1;

/// The last probe concluded `fts_messages` is present.
const FTS_MESSAGES_PRESENT_PRESENT: i8 = 2;
3372
3373enum CachedEphemeralWriter {
3374 Uninitialized,
3375 Cached(Box<SendFrankenConnection>),
3376 InUse,
3377}
3378
3379#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3380struct EnsuredAgentKey {
3381 slug: String,
3382 name: String,
3383 version: Option<String>,
3384 kind: String,
3385}
3386
3387impl EnsuredAgentKey {
3388 fn from_agent(agent: &Agent) -> Self {
3389 Self {
3390 slug: agent.slug.clone(),
3391 name: agent.name.clone(),
3392 version: agent.version.clone(),
3393 kind: agent_kind_str(agent.kind.clone()),
3394 }
3395 }
3396}
3397
/// Cache key identifying a workspace by path and optional display name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredWorkspaceKey {
    path: String,
    display_name: Option<String>,
}

impl EnsuredWorkspaceKey {
    /// Builds a key, taking ownership of `path` and copying `display_name`.
    fn new(path: String, display_name: Option<&str>) -> Self {
        let display_name = display_name.map(|label| label.to_string());
        Self { path, display_name }
    }
}
3412
3413#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3414struct EnsuredConversationSourceKey {
3415 id: String,
3416 kind: SourceKind,
3417 host_label: Option<String>,
3418}
3419
3420impl EnsuredConversationSourceKey {
3421 fn from_source(source: &Source) -> Self {
3422 Self {
3423 id: source.id.clone(),
3424 kind: source.kind,
3425 host_label: source.host_label.clone(),
3426 }
3427 }
3428}
3429
/// Cache key identifying one daily-stats bucket: day, agent slug and source id.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredDailyStatsKey {
    day_id: i64,
    agent_slug: String,
    source_id: String,
}

impl EnsuredDailyStatsKey {
    /// Builds a key, copying both string arguments.
    fn new(day_id: i64, agent_slug: &str, source_id: &str) -> Self {
        Self {
            day_id,
            agent_slug: String::from(agent_slug),
            source_id: String::from(source_id),
        }
    }
}
3446
/// PRAGMA spellings tried, in order, to switch `autocommit_retain` off: the
/// `fsqlite.`-prefixed form first, then the unprefixed form.
const AUTOCOMMIT_RETAIN_OFF_PRAGMAS: [&str; 2] = [
    "PRAGMA fsqlite.autocommit_retain = OFF;",
    "PRAGMA autocommit_retain = OFF;",
];
3451
3452fn disable_autocommit_retain<E>(
3453 mut execute: impl FnMut(&'static str) -> std::result::Result<(), E>,
3454) -> Result<&'static str>
3455where
3456 E: std::fmt::Display,
3457{
3458 let mut failures = Vec::new();
3459 for pragma in AUTOCOMMIT_RETAIN_OFF_PRAGMAS {
3460 match execute(pragma) {
3461 Ok(()) => return Ok(pragma),
3462 Err(err) => {
3463 let error = err.to_string();
3464 tracing::debug!(
3465 %pragma,
3466 error = %error,
3467 "autocommit_retain PRAGMA variant not supported"
3468 );
3469 failures.push(format!("{pragma}: {error}"));
3470 }
3471 }
3472 }
3473
3474 Err(anyhow!(
3475 "failed to disable autocommit_retain on frankensqlite connection; \
3476 refusing to keep a long-lived MVCC connection that may accumulate \
3477 unbounded write snapshots. Upgrade frankensqlite to a version that \
3478 supports one of these PRAGMAs or use a short-lived connection path. \
3479 attempts: {}",
3480 failures.join("; ")
3481 ))
3482}
3483
3484impl FrankenStorage {
3485 fn new(conn: FrankenConnection, db_path: PathBuf) -> Self {
3486 Self::new_with_shared_caches(
3487 conn,
3488 db_path,
3489 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3490 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3491 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3492 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3493 )
3494 }
3495
3496 fn new_with_shared_caches(
3497 conn: FrankenConnection,
3498 db_path: PathBuf,
3499 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3500 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3501 ensured_conversation_sources: Arc<
3502 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3503 >,
3504 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3505 ) -> Self {
3506 Self {
3507 conn,
3508 db_path,
3509 ephemeral_writer_preflight_verified: AtomicBool::new(false),
3510 index_writer_checkpoint_pages: AtomicI64::new(UNSET_INDEX_WRITER_CHECKPOINT_PAGES),
3511 index_writer_busy_timeout_ms: AtomicU64::new(UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS),
3512 cached_ephemeral_writer: parking_lot::Mutex::new(CachedEphemeralWriter::Uninitialized),
3513 ensured_agents,
3514 ensured_workspaces,
3515 ensured_conversation_sources,
3516 ensured_daily_stats_keys,
3517 fts_messages_present_cache: AtomicI8::new(FTS_MESSAGES_PRESENT_UNKNOWN),
3518 }
3519 }
3520
3521 fn apply_open_stage_busy_timeout(&self) {
3522 if let Err(err) = self.conn.execute("PRAGMA busy_timeout = 5000;") {
3523 tracing::debug!(
3524 error = %err,
3525 "failed to apply open-stage busy_timeout before migrations"
3526 );
3527 }
3528 }
3529
3530 pub fn open(path: &Path) -> Result<Self> {
3536 if let Some(parent) = path.parent() {
3537 fs::create_dir_all(parent)
3538 .with_context(|| format!("creating db directory {}", parent.display()))?;
3539 }
3540
3541 let path_str = path.to_string_lossy().to_string();
3542 let _doctor_guard =
3543 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3544 let conn = FrankenConnection::open(&path_str)
3545 .with_context(|| format!("opening frankensqlite db at {}", path.display()))?;
3546 let storage = Self::new(conn, path.to_path_buf());
3547 storage.apply_open_stage_busy_timeout();
3548 storage.run_migrations()?;
3549 storage.repair_missing_current_schema_objects()?;
3550 storage.apply_config()?;
3551 Ok(storage)
3552 }
3553
3554 pub fn open_writer(path: &Path) -> Result<Self> {
3560 Self::open_writer_with_shared_caches(
3561 path,
3562 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3563 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3564 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3565 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3566 )
3567 }
3568
3569 fn open_writer_with_shared_caches(
3570 path: &Path,
3571 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3572 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3573 ensured_conversation_sources: Arc<
3574 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3575 >,
3576 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3577 ) -> Result<Self> {
3578 let path_str = path.to_string_lossy().to_string();
3579 let _doctor_guard =
3580 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3581 let conn = FrankenConnection::open(&path_str)
3582 .with_context(|| format!("opening frankensqlite writer at {}", path.display()))?;
3583 let storage = Self::new_with_shared_caches(
3584 conn,
3585 path.to_path_buf(),
3586 ensured_agents,
3587 ensured_workspaces,
3588 ensured_conversation_sources,
3589 ensured_daily_stats_keys,
3590 );
3591 storage.apply_config()?;
3592 Ok(storage)
3593 }
3594
3595 pub(crate) fn acquire_cached_ephemeral_writer(&self) -> Result<(Self, bool)> {
3596 let mut cached = self.cached_ephemeral_writer.lock();
3597 match std::mem::replace(&mut *cached, CachedEphemeralWriter::InUse) {
3598 CachedEphemeralWriter::Cached(conn) => {
3599 let (conn, checkpoint_pages, busy_timeout_ms) = (*conn).into_parts();
3600 let writer = Self::new_with_shared_caches(
3601 conn,
3602 self.db_path.clone(),
3603 Arc::clone(&self.ensured_agents),
3604 Arc::clone(&self.ensured_workspaces),
3605 Arc::clone(&self.ensured_conversation_sources),
3606 Arc::clone(&self.ensured_daily_stats_keys),
3607 );
3608 writer
3609 .index_writer_checkpoint_pages
3610 .store(checkpoint_pages, Ordering::Relaxed);
3611 writer
3612 .index_writer_busy_timeout_ms
3613 .store(busy_timeout_ms, Ordering::Relaxed);
3614 Ok((writer, true))
3615 }
3616 CachedEphemeralWriter::Uninitialized => {
3617 drop(cached);
3618 match Self::open_writer_with_shared_caches(
3619 &self.db_path,
3620 Arc::clone(&self.ensured_agents),
3621 Arc::clone(&self.ensured_workspaces),
3622 Arc::clone(&self.ensured_conversation_sources),
3623 Arc::clone(&self.ensured_daily_stats_keys),
3624 ) {
3625 Ok(writer) => Ok((writer, true)),
3626 Err(err) => {
3627 let mut cached = self.cached_ephemeral_writer.lock();
3628 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3629 *cached = CachedEphemeralWriter::Uninitialized;
3630 }
3631 Err(err)
3632 }
3633 }
3634 }
3635 CachedEphemeralWriter::InUse => {
3636 *cached = CachedEphemeralWriter::InUse;
3637 drop(cached);
3638 Ok((
3639 Self::open_writer_with_shared_caches(
3640 &self.db_path,
3641 Arc::clone(&self.ensured_agents),
3642 Arc::clone(&self.ensured_workspaces),
3643 Arc::clone(&self.ensured_conversation_sources),
3644 Arc::clone(&self.ensured_daily_stats_keys),
3645 )?,
3646 false,
3647 ))
3648 }
3649 }
3650 }
3651
3652 pub(crate) fn release_cached_ephemeral_writer(&self, writer: Self) {
3653 let checkpoint_pages = writer.index_writer_checkpoint_pages.load(Ordering::Relaxed);
3654 let busy_timeout_ms = writer.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
3655 let conn = writer.into_raw();
3656 let mut cached = self.cached_ephemeral_writer.lock();
3657 debug_assert!(
3658 matches!(&*cached, CachedEphemeralWriter::InUse),
3659 "cached ephemeral writer state should be in-use when releasing"
3660 );
3661 *cached = CachedEphemeralWriter::Cached(Box::new(
3662 SendFrankenConnection::new_with_index_writer_state(
3663 conn,
3664 checkpoint_pages,
3665 busy_timeout_ms,
3666 ),
3667 ));
3668 }
3669
3670 pub(crate) fn discard_cached_ephemeral_writer(&self, mut writer: Self) {
3671 writer.close_best_effort_in_place();
3672 let mut cached = self.cached_ephemeral_writer.lock();
3673 if matches!(&*cached, CachedEphemeralWriter::InUse) {
3674 *cached = CachedEphemeralWriter::Uninitialized;
3675 }
3676 }
3677
3678 fn cached_agent_id(&self, key: &EnsuredAgentKey) -> Option<i64> {
3679 self.ensured_agents.lock().get(key).copied()
3680 }
3681
3682 fn mark_agent_ensured(&self, key: EnsuredAgentKey, id: i64) {
3683 self.ensured_agents.lock().insert(key, id);
3684 }
3685
3686 fn cached_workspace_id(&self, key: &EnsuredWorkspaceKey) -> Option<i64> {
3687 self.ensured_workspaces.lock().get(key).copied()
3688 }
3689
3690 fn mark_workspace_ensured(&self, key: EnsuredWorkspaceKey, id: i64) {
3691 self.ensured_workspaces.lock().insert(key, id);
3692 }
3693
3694 fn conversation_source_already_ensured(&self, key: &EnsuredConversationSourceKey) -> bool {
3695 self.ensured_conversation_sources.lock().contains(key)
3696 }
3697
3698 fn mark_conversation_source_ensured(&self, key: EnsuredConversationSourceKey) {
3699 self.ensured_conversation_sources.lock().insert(key);
3700 }
3701
3702 fn daily_stats_key_already_ensured(&self, key: &EnsuredDailyStatsKey) -> bool {
3703 self.ensured_daily_stats_keys.lock().contains(key)
3704 }
3705
3706 fn daily_stats_keys_already_ensured(&self, keys: &[EnsuredDailyStatsKey; 4]) -> bool {
3707 let ensured = self.ensured_daily_stats_keys.lock();
3708 keys.iter().all(|key| ensured.contains(key))
3709 }
3710
3711 fn mark_daily_stats_key_ensured(&self, key: EnsuredDailyStatsKey) {
3712 self.ensured_daily_stats_keys.lock().insert(key);
3713 }
3714
3715 fn fts_messages_present_cached(&self, tx: &FrankenTransaction<'_>) -> bool {
3716 match self.fts_messages_present_cache.load(Ordering::Acquire) {
3717 FTS_MESSAGES_PRESENT_PRESENT => return true,
3718 FTS_MESSAGES_PRESENT_ABSENT => return false,
3719 _ => {}
3720 }
3721
3722 let present = tx
3723 .query_row_map(
3724 "SELECT COUNT(*) FROM sqlite_master
3725 WHERE name = 'fts_messages'
3726 AND rootpage > 0",
3727 fparams![],
3728 |row| row.get_typed::<i64>(0),
3729 )
3730 .map(|count| count > 0)
3731 .unwrap_or_else(|err| {
3732 tracing::debug!(
3733 error = %err,
3734 "failed to probe fts_messages presence; skipping db-resident FTS maintenance"
3735 );
3736 false
3737 });
3738 self.set_fts_messages_present_cache(present);
3739 present
3740 }
3741
3742 fn set_fts_messages_present_cache(&self, present: bool) {
3743 self.fts_messages_present_cache.store(
3744 if present {
3745 FTS_MESSAGES_PRESENT_PRESENT
3746 } else {
3747 FTS_MESSAGES_PRESENT_ABSENT
3748 },
3749 Ordering::Release,
3750 );
3751 }
3752
3753 fn invalidate_fts_messages_present_cache(&self) {
3754 self.fts_messages_present_cache
3755 .store(FTS_MESSAGES_PRESENT_UNKNOWN, Ordering::Release);
3756 }
3757
3758 fn invalidate_conversation_source_cache(&self, source_id: &str) {
3759 self.ensured_conversation_sources
3760 .lock()
3761 .retain(|key| key.id != source_id);
3762 }
3763
3764 fn close_cached_ephemeral_writer_best_effort_in_place(&mut self) {
3765 let cached = self.cached_ephemeral_writer.get_mut();
3766 if let CachedEphemeralWriter::Cached(conn) =
3767 std::mem::replace(cached, CachedEphemeralWriter::Uninitialized)
3768 {
3769 let mut conn = conn;
3770 conn.0.close_best_effort_in_place();
3771 }
3772 }
3773
3774 fn close_cached_ephemeral_writer_without_checkpoint_in_place(&mut self) -> Result<()> {
3775 let cached = self.cached_ephemeral_writer.get_mut();
3776 match std::mem::replace(cached, CachedEphemeralWriter::Uninitialized) {
3777 CachedEphemeralWriter::Cached(mut conn) => conn
3778 .0
3779 .close_without_checkpoint_in_place()
3780 .with_context(|| "closing cached frankensqlite writer without final checkpoint"),
3781 CachedEphemeralWriter::Uninitialized | CachedEphemeralWriter::InUse => Ok(()),
3782 }
3783 }
3784
3785 pub fn open_readonly(path: &Path) -> Result<Self> {
3787 Self::open_readonly_with_doctor_lock_timeout(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)
3788 }
3789
3790 pub fn open_readonly_with_doctor_lock_timeout(path: &Path, timeout: Duration) -> Result<Self> {
3795 let path_str = path.to_string_lossy().to_string();
3796 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
3797 let conn = open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
3798 .with_context(|| format!("opening frankensqlite db readonly at {}", path.display()))?;
3799 let storage = Self::new(conn, path.to_path_buf());
3800 storage.apply_readonly_config()?;
3801 Ok(storage)
3802 }
3803
3804 pub fn close(self) -> Result<()> {
3805 let mut this = self;
3806 this.close_cached_ephemeral_writer_best_effort_in_place();
3807 this.conn
3808 .close()
3809 .with_context(|| "closing frankensqlite connection")
3810 }
3811
3812 pub fn close_without_checkpoint(self) -> Result<()> {
3813 let mut this = self;
3814 this.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
3815 this.conn
3816 .close_without_checkpoint()
3817 .with_context(|| "closing frankensqlite connection without final checkpoint")
3818 }
3819
3820 pub fn close_best_effort_in_place(&mut self) {
3821 self.close_cached_ephemeral_writer_best_effort_in_place();
3822 self.conn.close_best_effort_in_place();
3823 }
3824
3825 pub fn close_without_checkpoint_in_place(&mut self) -> Result<()> {
3826 self.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
3827 self.conn
3828 .close_without_checkpoint_in_place()
3829 .with_context(|| "closing frankensqlite connection without final checkpoint")
3830 }
3831
3832 pub fn raw(&self) -> &FrankenConnection {
3834 &self.conn
3835 }
3836
3837 pub fn into_raw(self) -> FrankenConnection {
3840 let mut this = self;
3841 this.close_cached_ephemeral_writer_best_effort_in_place();
3842 this.conn
3843 }
3844
3845 pub fn apply_config(&self) -> Result<()> {
3852 self.conn
3856 .execute("PRAGMA journal_mode = WAL;")
3857 .with_context(|| "setting journal_mode")?;
3858 self.conn
3859 .execute("PRAGMA synchronous = NORMAL;")
3860 .with_context(|| "setting synchronous")?;
3861
3862 self.conn
3864 .execute("PRAGMA cache_size = -65536;")
3865 .with_context(|| "setting cache_size")?;
3866
3867 self.conn
3869 .execute("PRAGMA foreign_keys = ON;")
3870 .with_context(|| "setting foreign_keys")?;
3871
3872 self.conn
3874 .execute("PRAGMA busy_timeout = 5000;")
3875 .with_context(|| "setting busy_timeout")?;
3876
3877 let checkpoint_pragma =
3885 format!("PRAGMA wal_autocheckpoint = {DEFAULT_WAL_AUTOCHECKPOINT_PAGES};");
3886 let _ = self.conn.execute(&checkpoint_pragma);
3887 self.index_writer_checkpoint_pages
3888 .store(DEFAULT_WAL_AUTOCHECKPOINT_PAGES, Ordering::Relaxed);
3889 let _ = self.conn.execute("PRAGMA fsqlite.concurrent_mode = ON;");
3892 let _ = self.conn.execute("PRAGMA concurrent_mode = ON;");
3893 let autocommit_pragma =
3904 disable_autocommit_retain(|pragma| self.conn.execute(pragma).map(|_| ()))?;
3905 tracing::debug!(
3906 pragma = autocommit_pragma,
3907 "disabled frankensqlite autocommit_retain for storage connection"
3908 );
3909
3910 Ok(())
3911 }
3912
3913 fn apply_readonly_config(&self) -> Result<()> {
3914 self.conn
3915 .execute("PRAGMA query_only = 1;")
3916 .with_context(|| "setting query_only")?;
3917 self.conn
3918 .execute("PRAGMA busy_timeout = 5000;")
3919 .with_context(|| "setting busy_timeout")?;
3920 self.conn
3921 .execute("PRAGMA cache_size = -65536;")
3922 .with_context(|| "setting cache_size")?;
3923 self.conn
3924 .execute("PRAGMA foreign_keys = ON;")
3925 .with_context(|| "setting foreign_keys")?;
3926 Ok(())
3927 }
3928
3929 pub fn run_migrations(&self) -> Result<()> {
3947 transition_from_meta_version(&self.conn)?;
3948
3949 let base_result = build_cass_migrations_before_tail_cache()
3950 .run(&self.conn)
3951 .with_context(|| "running base schema migrations")?;
3952
3953 let mut applied = base_result.applied;
3954 if apply_conversation_tail_state_cache_migration(&self.conn)
3955 .with_context(|| "running conversation tail-state cache migration")?
3956 {
3957 applied.push(15);
3958 }
3959
3960 let post_result = build_cass_migrations_after_tail_cache()
3961 .run(&self.conn)
3962 .with_context(|| "running post-tail-cache schema migrations")?;
3963 applied.extend(post_result.applied);
3964
3965 let current = self.schema_version()?;
3966 if !applied.is_empty() {
3967 info!(
3968 applied = ?applied,
3969 current,
3970 was_fresh = base_result.was_fresh,
3971 "frankensqlite schema migrations applied"
3972 );
3973 }
3974
3975 self.sync_meta_schema_version(current)?;
3977
3978 Ok(())
3979 }
3980
3981 fn repair_missing_current_schema_objects(&self) -> Result<()> {
3986 let mut missing_tables = Vec::new();
3987 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
3988 if let Err(err) = self.conn.query(probe_sql) {
3989 if error_indicates_missing_table(&err) {
3990 missing_tables.push(table_name);
3991 continue;
3992 }
3993 return Err(err).with_context(|| {
3994 format!("probing required schema table {table_name} for completeness")
3995 });
3996 }
3997 }
3998
3999 if !missing_tables.is_empty() {
4000 info!(
4001 missing_tables = ?missing_tables,
4002 "repairing missing current-schema tables on an already-versioned cass database"
4003 );
4004
4005 for batch in current_schema_repair_batches_for_missing_tables(&missing_tables)? {
4006 self.conn
4007 .execute_batch(batch.sql)
4008 .with_context(|| format!("repairing current-schema batch {}", batch.name))?;
4009 }
4010
4011 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
4012 if !missing_tables.contains(&table_name) {
4013 continue;
4014 }
4015 self.conn
4016 .query(probe_sql)
4017 .with_context(|| format!("verifying repaired schema table {table_name}"))?;
4018 }
4019 }
4020 self.repair_missing_conversation_token_columns()?;
4021 Ok(())
4022 }
4023
4024 fn repair_missing_conversation_token_columns(&self) -> Result<()> {
4025 let columns = franken_table_column_names(&self.conn, "conversations")
4026 .with_context(|| "inspecting conversations columns for token-summary repair")?;
4027 let mut missing_columns = Vec::new();
4028 for &(column_name, column_type) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
4029 if columns.contains(column_name) {
4030 continue;
4031 }
4032 let sql = format!("ALTER TABLE conversations ADD COLUMN {column_name} {column_type};");
4033 self.conn.execute(&sql).with_context(|| {
4034 format!("adding missing conversations.{column_name} token-summary column")
4035 })?;
4036 missing_columns.push(column_name);
4037 }
4038 if !missing_columns.is_empty() {
4039 tracing::warn!(
4040 target: "cass::schema_repair",
4041 db_path = %self.db_path.display(),
4042 missing_columns = ?missing_columns,
4043 "cass#222: repaired missing conversations token-summary columns"
4044 );
4045 }
4046 Ok(())
4047 }
4048
4049 pub(crate) fn cleanup_orphan_fk_rows(&self) -> Result<OrphanFkCleanupReport> {
4068 let mut report = OrphanFkCleanupReport::default();
4069 let orphan_message_ids = match collect_orphan_message_ids(&self.conn) {
4070 Ok(ids) => ids,
4071 Err(err) if error_indicates_missing_table(&err) => {
4072 tracing::debug!(
4073 target: "cass::fk_repair",
4074 child_table = "messages",
4075 error = %err,
4076 "skipping orphan-message probe (table or column unavailable)"
4077 );
4078 Vec::new()
4079 }
4080 Err(err) => return Err(err),
4081 };
4082 if !orphan_message_ids.is_empty() {
4083 report.record("messages", orphan_message_ids.len() as i64);
4084 }
4085
4086 if !orphan_message_ids.is_empty() {
4087 delete_orphan_message_ids_bisecting_oom(&self.conn, &orphan_message_ids)
4088 .context("deleting orphan message rows and dependent children")?;
4089 }
4090
4091 for entry in ORPHAN_DIRECT_CHILD_TABLES {
4092 loop {
4093 let ids = match collect_direct_orphan_id_page(&self.conn, entry) {
4094 Ok(ids) => ids,
4095 Err(err)
4096 if error_indicates_missing_table(&err)
4097 || error_indicates_missing_column(&err) =>
4098 {
4099 tracing::debug!(
4103 target: "cass::fk_repair",
4104 child_table = entry.child_table,
4105 error = %err,
4106 "skipping orphan probe (table or column unavailable)"
4107 );
4108 break;
4109 }
4110 Err(err) => {
4111 return Err(err).with_context(|| {
4112 format!("probing orphan rows in {}", entry.child_table)
4113 });
4114 }
4115 };
4116 if ids.is_empty() {
4117 break;
4118 }
4119
4120 let deleted = delete_direct_orphan_ids_bisecting_oom(&self.conn, entry, &ids)
4121 .with_context(|| format!("deleting orphan rows from {}", entry.child_table))?;
4122 if deleted == 0 {
4123 break;
4124 }
4125 report.record(
4126 entry.child_table,
4127 i64::try_from(deleted).unwrap_or(i64::MAX),
4128 );
4129 }
4130 }
4131
4132 if report.total == 0 {
4133 return Ok(report);
4134 }
4135
4136 tracing::warn!(
4141 target: "cass::fk_repair",
4142 db_path = %self.db_path.display(),
4143 total_orphans = report.total,
4144 per_table = ?report.per_table,
4145 "cass#202: removed orphan rows left behind by interrupted index transactions"
4146 );
4147
4148 Ok(report)
4149 }
4150
4151 pub fn schema_version(&self) -> Result<i64> {
4153 let rows = self
4154 .conn
4155 .query("SELECT MAX(version) FROM _schema_migrations;")
4156 .with_context(|| "reading schema version from _schema_migrations")?;
4157
4158 if let Some(row) = rows.first()
4159 && let Ok(v) = row.get_typed::<Option<i64>>(0)
4160 {
4161 return Ok(v.unwrap_or(0));
4162 }
4163 Ok(0)
4164 }
4165
4166 fn sync_meta_schema_version(&self, version: i64) -> Result<()> {
4168 if self.conn.query("SELECT key FROM meta LIMIT 1;").is_err() {
4171 return Ok(());
4172 }
4173
4174 if let Ok(rows) = self
4176 .conn
4177 .query("SELECT value FROM meta WHERE key = 'schema_version';")
4178 && let Some(row) = rows.first()
4179 && let Ok(val) = row.get_typed::<String>(0)
4180 && val == version.to_string()
4181 {
4182 return Ok(()); }
4184
4185 self.conn
4186 .execute_compat(
4187 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1);",
4188 &[ParamValue::from(version.to_string())],
4189 )
4190 .with_context(|| "syncing meta schema_version")?;
4191
4192 Ok(())
4193 }
4194
4195 pub fn database_path(&self) -> Result<PathBuf> {
4197 Ok(self.db_path.clone())
4198 }
4199
4200 pub(crate) fn ephemeral_writer_preflight_verified(&self) -> bool {
4201 self.ephemeral_writer_preflight_verified
4202 .load(Ordering::Relaxed)
4203 }
4204
4205 pub(crate) fn mark_ephemeral_writer_preflight_verified(&self) {
4206 self.ephemeral_writer_preflight_verified
4207 .store(true, Ordering::Relaxed);
4208 }
4209
4210 pub(crate) fn index_writer_checkpoint_pages(&self) -> Option<i64> {
4211 let pages = self.index_writer_checkpoint_pages.load(Ordering::Relaxed);
4212 (pages != UNSET_INDEX_WRITER_CHECKPOINT_PAGES).then_some(pages)
4213 }
4214
4215 pub(crate) fn mark_index_writer_checkpoint_pages(&self, pages: i64) {
4216 self.index_writer_checkpoint_pages
4217 .store(pages, Ordering::Relaxed);
4218 }
4219
4220 pub(crate) fn index_writer_busy_timeout_ms(&self) -> Option<u64> {
4221 let timeout_ms = self.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
4222 (timeout_ms != UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS).then_some(timeout_ms)
4223 }
4224
4225 pub(crate) fn mark_index_writer_busy_timeout_ms(&self, timeout_ms: u64) {
4226 self.index_writer_busy_timeout_ms
4227 .store(timeout_ms, Ordering::Relaxed);
4228 }
4229
4230 pub fn open_or_rebuild(path: &Path) -> std::result::Result<Self, MigrationError> {
4232 if let Some(parent) = path.parent() {
4233 fs::create_dir_all(parent)?;
4234 }
4235
4236 if path.exists() {
4237 let check_result = check_schema_compatibility(path);
4238 match check_result {
4239 Ok(SchemaCheck::Compatible) | Ok(SchemaCheck::NeedsMigration) => {
4240 }
4242 Ok(SchemaCheck::NeedsRebuild(reason)) => {
4243 let backup_path = create_backup(path)?;
4244 cleanup_old_backups(path, MAX_BACKUPS)?;
4245 remove_database_files(path)?;
4246 return Err(MigrationError::RebuildRequired {
4247 reason,
4248 backup_path,
4249 });
4250 }
4251 Err(err) if schema_check_error_requires_rebuild(&err) => {
4252 let backup_path = create_backup(path)?;
4253 cleanup_old_backups(path, MAX_BACKUPS)?;
4254 remove_database_files(path)?;
4255 return Err(MigrationError::RebuildRequired {
4256 reason: format!("Database appears corrupted: {err}"),
4257 backup_path,
4258 });
4259 }
4260 Err(err) => return Err(MigrationError::Database(err)),
4261 }
4262 }
4263
4264 let storage = Self::open(path).map_err(|e| MigrationError::Other(e.to_string()))?;
4265 Ok(storage)
4266 }
4267}
4268
4269fn build_cass_migrations_before_tail_cache() -> MigrationRunner {
4285 MigrationRunner::new()
4286 .add(13, "full_schema_v13", MIGRATION_FRESH_SCHEMA)
4287 .add(14, "fts_contentless", MIGRATION_V14)
4288}
4289
4290fn build_cass_migrations_after_tail_cache() -> MigrationRunner {
4291 MigrationRunner::new()
4292 .add(16, "drop_redundant_message_conv_idx", MIGRATION_V16)
4293 .add(17, "drop_message_created_idx", MIGRATION_V17)
4294 .add(18, "conversation_tail_state_hot_table", MIGRATION_V18)
4295 .add(19, "conversation_external_lookup", MIGRATION_V19)
4296 .add(20, "conversation_external_tail_lookup", MIGRATION_V20)
4297}
4298
4299fn schema_migration_is_applied(conn: &FrankenConnection, version: i64) -> Result<bool> {
4300 let rows = conn
4301 .query_with_params(
4302 "SELECT 1 FROM _schema_migrations WHERE version = ?1 LIMIT 1;",
4303 &[SqliteValue::from(version)],
4304 )
4305 .with_context(|| format!("checking schema migration version {version}"))?;
4306 Ok(!rows.is_empty())
4307}
4308
4309fn apply_conversation_tail_state_cache_migration(conn: &FrankenConnection) -> Result<bool> {
4310 conn.execute("BEGIN IMMEDIATE;")
4311 .with_context(|| "starting v15 conversation tail-state migration transaction")?;
4312
4313 let result = (|| -> Result<bool> {
4314 if schema_migration_is_applied(conn, 15)? {
4315 conn.execute("COMMIT;")
4316 .with_context(|| "committing already-applied v15 migration transaction")?;
4317 return Ok(false);
4318 }
4319
4320 let started = Instant::now();
4321 let conversation_columns = franken_table_column_names(conn, "conversations")
4322 .with_context(|| "inspecting conversations columns before v15 migration")?;
4323 if !conversation_columns.contains("last_message_idx") {
4324 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_idx INTEGER;")
4325 .with_context(|| "adding v15 conversations.last_message_idx column")?;
4326 }
4327 if !conversation_columns.contains("last_message_created_at") {
4328 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_created_at INTEGER;")
4329 .with_context(|| "adding v15 conversations.last_message_created_at column")?;
4330 }
4331 conn.execute_batch(MIGRATION_V15_TAIL_STATE_TABLE)
4332 .with_context(|| "applying v15 conversation tail-state table schema")?;
4333 conn.execute_compat(
4334 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
4335 fparams![15_i64, "conversation_tail_state_cache"],
4336 )
4337 .with_context(|| "recording v15 conversation tail-state migration")?;
4338 conn.execute("COMMIT;")
4339 .with_context(|| "committing v15 conversation tail-state migration")?;
4340 info!(
4341 elapsed_ms = started.elapsed().as_millis(),
4342 "applied v15 conversation tail-state cache migration"
4343 );
4344 Ok(true)
4345 })();
4346
4347 if result.is_err() {
4348 let _ = conn.execute("ROLLBACK;");
4349 }
4350
4351 result
4352}
4353
4354fn franken_table_column_names(
4355 conn: &FrankenConnection,
4356 table_name: &str,
4357) -> Result<HashSet<String>> {
4358 if !table_name
4359 .chars()
4360 .all(|c| c.is_ascii_alphanumeric() || c == '_')
4361 {
4362 return Err(anyhow!(
4363 "unsafe table name for PRAGMA table_info: {table_name}"
4364 ));
4365 }
4366
4367 conn.query_map_collect(
4368 &format!("PRAGMA table_info({table_name})"),
4369 fparams![],
4370 |row: &FrankenRow| row.get_typed::<String>(1),
4371 )
4372 .with_context(|| format!("reading PRAGMA table_info({table_name})"))
4373 .map(|columns| columns.into_iter().collect())
4374}
4375
/// SQL script that lays down the consolidated schema in one pass: core
/// tables, sources, analytics and rollup tables, and their indexes. The
/// inline SQL comments record which migration version (V1..V15) each piece
/// originated from.
///
/// Every statement is guarded (`CREATE ... IF NOT EXISTS`, `INSERT OR IGNORE`).
/// Besides the DDL, the script seeds the `local` row in `sources` and the
/// default rows in `model_pricing`.
const MIGRATION_FRESH_SCHEMA: &str = r"
-- Core tables (V1)
CREATE TABLE IF NOT EXISTS meta (
 key TEXT PRIMARY KEY,
 value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
 id INTEGER PRIMARY KEY,
 slug TEXT NOT NULL UNIQUE,
 name TEXT NOT NULL,
 version TEXT,
 kind TEXT NOT NULL,
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
 id INTEGER PRIMARY KEY,
 path TEXT NOT NULL UNIQUE,
 display_name TEXT
);

-- Sources (V4)
CREATE TABLE IF NOT EXISTS sources (
 id TEXT PRIMARY KEY,
 kind TEXT NOT NULL,
 host_label TEXT,
 machine_id TEXT,
 platform TEXT,
 config_json TEXT,
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);

-- Conversations: V1 base + V5 provenance + V7 metadata_bin + V10 token summary
CREATE TABLE IF NOT EXISTS conversations (
 id INTEGER PRIMARY KEY,
 agent_id INTEGER NOT NULL REFERENCES agents(id),
 workspace_id INTEGER REFERENCES workspaces(id),
 source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
 external_id TEXT,
 title TEXT,
 source_path TEXT NOT NULL,
 started_at INTEGER,
 ended_at INTEGER,
 approx_tokens INTEGER,
 metadata_json TEXT,
 origin_host TEXT,
 metadata_bin BLOB,
 total_input_tokens INTEGER,
 total_output_tokens INTEGER,
 total_cache_read_tokens INTEGER,
 total_cache_creation_tokens INTEGER,
 grand_total_tokens INTEGER,
 estimated_cost_usd REAL,
 primary_model TEXT,
 api_call_count INTEGER,
 tool_call_count INTEGER,
 user_message_count INTEGER,
 assistant_message_count INTEGER,
 -- V15 columns are included in the fresh schema so fresh DB creation does
 -- not need ALTER TABLE on conversations. That ALTER path can duplicate
 -- provenance autoindex state in frankensqlite when the named unique
 -- provenance index already exists.
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

-- Named unique index avoids autoindex issues if table is ever recreated
CREATE UNIQUE INDEX IF NOT EXISTS idx_conversations_provenance
 ON conversations(source_id, agent_id, external_id);

-- Messages: V1 base + V7 extra_bin
CREATE TABLE IF NOT EXISTS messages (
 id INTEGER PRIMARY KEY,
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 idx INTEGER NOT NULL,
 role TEXT NOT NULL,
 author TEXT,
 created_at INTEGER,
 content TEXT NOT NULL,
 extra_json TEXT,
 extra_bin BLOB,
 UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
 id INTEGER PRIMARY KEY,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 file_path TEXT,
 start_line INTEGER,
 end_line INTEGER,
 language TEXT,
 snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
 id INTEGER PRIMARY KEY,
 name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
 tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
 PRIMARY KEY (conversation_id, tag_id)
);

-- Daily stats (V8)
CREATE TABLE IF NOT EXISTS daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 session_count INTEGER NOT NULL DEFAULT 0,
 message_count INTEGER NOT NULL DEFAULT 0,
 total_chars INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id)
);

-- Embedding jobs (V9)
CREATE TABLE IF NOT EXISTS embedding_jobs (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 db_path TEXT NOT NULL,
 model_id TEXT NOT NULL,
 status TEXT NOT NULL DEFAULT 'pending',
 total_docs INTEGER NOT NULL DEFAULT 0,
 completed_docs INTEGER NOT NULL DEFAULT 0,
 error_message TEXT,
 created_at TEXT NOT NULL DEFAULT (datetime('now')),
 started_at TEXT,
 completed_at TEXT
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');

-- Token usage ledger (V10)
CREATE TABLE IF NOT EXISTS token_usage (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 conversation_id INTEGER NOT NULL,
 agent_id INTEGER NOT NULL,
 workspace_id INTEGER,
 source_id TEXT NOT NULL DEFAULT 'local',
 timestamp_ms INTEGER NOT NULL,
 day_id INTEGER NOT NULL,
 model_name TEXT,
 model_family TEXT,
 model_tier TEXT,
 service_tier TEXT,
 provider TEXT,
 input_tokens INTEGER,
 output_tokens INTEGER,
 cache_read_tokens INTEGER,
 cache_creation_tokens INTEGER,
 thinking_tokens INTEGER,
 total_tokens INTEGER,
 estimated_cost_usd REAL,
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 data_source TEXT NOT NULL DEFAULT 'api',
 UNIQUE(message_id)
);

-- Token daily stats (V10)
CREATE TABLE IF NOT EXISTS token_daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 model_family TEXT NOT NULL DEFAULT 'all',
 api_call_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_message_count INTEGER NOT NULL DEFAULT 0,
 total_input_tokens INTEGER NOT NULL DEFAULT 0,
 total_output_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
 total_content_chars INTEGER NOT NULL DEFAULT 0,
 total_tool_calls INTEGER NOT NULL DEFAULT 0,
 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
 session_count INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

-- Model pricing (V10)
CREATE TABLE IF NOT EXISTS model_pricing (
 model_pattern TEXT NOT NULL,
 provider TEXT NOT NULL,
 input_cost_per_mtok REAL NOT NULL,
 output_cost_per_mtok REAL NOT NULL,
 cache_read_cost_per_mtok REAL,
 cache_creation_cost_per_mtok REAL,
 effective_date TEXT NOT NULL,
 PRIMARY KEY (model_pattern, effective_date)
);

INSERT OR IGNORE INTO model_pricing VALUES
 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Message metrics: V11 base + V12 model dimensions
CREATE TABLE IF NOT EXISTS message_metrics (
 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
 created_at_ms INTEGER NOT NULL,
 hour_id INTEGER NOT NULL,
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 content_tokens_est INTEGER NOT NULL,
 api_input_tokens INTEGER,
 api_output_tokens INTEGER,
 api_cache_read_tokens INTEGER,
 api_cache_creation_tokens INTEGER,
 api_thinking_tokens INTEGER,
 api_service_tier TEXT,
 api_data_source TEXT NOT NULL DEFAULT 'estimated',
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 has_plan INTEGER NOT NULL DEFAULT 0,
 model_name TEXT,
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 provider TEXT NOT NULL DEFAULT 'unknown'
);

-- Hourly rollups: V11 base + V13 plan columns
CREATE TABLE IF NOT EXISTS usage_hourly (
 hour_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

-- Daily rollups: V11 base + V13 plan columns
CREATE TABLE IF NOT EXISTS usage_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

-- Model daily rollups (V12)
CREATE TABLE IF NOT EXISTS usage_models_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

-- All indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
4745
/// A named group of tables paired with the SQL script that creates them.
///
/// Batches are the unit of selection when repairing a schema: a batch is
/// picked as a whole as soon as any one of its `tables` is reported missing.
#[derive(Clone, Copy)]
struct SchemaRepairBatch {
    /// Short label identifying the batch.
    name: &'static str,
    /// Tables this batch covers.
    tables: &'static [&'static str],
    /// SQL script associated with the batch (`CREATE ... IF NOT EXISTS` plus
    /// any seeding statements for the covered tables).
    sql: &'static str,
}
4752
/// Repair script for `sources`: creates the table if it is absent and seeds
/// the `local` row (`INSERT OR IGNORE`, so an existing row is left untouched).
const CURRENT_SCHEMA_REPAIR_SOURCES_SQL: &str = r"
CREATE TABLE IF NOT EXISTS sources (
 id TEXT PRIMARY KEY,
 kind TEXT NOT NULL,
 host_label TEXT,
 machine_id TEXT,
 platform TEXT,
 config_json TEXT,
 created_at INTEGER NOT NULL,
 updated_at INTEGER NOT NULL
);

INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
4768
/// Repair script for `daily_stats`: creates the table and its two indexes
/// (by agent and by source) if they are absent.
const CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 session_count INTEGER NOT NULL DEFAULT 0,
 message_count INTEGER NOT NULL DEFAULT 0,
 total_chars INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
4784
/// Repair script for `conversation_external_lookup`: creates the table if it
/// is absent and (re)populates it from every `conversations` row that has a
/// non-NULL `external_id`.
///
/// The lookup key is built as
/// `<length(source_id)>:<source_id>:<agent_id>:<length(external_id)>:<external_id>`;
/// existing keys are overwritten (`INSERT OR REPLACE`).
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
 CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
 CAST(agent_id AS TEXT) || ':' ||
 CAST(length(external_id) AS TEXT) || ':' || external_id,
 id
FROM conversations
WHERE external_id IS NOT NULL;
";
4800
/// Repair script for `conversation_tail_state` and
/// `conversation_external_tail_lookup`: creates both tables if absent, then
/// (re)populates the tail lookup from `conversations` rows with a non-NULL
/// `external_id`.
///
/// The lookup key uses the same
/// `<length(source_id)>:<source_id>:<agent_id>:<length(external_id)>:<external_id>`
/// layout. Tail columns come from `conversation_tail_state` via a LEFT JOIN,
/// so they are NULL for conversations without a tail-state row.
const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
 conversation_id INTEGER PRIMARY KEY,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
 lookup_key TEXT PRIMARY KEY,
 conversation_id INTEGER NOT NULL,
 ended_at INTEGER,
 last_message_idx INTEGER,
 last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
 lookup_key,
 conversation_id,
 ended_at,
 last_message_idx,
 last_message_created_at
)
SELECT
 CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
 CAST(c.agent_id AS TEXT) || ':' ||
 CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
 c.id,
 ts.ended_at,
 ts.last_message_idx,
 ts.last_message_created_at
FROM conversations c
LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
WHERE c.external_id IS NOT NULL;
";
4836
/// Repair script for `embedding_jobs`: creates the table if absent, plus a
/// partial unique index on `(db_path, model_id)` restricted to rows whose
/// status is `pending` or `running`.
const CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS embedding_jobs (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 db_path TEXT NOT NULL,
 model_id TEXT NOT NULL,
 status TEXT NOT NULL DEFAULT 'pending',
 total_docs INTEGER NOT NULL DEFAULT 0,
 completed_docs INTEGER NOT NULL DEFAULT 0,
 error_message TEXT,
 created_at TEXT NOT NULL DEFAULT (datetime('now')),
 started_at TEXT,
 completed_at TEXT
);

CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
4855
/// Repair script for the token analytics tables: creates `token_usage`,
/// `token_daily_stats` and `model_pricing` (with the indexes on the first
/// two) if absent, and seeds the default `model_pricing` rows with
/// `INSERT OR IGNORE`.
const CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS token_usage (
 id INTEGER PRIMARY KEY AUTOINCREMENT,
 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
 conversation_id INTEGER NOT NULL,
 agent_id INTEGER NOT NULL,
 workspace_id INTEGER,
 source_id TEXT NOT NULL DEFAULT 'local',
 timestamp_ms INTEGER NOT NULL,
 day_id INTEGER NOT NULL,
 model_name TEXT,
 model_family TEXT,
 model_tier TEXT,
 service_tier TEXT,
 provider TEXT,
 input_tokens INTEGER,
 output_tokens INTEGER,
 cache_read_tokens INTEGER,
 cache_creation_tokens INTEGER,
 thinking_tokens INTEGER,
 total_tokens INTEGER,
 estimated_cost_usd REAL,
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 data_source TEXT NOT NULL DEFAULT 'api',
 UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

CREATE TABLE IF NOT EXISTS token_daily_stats (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 source_id TEXT NOT NULL DEFAULT 'all',
 model_family TEXT NOT NULL DEFAULT 'all',
 api_call_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_message_count INTEGER NOT NULL DEFAULT 0,
 total_input_tokens INTEGER NOT NULL DEFAULT 0,
 total_output_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
 total_content_chars INTEGER NOT NULL DEFAULT 0,
 total_tool_calls INTEGER NOT NULL DEFAULT 0,
 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
 session_count INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL,
 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

CREATE TABLE IF NOT EXISTS model_pricing (
 model_pattern TEXT NOT NULL,
 provider TEXT NOT NULL,
 input_cost_per_mtok REAL NOT NULL,
 output_cost_per_mtok REAL NOT NULL,
 cache_read_cost_per_mtok REAL,
 cache_creation_cost_per_mtok REAL,
 effective_date TEXT NOT NULL,
 PRIMARY KEY (model_pattern, effective_date)
);

INSERT OR IGNORE INTO model_pricing VALUES
 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
";
4941
/// Repair script for the per-message metrics and rollup tables: creates
/// `message_metrics`, `usage_hourly`, `usage_daily` and `usage_models_daily`
/// together with their indexes, each guarded by `IF NOT EXISTS`.
const CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS message_metrics (
 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
 created_at_ms INTEGER NOT NULL,
 hour_id INTEGER NOT NULL,
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 content_tokens_est INTEGER NOT NULL,
 api_input_tokens INTEGER,
 api_output_tokens INTEGER,
 api_cache_read_tokens INTEGER,
 api_cache_creation_tokens INTEGER,
 api_thinking_tokens INTEGER,
 api_service_tier TEXT,
 api_data_source TEXT NOT NULL DEFAULT 'estimated',
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 has_plan INTEGER NOT NULL DEFAULT 0,
 model_name TEXT,
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 provider TEXT NOT NULL DEFAULT 'unknown'
);

CREATE TABLE IF NOT EXISTS usage_hourly (
 hour_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_models_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
5067
/// Registry of repair batches, in the order they are considered when
/// selecting repairs for missing tables.
///
/// Each entry names the tables it covers and the SQL script that creates
/// them; tables that share a script are grouped into one batch.
const CURRENT_SCHEMA_REPAIR_BATCHES: &[SchemaRepairBatch] = &[
    SchemaRepairBatch {
        name: "sources",
        tables: &["sources"],
        sql: CURRENT_SCHEMA_REPAIR_SOURCES_SQL,
    },
    SchemaRepairBatch {
        name: "daily_stats",
        tables: &["daily_stats"],
        sql: CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL,
    },
    SchemaRepairBatch {
        name: "conversation_external_lookup",
        tables: &["conversation_external_lookup"],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL,
    },
    // The tail-lookup script creates both tables, so they are repaired together.
    SchemaRepairBatch {
        name: "conversation_external_tail_lookup",
        tables: &[
            "conversation_tail_state",
            "conversation_external_tail_lookup",
        ],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL,
    },
    SchemaRepairBatch {
        name: "embedding_jobs",
        tables: &["embedding_jobs"],
        sql: CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL,
    },
    SchemaRepairBatch {
        name: "token_analytics",
        tables: &["token_usage", "token_daily_stats", "model_pricing"],
        sql: CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL,
    },
    SchemaRepairBatch {
        name: "message_rollups",
        tables: &[
            "message_metrics",
            "usage_hourly",
            "usage_daily",
            "usage_models_daily",
        ],
        sql: CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL,
    },
];
5113
5114fn current_schema_repair_batches_for_missing_tables(
5115 missing_tables: &[&'static str],
5116) -> Result<Vec<&'static SchemaRepairBatch>> {
5117 let missing_set: HashSet<&'static str> = missing_tables.iter().copied().collect();
5118 let mut selected_batches = Vec::new();
5119 let mut covered_tables = HashSet::new();
5120
5121 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
5122 if !batch
5123 .tables
5124 .iter()
5125 .any(|table_name| missing_set.contains(table_name))
5126 {
5127 continue;
5128 }
5129 selected_batches.push(batch);
5130 covered_tables.extend(batch.tables.iter().copied());
5131 }
5132
5133 for &table_name in missing_tables {
5134 if !covered_tables.contains(table_name) {
5135 return Err(anyhow!(
5136 "no current-schema repair batch registered for missing table {table_name}"
5137 ));
5138 }
5139 }
5140
5141 Ok(selected_batches)
5142}
5143
/// `(version, name)` pairs for schema migrations 1 through 20, listed in
/// ascending version order.
const MIGRATION_NAMES: [(i64, &str); 20] = [
    (1, "core_tables"),
    (2, "fts_messages"),
    (3, "fts_messages_rebuild"),
    (4, "sources"),
    (5, "provenance_columns"),
    (6, "source_path_index"),
    (7, "msgpack_columns"),
    (8, "daily_stats"),
    (9, "embedding_jobs"),
    (10, "token_analytics"),
    (11, "message_metrics"),
    (12, "model_dimensions"),
    (13, "plan_token_rollups"),
    (14, "fts_contentless"),
    (15, "conversation_tail_state_cache"),
    (16, "drop_redundant_message_conv_idx"),
    (17, "drop_message_created_idx"),
    (18, "conversation_tail_state_hot_table"),
    (19, "conversation_external_lookup"),
    (20, "conversation_external_tail_lookup"),
];
5167
5168fn transition_from_meta_version(conn: &FrankenConnection) -> Result<()> {
5187 if conn
5191 .query("SELECT version FROM \"_schema_migrations\";")
5192 .is_ok()
5193 {
5194 return Ok(());
5195 }
5196
5197 if conn.query("SELECT key FROM meta;").is_err() {
5199 return Ok(());
5201 }
5202
5203 let rows = conn
5205 .query("SELECT value FROM meta WHERE key = 'schema_version';")
5206 .with_context(|| "reading schema_version from meta")?;
5207
5208 let current_version: i64 = rows
5209 .first()
5210 .and_then(|row| row.get_typed::<String>(0).ok())
5211 .and_then(|s| s.parse().ok())
5212 .unwrap_or(0);
5213
5214 if current_version == 0 {
5215 if conn.query("SELECT id FROM conversations LIMIT 1;").is_err() {
5217 return Ok(());
5219 }
5220
5221 info!("meta.schema_version=0 but tables exist; skipping transition (corrupted state)");
5224 return Ok(());
5225 }
5226
5227 info!(
5229 current_version,
5230 "transitioning schema tracking from meta table to _schema_migrations"
5231 );
5232
5233 conn.execute(
5234 "CREATE TABLE IF NOT EXISTS _schema_migrations (\
5235 version INTEGER PRIMARY KEY, \
5236 name TEXT NOT NULL, \
5237 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))\
5238 );",
5239 )
5240 .with_context(|| "creating _schema_migrations table for transition")?;
5241
5242 let backfill_through_version = if (10..13).contains(¤t_version) {
5243 13
5244 } else {
5245 current_version
5246 };
5247
5248 for &(version, name) in &MIGRATION_NAMES {
5249 if version > backfill_through_version {
5250 break;
5251 }
5252 conn.execute_compat(
5253 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
5254 &[ParamValue::from(version), ParamValue::from(name)],
5255 )
5256 .with_context(|| format!("backfilling _schema_migrations version {version}"))?;
5257 }
5258
5259 info!(
5260 current_version,
5261 backfill_through_version,
5262 "schema version transition complete: backfilled legacy meta schema versions"
5263 );
5264
5265 Ok(())
5266}
5267
/// `(table name, probe query)` pairs; each probe is a `SELECT ... LIMIT 1`
/// against the named table.
///
/// NOTE(review): presumably a failing probe marks the table as missing from the
/// current schema — confirm at the call site, which is outside this section.
const REQUIRED_CURRENT_SCHEMA_TABLE_PROBES: &[(&str, &str)] = &[
    ("sources", "SELECT id FROM sources LIMIT 1;"),
    ("daily_stats", "SELECT day_id FROM daily_stats LIMIT 1;"),
    (
        "conversation_external_lookup",
        "SELECT lookup_key FROM conversation_external_lookup LIMIT 1;",
    ),
    (
        "conversation_tail_state",
        "SELECT conversation_id FROM conversation_tail_state LIMIT 1;",
    ),
    (
        "conversation_external_tail_lookup",
        "SELECT lookup_key, last_message_idx FROM conversation_external_tail_lookup LIMIT 1;",
    ),
    ("embedding_jobs", "SELECT id FROM embedding_jobs LIMIT 1;"),
    ("token_usage", "SELECT id FROM token_usage LIMIT 1;"),
    (
        "token_daily_stats",
        "SELECT day_id FROM token_daily_stats LIMIT 1;",
    ),
    (
        "model_pricing",
        "SELECT model_pattern FROM model_pricing LIMIT 1;",
    ),
    (
        "message_metrics",
        "SELECT message_id FROM message_metrics LIMIT 1;",
    ),
    ("usage_hourly", "SELECT hour_id FROM usage_hourly LIMIT 1;"),
    ("usage_daily", "SELECT day_id FROM usage_daily LIMIT 1;"),
    (
        "usage_models_daily",
        "SELECT day_id FROM usage_models_daily LIMIT 1;",
    ),
];
5304
/// `(column name, SQL type)` pairs for token/usage summary columns.
///
/// NOTE(review): the constant name suggests these belong to the
/// `conversations` table; the consumer is outside this section — confirm there.
const REQUIRED_CONVERSATION_TOKEN_COLUMNS: &[(&str, &str)] = &[
    ("total_input_tokens", "INTEGER"),
    ("total_output_tokens", "INTEGER"),
    ("total_cache_read_tokens", "INTEGER"),
    ("total_cache_creation_tokens", "INTEGER"),
    ("grand_total_tokens", "INTEGER"),
    ("estimated_cost_usd", "REAL"),
    ("primary_model", "TEXT"),
    ("api_call_count", "INTEGER"),
    ("tool_call_count", "INTEGER"),
    ("user_message_count", "INTEGER"),
    ("assistant_message_count", "INTEGER"),
];
5318
/// Reports whether the rendered error text contains `no such table`,
/// ignoring ASCII case.
fn error_indicates_missing_table(err: &impl std::fmt::Display) -> bool {
    const NEEDLE: &[u8] = b"no such table";
    let rendered = err.to_string();
    // A case-insensitive byte-window scan is equivalent to lowercasing the
    // text and searching for the (all-lowercase ASCII) needle.
    rendered
        .as_bytes()
        .windows(NEEDLE.len())
        .any(|window| window.eq_ignore_ascii_case(NEEDLE))
}
5324
/// Reports whether the rendered error text contains `no such column`,
/// ignoring ASCII case.
fn error_indicates_missing_column(err: &impl std::fmt::Display) -> bool {
    const NEEDLE: &[u8] = b"no such column";
    let rendered = err.to_string();
    // A case-insensitive byte-window scan is equivalent to lowercasing the
    // text and searching for the (all-lowercase ASCII) needle.
    rendered
        .as_bytes()
        .windows(NEEDLE.len())
        .any(|window| window.eq_ignore_ascii_case(NEEDLE))
}
5330
/// Maximum number of ids handled per step of orphan cleanup: the chunk size for
/// `IN (...)` deletes and the `LIMIT` passed to the orphan-id page queries.
const ORPHAN_FK_ID_CHUNK_SIZE: usize = 256;
5332
5333fn collect_orphan_message_ids(conn: &FrankenConnection) -> Result<Vec<i64>> {
5334 let min_conversation_id = conn
5335 .query_map_collect(
5336 "SELECT conversation_id
5337 FROM messages
5338 ORDER BY conversation_id ASC
5339 LIMIT 1",
5340 fparams![],
5341 |row| row.get_typed(0),
5342 )
5343 .context("finding minimum message conversation id for orphan FK cleanup")?
5344 .into_iter()
5345 .next();
5346 let Some(min_conversation_id) = min_conversation_id else {
5347 return Ok(Vec::new());
5348 };
5349 let max_conversation_id: i64 = conn
5350 .query_row_map(
5351 "SELECT conversation_id
5352 FROM messages
5353 ORDER BY conversation_id DESC
5354 LIMIT 1",
5355 fparams![],
5356 |row| row.get_typed(0),
5357 )
5358 .context("finding maximum message conversation id for orphan FK cleanup")?;
5359
5360 let parent_conversation_ids: Vec<i64> = conn
5361 .query_map_collect(
5362 "SELECT id
5363 FROM conversations
5364 WHERE id BETWEEN ?1 AND ?2
5365 ORDER BY id",
5366 fparams![min_conversation_id, max_conversation_id],
5367 |row| row.get_typed(0),
5368 )
5369 .context("listing parent conversation ids for orphan FK cleanup")?;
5370
5371 let mut message_ids = Vec::new();
5372 let mut gap_start = min_conversation_id;
5373 for parent_id in parent_conversation_ids {
5374 if parent_id < gap_start {
5375 continue;
5376 }
5377 if parent_id > max_conversation_id {
5378 break;
5379 }
5380 if gap_start < parent_id {
5381 collect_message_ids_for_conversation_gap(
5382 conn,
5383 gap_start,
5384 parent_id.saturating_sub(1),
5385 &mut message_ids,
5386 )?;
5387 }
5388 if parent_id == i64::MAX {
5389 return Ok(message_ids);
5390 }
5391 gap_start = parent_id + 1;
5392 }
5393 if gap_start <= max_conversation_id {
5394 collect_message_ids_for_conversation_gap(
5395 conn,
5396 gap_start,
5397 max_conversation_id,
5398 &mut message_ids,
5399 )?;
5400 }
5401
5402 Ok(message_ids)
5403}
5404
5405fn collect_message_ids_for_conversation_gap(
5406 conn: &FrankenConnection,
5407 gap_start: i64,
5408 gap_end: i64,
5409 message_ids: &mut Vec<i64>,
5410) -> Result<()> {
5411 let (sql, params) = if gap_start == gap_end {
5412 (
5413 "SELECT id FROM messages WHERE conversation_id = ?1",
5414 vec![SqliteValue::from(gap_start)],
5415 )
5416 } else {
5417 (
5418 "SELECT id FROM messages WHERE conversation_id BETWEEN ?1 AND ?2",
5419 vec![SqliteValue::from(gap_start), SqliteValue::from(gap_end)],
5420 )
5421 };
5422 let rows = conn.query_with_params(sql, ¶ms).with_context(|| {
5423 format!("listing orphan message ids for conversation-id gap {gap_start}..={gap_end}")
5424 })?;
5425 message_ids.reserve(rows.len());
5426 for row in rows {
5427 message_ids.push(row.get_typed(0)?);
5428 }
5429 Ok(())
5430}
5431
5432fn delete_rows_by_i64_chunks(
5433 tx: &FrankenTransaction<'_>,
5434 delete_many_sql_prefix: &'static str,
5435 ids: &[i64],
5436) -> Result<usize> {
5437 if ids.is_empty() {
5438 return Ok(0);
5439 }
5440
5441 let full_chunk_sql = delete_rows_by_i64_sql(delete_many_sql_prefix, ORPHAN_FK_ID_CHUNK_SIZE);
5442 let tail_len = ids.len() % ORPHAN_FK_ID_CHUNK_SIZE;
5443 let tail_sql =
5444 (tail_len != 0).then(|| delete_rows_by_i64_sql(delete_many_sql_prefix, tail_len));
5445
5446 let mut deleted = 0;
5447 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5448 let sql = if chunk.len() == ORPHAN_FK_ID_CHUNK_SIZE {
5449 &full_chunk_sql
5450 } else {
5451 tail_sql.as_ref().unwrap_or(&full_chunk_sql)
5452 };
5453 let params = chunk
5454 .iter()
5455 .map(|id| SqliteValue::from(*id))
5456 .collect::<Vec<_>>();
5457 deleted += tx.execute_with_params(sql, ¶ms)?;
5458 }
5459 Ok(deleted)
5460}
5461
5462fn delete_rows_by_i64_sql(delete_many_sql_prefix: &'static str, count: usize) -> String {
5463 let placeholders = sql_placeholders(count);
5464 format!("{delete_many_sql_prefix} ({placeholders})")
5465}
5466
/// Returns `count` `?` placeholders separated by `", "` (empty for `0`).
fn sql_placeholders(count: usize) -> String {
    let mut list = String::new();
    for position in 0..count {
        if position != 0 {
            list.push_str(", ");
        }
        list.push('?');
    }
    list
}
5470
5471fn delete_orphan_message_ids_bisecting_oom(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5472 let mut deleted = 0usize;
5473 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5474 deleted = deleted.saturating_add(delete_orphan_message_id_chunk(conn, chunk)?);
5475 }
5476 Ok(deleted)
5477}
5478
5479fn delete_orphan_message_id_chunk(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5480 if ids.is_empty() {
5481 return Ok(0);
5482 }
5483
5484 match delete_orphan_message_id_chunk_once(conn, ids) {
5485 Ok(deleted) => Ok(deleted),
5486 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5487 let split_at = ids.len() / 2;
5488 tracing::warn!(
5489 target: "cass::fk_repair",
5490 rows = ids.len(),
5491 left = split_at,
5492 right = ids.len().saturating_sub(split_at),
5493 error = %err,
5494 "orphan-message cleanup ran out of memory; retrying as smaller batches"
5495 );
5496 let left = delete_orphan_message_id_chunk(conn, &ids[..split_at])?;
5497 let right = delete_orphan_message_id_chunk(conn, &ids[split_at..])?;
5498 Ok(left.saturating_add(right))
5499 }
5500 Err(err) => Err(err),
5501 }
5502}
5503
5504fn delete_orphan_message_id_chunk_once(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5505 let mut tx = conn.transaction()?;
5506 let mut deleted = 0usize;
5507 for entry in ORPHAN_MESSAGE_DEPENDENT_TABLES {
5508 match delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids) {
5509 Ok(count) => {
5510 deleted = deleted.saturating_add(count);
5511 }
5512 Err(err) if error_indicates_missing_table(&err) => {
5513 tracing::debug!(
5514 target: "cass::fk_repair",
5515 child_table = entry.child_table,
5516 error = %err,
5517 "skipping orphan-message dependent cleanup (table unavailable)"
5518 );
5519 }
5520 Err(err) => {
5521 return Err(err).with_context(|| {
5522 format!(
5523 "deleting rows from {} that depend on orphan messages",
5524 entry.child_table
5525 )
5526 });
5527 }
5528 }
5529 }
5530 deleted = deleted.saturating_add(
5531 delete_rows_by_i64_chunks(&tx, "DELETE FROM messages WHERE id IN", ids)
5532 .context("deleting orphan rows from messages")?,
5533 );
5534 tx.commit()?;
5535 Ok(deleted)
5536}
5537
5538fn collect_direct_orphan_id_page(
5539 conn: &FrankenConnection,
5540 entry: &'static OrphanFkTable,
5541) -> Result<Vec<i64>> {
5542 Ok(conn.query_map_collect(
5543 entry.orphan_id_page_sql,
5544 fparams![i64::try_from(ORPHAN_FK_ID_CHUNK_SIZE).unwrap_or(i64::MAX)],
5545 |row| row.get_typed(0),
5546 )?)
5547}
5548
5549fn delete_direct_orphan_ids_bisecting_oom(
5550 conn: &FrankenConnection,
5551 entry: &'static OrphanFkTable,
5552 ids: &[i64],
5553) -> Result<usize> {
5554 let mut deleted = 0usize;
5555 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5556 deleted = deleted.saturating_add(delete_direct_orphan_id_chunk(conn, entry, chunk)?);
5557 }
5558 Ok(deleted)
5559}
5560
5561fn delete_direct_orphan_id_chunk(
5562 conn: &FrankenConnection,
5563 entry: &'static OrphanFkTable,
5564 ids: &[i64],
5565) -> Result<usize> {
5566 if ids.is_empty() {
5567 return Ok(0);
5568 }
5569
5570 match delete_direct_orphan_id_chunk_once(conn, entry, ids) {
5571 Ok(deleted) => Ok(deleted),
5572 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5573 let split_at = ids.len() / 2;
5574 tracing::warn!(
5575 target: "cass::fk_repair",
5576 child_table = entry.child_table,
5577 rows = ids.len(),
5578 left = split_at,
5579 right = ids.len().saturating_sub(split_at),
5580 error = %err,
5581 "direct orphan cleanup ran out of memory; retrying as smaller batches"
5582 );
5583 let left = delete_direct_orphan_id_chunk(conn, entry, &ids[..split_at])?;
5584 let right = delete_direct_orphan_id_chunk(conn, entry, &ids[split_at..])?;
5585 Ok(left.saturating_add(right))
5586 }
5587 Err(err) => Err(err),
5588 }
5589}
5590
5591fn delete_direct_orphan_id_chunk_once(
5592 conn: &FrankenConnection,
5593 entry: &'static OrphanFkTable,
5594 ids: &[i64],
5595) -> Result<usize> {
5596 let mut tx = conn.transaction()?;
5597 let deleted = delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids)?;
5598 tx.commit()?;
5599 Ok(deleted)
5600}
5601
/// Describes one child table that is cleaned of rows whose parent row is gone.
struct OrphanFkTable {
    /// Name of the child table; used in log fields.
    child_table: &'static str,
    /// Query returning one page of orphaned key values; takes the page size as `?1`.
    orphan_id_page_sql: &'static str,
    /// `DELETE ... WHERE <key> IN` prefix completed with a placeholder list by
    /// `delete_rows_by_i64_sql`.
    delete_many_sql_prefix: &'static str,
}
5612
/// Child tables scanned directly for orphaned rows.
///
/// `message_metrics`, `token_usage` and `snippets` are checked against
/// `messages` by `message_id`; `conversation_tags` is checked against
/// `conversations` by `conversation_id`. Every page query orders by the key
/// column and limits the page to `?1` rows.
const ORPHAN_DIRECT_CHILD_TABLES: &[OrphanFkTable] = &[
    OrphanFkTable {
        child_table: "message_metrics",
        orphan_id_page_sql: "SELECT message_id FROM message_metrics \
                             WHERE NOT EXISTS (\
                             SELECT 1 FROM messages \
                             WHERE messages.id = message_metrics.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "token_usage",
        orphan_id_page_sql: "SELECT message_id FROM token_usage \
                             WHERE NOT EXISTS (\
                             SELECT 1 FROM messages \
                             WHERE messages.id = token_usage.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "snippets",
        orphan_id_page_sql: "SELECT message_id FROM snippets \
                             WHERE NOT EXISTS (\
                             SELECT 1 FROM messages \
                             WHERE messages.id = snippets.message_id\
                             ) \
                             ORDER BY message_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "conversation_tags",
        orphan_id_page_sql: "SELECT conversation_id FROM conversation_tags \
                             WHERE NOT EXISTS (\
                             SELECT 1 FROM conversations \
                             WHERE conversations.id = conversation_tags.conversation_id\
                             ) \
                             ORDER BY conversation_id \
                             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM conversation_tags WHERE conversation_id IN",
    },
];
5659
/// Describes a table whose rows are deleted by `message_id` before the orphan
/// messages they reference are removed.
struct OrphanMessageDependentTable {
    /// Name of the dependent table; used in log fields and error context.
    child_table: &'static str,
    /// `DELETE ... WHERE message_id IN` prefix completed with a placeholder list.
    delete_many_sql_prefix: &'static str,
}
5664
/// Tables cleared of rows referencing a message id, in this order, by
/// `delete_orphan_message_id_chunk_once` before it deletes the messages.
const ORPHAN_MESSAGE_DEPENDENT_TABLES: &[OrphanMessageDependentTable] = &[
    OrphanMessageDependentTable {
        child_table: "message_metrics",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "token_usage",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "snippets",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
];
5679
/// Running tally of rows removed during orphan foreign-key cleanup.
#[derive(Debug, Default, Clone)]
pub(crate) struct OrphanFkCleanupReport {
    /// Saturating sum of every count recorded so far.
    pub total: i64,
    /// Per-table counts, one entry per table in first-recorded order.
    pub per_table: Vec<(&'static str, i64)>,
}

impl OrphanFkCleanupReport {
    /// Adds `count` to the entry for `child_table` (creating it when absent)
    /// and to the overall total. Both additions saturate.
    fn record(&mut self, child_table: &'static str, count: i64) {
        self.total = self.total.saturating_add(count);

        for (table, tally) in &mut self.per_table {
            if *table == child_table {
                *tally = tally.saturating_add(count);
                return;
            }
        }
        self.per_table.push((child_table, count));
    }
}
5710
/// Result of inserting a conversation and its messages.
///
/// NOTE(review): the producer is outside this section; field descriptions are
/// inferred from names and should be confirmed there.
pub struct InsertOutcome {
    /// Database id of the conversation.
    pub conversation_id: i64,
    /// Presumably `true` when a new conversation row was created — confirm.
    pub conversation_inserted: bool,
    /// Presumably the message `idx` values that were newly inserted — confirm.
    pub inserted_indices: Vec<i64>,
}
5716
/// Test-only call counters and timings for the message-insert stage.
///
/// The duration fields are printed by
/// `InsertConversationTreePerfProfile::log_summary` as the `msg_*_ms` values.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct MessageInsertSubstageProfile {
    single_row_calls: usize,
    batch_calls: usize,
    batch_rows: usize,
    payload_duration: Duration,
    sql_build_duration: Duration,
    param_build_duration: Duration,
    execute_duration: Duration,
    rowid_duration: Duration,
}
5729
/// Test-only accumulated timings for conversation-tree inserts.
///
/// `log_summary` prints every duration in milliseconds, plus a residual
/// (`total_duration` minus the per-stage durations) and per-invocation
/// averages.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct InsertConversationTreePerfProfile {
    // Number of profiled calls; the divisor for the averages in `log_summary`.
    invocations: usize,
    messages: usize,
    inserted_messages: usize,
    total_duration: Duration,
    source_duration: Duration,
    tx_open_duration: Duration,
    existing_lookup_duration: Duration,
    existing_idx_lookup_duration: Duration,
    existing_replay_lookup_duration: Duration,
    dedupe_filter_duration: Duration,
    conversation_row_duration: Duration,
    message_insert_duration: Duration,
    // Finer-grained split of the message-insert stage; not part of the
    // accounted sum in `log_summary`.
    message_insert_breakdown: MessageInsertSubstageProfile,
    snippet_insert_duration: Duration,
    fts_entry_duration: Duration,
    fts_flush_duration: Duration,
    analytics_duration: Duration,
    commit_duration: Duration,
}
5752
#[cfg(test)]
impl InsertConversationTreePerfProfile {
    /// Converts a duration to fractional milliseconds.
    fn millis(duration: Duration) -> f64 {
        duration.as_secs_f64() * 1000.0
    }

    /// Prints a single `CASS_INSERT_TREE_STAGE_PROFILE` line to stderr with
    /// every stage timing in milliseconds, the unaccounted residual, and
    /// per-invocation averages.
    fn log_summary(&self, label: &str) {
        let ms = Self::millis;
        let breakdown = &self.message_insert_breakdown;
        // Treat zero invocations as one so the averages never divide by zero.
        let calls = self.invocations.max(1) as f64;
        let per_call = |duration: Duration| Self::millis(duration) / calls;

        let stage_durations = [
            self.source_duration,
            self.tx_open_duration,
            self.existing_lookup_duration,
            self.existing_idx_lookup_duration,
            self.existing_replay_lookup_duration,
            self.dedupe_filter_duration,
            self.conversation_row_duration,
            self.message_insert_duration,
            self.snippet_insert_duration,
            self.fts_entry_duration,
            self.fts_flush_duration,
            self.analytics_duration,
            self.commit_duration,
        ];
        let accounted_duration = stage_durations
            .iter()
            .fold(Duration::ZERO, |sum, stage| sum + *stage);
        // Time inside the total that no individual stage claimed.
        let residual_duration = self.total_duration.saturating_sub(accounted_duration);

        eprintln!(
            concat!(
                "CASS_INSERT_TREE_STAGE_PROFILE ",
                "label={} calls={} messages={} inserted_messages={} ",
                "total_ms={:.3} source_ms={:.3} tx_open_ms={:.3} existing_lookup_ms={:.3} ",
                "existing_idx_lookup_ms={:.3} existing_replay_lookup_ms={:.3} dedupe_filter_ms={:.3} ",
                "conversation_row_ms={:.3} message_insert_ms={:.3} snippet_insert_ms={:.3} ",
                "fts_entry_ms={:.3} fts_flush_ms={:.3} analytics_ms={:.3} commit_ms={:.3} ",
                "msg_payload_ms={:.3} msg_sql_ms={:.3} msg_param_ms={:.3} msg_execute_ms={:.3} msg_rowid_ms={:.3} ",
                "residual_ms={:.3} avg_total_ms={:.3} avg_message_insert_ms={:.3} ",
                "avg_msg_execute_ms={:.3} avg_msg_payload_ms={:.3} avg_snippet_insert_ms={:.3} avg_fts_entry_ms={:.3} avg_commit_ms={:.3}"
            ),
            label,
            self.invocations,
            self.messages,
            self.inserted_messages,
            ms(self.total_duration),
            ms(self.source_duration),
            ms(self.tx_open_duration),
            ms(self.existing_lookup_duration),
            ms(self.existing_idx_lookup_duration),
            ms(self.existing_replay_lookup_duration),
            ms(self.dedupe_filter_duration),
            ms(self.conversation_row_duration),
            ms(self.message_insert_duration),
            ms(self.snippet_insert_duration),
            ms(self.fts_entry_duration),
            ms(self.fts_flush_duration),
            ms(self.analytics_duration),
            ms(self.commit_duration),
            ms(breakdown.payload_duration),
            ms(breakdown.sql_build_duration),
            ms(breakdown.param_build_duration),
            ms(breakdown.execute_duration),
            ms(breakdown.rowid_duration),
            ms(residual_duration),
            per_call(self.total_duration),
            per_call(self.message_insert_duration),
            per_call(breakdown.execute_duration),
            per_call(breakdown.payload_duration),
            per_call(self.snippet_insert_duration),
            per_call(self.fts_entry_duration),
            per_call(self.commit_duration),
        );
    }
}
5821
/// Identity used to look up a conversation that may already be stored.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum PendingConversationKey {
    /// Conversation identified by an external id; resolved through
    /// `conversation_external_lookup_key`.
    External {
        source_id: String,
        agent_id: i64,
        external_id: String,
    },
    /// Conversation identified by its source path and optional start time.
    SourcePath {
        source_id: String,
        agent_id: i64,
        source_path: String,
        started_at: Option<i64>,
    },
}
5836
/// Builds the lookup key for an externally identified conversation.
///
/// Layout: `<source chars>:<source_id>:<agent_id>:<external chars>:<external_id>`,
/// where the two counts are Unicode scalar counts (not byte lengths).
fn conversation_external_lookup_key(source_id: &str, agent_id: i64, external_id: &str) -> String {
    let source_chars = source_id.chars().count();
    let external_chars = external_id.chars().count();
    format!("{source_chars}:{source_id}:{agent_id}:{external_chars}:{external_id}")
}
5844
5845fn conversation_external_lookup_key_for_conv(agent_id: i64, conv: &Conversation) -> Option<String> {
5846 conv.external_id
5847 .as_deref()
5848 .map(|external_id| conversation_external_lookup_key(&conv.source_id, agent_id, external_id))
5849}
5850
/// Identity of a message at a specific position: its index, timestamp, role,
/// author, and a hash of its content.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageMergeFingerprint {
    idx: i64,
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    // BLAKE3 digest of the message content bytes (see `message_merge_fingerprint`).
    content_hash: [u8; 32],
}
5859
/// Position-independent identity of a message: the same fields as
/// `MessageMergeFingerprint` without `idx`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageReplayFingerprint {
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    // BLAKE3 digest of the message content bytes (see `message_replay_fingerprint`).
    content_hash: [u8; 32],
}
5867
/// Measurements describing how two conversations overlap.
///
/// NOTE(review): this type is populated and consumed outside this section; the
/// field meanings below are inferred from names — confirm against the producer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ConversationMergeEvidence {
    // Presumably the count of shared `MessageMergeFingerprint`s — confirm.
    exact_overlap: usize,
    // Presumably the count of shared `MessageReplayFingerprint`s — confirm.
    replay_overlap: usize,
    smaller_replay_set: usize,
    started_close: bool,
    start_distance_ms: i64,
}
5876
/// Messages from an incoming conversation that are not yet stored, plus
/// bookkeeping gathered while filtering them.
struct ExistingConversationNewMessages<'a> {
    /// Incoming messages selected for insertion, in input order.
    messages: Vec<&'a Message>,
    /// Sum of `content.len()` (byte lengths) over `messages`.
    new_chars: i64,
    /// Number of incoming messages whose `idx` is already taken by a message
    /// with a different fingerprint.
    idx_collision_count: usize,
    /// `idx` of the first such collision, if any.
    first_collision_idx: Option<i64>,
}
5883
/// Tail summary of a stored conversation: its highest message index, latest
/// message timestamp, and recorded end time.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationTailState {
    last_message_idx: i64,
    last_message_created_at: i64,
    ended_at: Option<i64>,
}
5890
/// A stored conversation's id together with its tail state, when one could be
/// determined.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationWithTail {
    id: i64,
    tail_state: Option<ExistingConversationTailState>,
}
5896
5897fn conversation_effective_started_at(conv: &Conversation) -> Option<i64> {
5898 conv.started_at
5899 .or_else(|| conv.messages.iter().filter_map(|msg| msg.created_at).min())
5900}
5901
5902fn conversation_tail_state(conv: &Conversation) -> (Option<i64>, Option<i64>) {
5903 (
5904 conv.messages.iter().map(|msg| msg.idx).max(),
5905 conv.messages.iter().filter_map(|msg| msg.created_at).max(),
5906 )
5907}
5908
5909fn borrowed_messages_tail_state(messages: &[&Message]) -> (Option<i64>, Option<i64>) {
5910 (
5911 messages.iter().map(|msg| msg.idx).max(),
5912 messages.iter().filter_map(|msg| msg.created_at).max(),
5913 )
5914}
5915
5916fn role_from_str(role: &str) -> MessageRole {
5917 match role {
5918 "user" => MessageRole::User,
5919 "agent" | "assistant" => MessageRole::Agent,
5920 "tool" => MessageRole::Tool,
5921 "system" => MessageRole::System,
5922 other => MessageRole::Other(other.to_string()),
5923 }
5924}
5925
5926fn message_merge_fingerprint(msg: &Message) -> MessageMergeFingerprint {
5927 MessageMergeFingerprint {
5928 idx: msg.idx,
5929 created_at: msg.created_at,
5930 role: msg.role.clone(),
5931 author: msg.author.clone(),
5932 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
5933 }
5934}
5935
5936fn message_replay_fingerprint(msg: &Message) -> MessageReplayFingerprint {
5937 MessageReplayFingerprint {
5938 created_at: msg.created_at,
5939 role: msg.role.clone(),
5940 author: msg.author.clone(),
5941 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
5942 }
5943}
5944
5945fn conversation_message_fingerprints(conv: &Conversation) -> HashSet<MessageMergeFingerprint> {
5946 conv.messages
5947 .iter()
5948 .map(message_merge_fingerprint)
5949 .collect()
5950}
5951
5952fn conversation_message_replay_fingerprints(
5953 conv: &Conversation,
5954) -> HashSet<MessageReplayFingerprint> {
5955 conv.messages
5956 .iter()
5957 .map(message_replay_fingerprint)
5958 .collect()
5959}
5960
5961fn replay_fingerprint_from_merge(
5962 fingerprint: &MessageMergeFingerprint,
5963) -> MessageReplayFingerprint {
5964 MessageReplayFingerprint {
5965 created_at: fingerprint.created_at,
5966 role: fingerprint.role.clone(),
5967 author: fingerprint.author.clone(),
5968 content_hash: fingerprint.content_hash,
5969 }
5970}
5971
5972fn replay_fingerprints_from_merge_set(
5973 fingerprints: &HashSet<MessageMergeFingerprint>,
5974) -> HashSet<MessageReplayFingerprint> {
5975 fingerprints
5976 .iter()
5977 .map(replay_fingerprint_from_merge)
5978 .collect()
5979}
5980
5981fn collect_new_messages_for_existing_conversation<'a>(
5982 conversation_id: i64,
5983 conv: &'a Conversation,
5984 existing_messages: &mut HashMap<i64, MessageMergeFingerprint>,
5985 existing_replay_fingerprints: &mut HashSet<MessageReplayFingerprint>,
5986 replay_skip_log: &'static str,
5987) -> ExistingConversationNewMessages<'a> {
5988 let mut idx_collision_count = 0usize;
5989 let mut first_collision_idx: Option<i64> = None;
5990 let mut new_chars: i64 = 0;
5991 let mut messages = Vec::new();
5992
5993 for msg in &conv.messages {
5994 let incoming_fingerprint = message_merge_fingerprint(msg);
5995 if let Some(existing_fingerprint) = existing_messages.get(&msg.idx) {
5996 if existing_fingerprint != &incoming_fingerprint {
5997 idx_collision_count = idx_collision_count.saturating_add(1);
5998 first_collision_idx.get_or_insert(msg.idx);
5999 }
6000 continue;
6001 }
6002
6003 let incoming_replay = replay_fingerprint_from_merge(&incoming_fingerprint);
6004 if existing_replay_fingerprints.contains(&incoming_replay) {
6005 tracing::debug!(
6006 conversation_id,
6007 idx = msg.idx,
6008 source_path = %conv.source_path.display(),
6009 "{replay_skip_log}"
6010 );
6011 continue;
6012 }
6013
6014 existing_messages.insert(msg.idx, incoming_fingerprint);
6015 existing_replay_fingerprints.insert(incoming_replay);
6016 new_chars += msg.content.len() as i64;
6017 messages.push(msg);
6018 }
6019
6020 ExistingConversationNewMessages {
6021 messages,
6022 new_chars,
6023 idx_collision_count,
6024 first_collision_idx,
6025 }
6026}
6027
6028fn franken_existing_conversation_append_tail_state(
6029 tx: &FrankenTransaction<'_>,
6030 conversation_id: i64,
6031) -> Result<Option<ExistingConversationTailState>> {
6032 let cached: Option<(Option<i64>, Option<i64>, Option<i64>)> = tx
6033 .query_row_map(
6034 "SELECT last_message_idx, last_message_created_at, ended_at
6035 FROM conversation_tail_state
6036 WHERE conversation_id = ?1",
6037 fparams![conversation_id],
6038 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6039 )
6040 .optional()?;
6041 if let Some(cached) = cached {
6042 let (_, _, cached_ended_at) = cached;
6043 if let Some(tail_state) =
6044 existing_conversation_tail_state_from_cached(cached.0, cached.1, cached_ended_at)
6045 {
6046 return Ok(Some(tail_state));
6047 }
6048 }
6049
6050 let legacy_cached: (Option<i64>, Option<i64>, Option<i64>) = tx.query_row_map(
6051 "SELECT last_message_idx, last_message_created_at, ended_at
6052 FROM conversations
6053 WHERE id = ?1",
6054 fparams![conversation_id],
6055 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6056 )?;
6057 let (_, _, cached_ended_at) = legacy_cached;
6058 if let Some(tail_state) = existing_conversation_tail_state_from_cached(
6059 legacy_cached.0,
6060 legacy_cached.1,
6061 cached_ended_at,
6062 ) {
6063 franken_insert_conversation_tail_state(
6064 tx,
6065 conversation_id,
6066 cached_ended_at,
6067 Some(tail_state.last_message_idx),
6068 Some(tail_state.last_message_created_at),
6069 )?;
6070 return Ok(Some(tail_state));
6071 }
6072
6073 let (max_idx, max_created_at): (Option<i64>, Option<i64>) = tx.query_row_map(
6074 "SELECT MAX(idx), MAX(created_at)
6075 FROM messages
6076 WHERE conversation_id = ?1",
6077 fparams![conversation_id],
6078 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
6079 )?;
6080 if let Some((last_message_idx, last_message_created_at)) = max_idx.zip(max_created_at) {
6081 franken_update_conversation_tail_state(
6082 tx,
6083 conversation_id,
6084 None,
6085 Some(last_message_idx),
6086 Some(last_message_created_at),
6087 )?;
6088 return Ok(Some(ExistingConversationTailState {
6089 last_message_idx,
6090 last_message_created_at,
6091 ended_at: cached_ended_at,
6092 }));
6093 }
6094 Ok(None)
6095}
6096
6097fn existing_conversation_tail_state_from_cached(
6098 last_message_idx: Option<i64>,
6099 last_message_created_at: Option<i64>,
6100 ended_at: Option<i64>,
6101) -> Option<ExistingConversationTailState> {
6102 let (last_message_idx, last_message_created_at) =
6103 last_message_idx.zip(last_message_created_at)?;
6104 Some(ExistingConversationTailState {
6105 last_message_idx,
6106 last_message_created_at,
6107 ended_at,
6108 })
6109}
6110
6111fn franken_find_existing_conversation_with_tail_by_key(
6112 tx: &FrankenTransaction<'_>,
6113 key: &PendingConversationKey,
6114 conv: Option<&Conversation>,
6115) -> Result<Option<ExistingConversationWithTail>> {
6116 if let PendingConversationKey::External {
6117 source_id,
6118 agent_id,
6119 external_id,
6120 } = key
6121 {
6122 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
6123 if let Some(existing) = franken_find_external_conversation_tail_lookup(tx, &lookup_key)? {
6124 return Ok(Some(existing));
6125 }
6126 return Ok(None);
6127 }
6128
6129 let Some(id) = franken_find_existing_conversation_by_key(tx, key, conv)? else {
6130 return Ok(None);
6131 };
6132 let tail_state = franken_existing_conversation_append_tail_state(tx, id)?;
6133 Ok(Some(ExistingConversationWithTail { id, tail_state }))
6134}
6135
6136fn franken_insert_conversation_tail_state(
6137 tx: &FrankenTransaction<'_>,
6138 conversation_id: i64,
6139 ended_at: Option<i64>,
6140 last_message_idx: Option<i64>,
6141 last_message_created_at: Option<i64>,
6142) -> Result<()> {
6143 if ended_at.is_none() && last_message_idx.is_none() && last_message_created_at.is_none() {
6144 return Ok(());
6145 }
6146 tx.execute_compat(
6147 "INSERT OR REPLACE INTO conversation_tail_state (
6148 conversation_id, ended_at, last_message_idx, last_message_created_at
6149 ) VALUES (?1, ?2, ?3, ?4)",
6150 fparams![
6151 conversation_id,
6152 ended_at,
6153 last_message_idx,
6154 last_message_created_at
6155 ],
6156 )?;
6157 Ok(())
6158}
6159
6160fn franken_update_conversation_tail_columns(
6161 tx: &FrankenTransaction<'_>,
6162 conversation_id: i64,
6163 ended_at_candidate: Option<i64>,
6164 last_message_idx_candidate: Option<i64>,
6165 last_message_created_at_candidate: Option<i64>,
6166) -> Result<()> {
6167 if ended_at_candidate.is_none()
6168 && last_message_idx_candidate.is_none()
6169 && last_message_created_at_candidate.is_none()
6170 {
6171 return Ok(());
6172 }
6173
6174 tx.execute_compat(
6175 "UPDATE conversations
6176 SET ended_at = CASE
6177 WHEN ?1 IS NULL THEN ended_at
6178 WHEN ended_at IS NULL OR ended_at < ?1 THEN ?1
6179 ELSE ended_at
6180 END,
6181 last_message_idx = CASE
6182 WHEN ?2 IS NULL THEN last_message_idx
6183 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6184 ELSE last_message_idx
6185 END,
6186 last_message_created_at = CASE
6187 WHEN ?3 IS NULL THEN last_message_created_at
6188 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6189 ELSE last_message_created_at
6190 END
6191 WHERE id = ?4",
6192 fparams![
6193 ended_at_candidate,
6194 last_message_idx_candidate,
6195 last_message_created_at_candidate,
6196 conversation_id
6197 ],
6198 )?;
6199 Ok(())
6200}
6201
6202fn franken_tail_state_insert_ended_at(
6203 tx: &FrankenTransaction<'_>,
6204 conversation_id: i64,
6205 candidate: Option<i64>,
6206) -> Result<Option<i64>> {
6207 let canonical: Option<i64> = tx
6208 .query_row_map(
6209 "SELECT ended_at FROM conversations WHERE id = ?1",
6210 fparams![conversation_id],
6211 |row| row.get_typed(0),
6212 )
6213 .optional()?
6214 .flatten();
6215 Ok(canonical.max(candidate))
6216}
6217
6218fn franken_update_conversation_tail_state(
6219 tx: &FrankenTransaction<'_>,
6220 conversation_id: i64,
6221 ended_at_candidate: Option<i64>,
6222 last_message_idx_candidate: Option<i64>,
6223 last_message_created_at_candidate: Option<i64>,
6224) -> Result<()> {
6225 if ended_at_candidate.is_none()
6226 && last_message_idx_candidate.is_none()
6227 && last_message_created_at_candidate.is_none()
6228 {
6229 return Ok(());
6230 }
6231
6232 let changed = tx.execute_compat(
6233 "UPDATE conversation_tail_state
6234 SET ended_at = CASE
6235 WHEN ?1 IS NULL THEN ended_at
6236 ELSE MAX(IFNULL(ended_at, 0), ?1)
6237 END,
6238 last_message_idx = CASE
6239 WHEN ?2 IS NULL THEN last_message_idx
6240 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6241 ELSE last_message_idx
6242 END,
6243 last_message_created_at = CASE
6244 WHEN ?3 IS NULL THEN last_message_created_at
6245 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6246 ELSE last_message_created_at
6247 END
6248 WHERE conversation_id = ?4",
6249 fparams![
6250 ended_at_candidate,
6251 last_message_idx_candidate,
6252 last_message_created_at_candidate,
6253 conversation_id
6254 ],
6255 )?;
6256 if changed == 0 {
6257 let insert_ended_at =
6258 franken_tail_state_insert_ended_at(tx, conversation_id, ended_at_candidate)?;
6259 franken_insert_conversation_tail_state(
6260 tx,
6261 conversation_id,
6262 insert_ended_at,
6263 last_message_idx_candidate,
6264 last_message_created_at_candidate,
6265 )?;
6266 }
6267 franken_update_conversation_tail_columns(
6268 tx,
6269 conversation_id,
6270 ended_at_candidate,
6271 last_message_idx_candidate,
6272 last_message_created_at_candidate,
6273 )?;
6274 Ok(())
6275}
6276
6277fn franken_set_conversation_tail_state_after_append(
6278 tx: &FrankenTransaction<'_>,
6279 conversation_id: i64,
6280 ended_at: i64,
6281 last_message_idx: i64,
6282 last_message_created_at: i64,
6283) -> Result<()> {
6284 let changed = tx.execute_compat(
6285 "UPDATE conversation_tail_state
6286 SET ended_at = ?1,
6287 last_message_idx = ?2,
6288 last_message_created_at = ?3
6289 WHERE conversation_id = ?4",
6290 fparams![
6291 ended_at,
6292 last_message_idx,
6293 last_message_created_at,
6294 conversation_id
6295 ],
6296 )?;
6297 if changed == 0 {
6298 let insert_ended_at =
6299 franken_tail_state_insert_ended_at(tx, conversation_id, Some(ended_at))?;
6300 franken_insert_conversation_tail_state(
6301 tx,
6302 conversation_id,
6303 insert_ended_at,
6304 Some(last_message_idx),
6305 Some(last_message_created_at),
6306 )?;
6307 }
6308 franken_update_conversation_tail_columns(
6309 tx,
6310 conversation_id,
6311 Some(ended_at),
6312 Some(last_message_idx),
6313 Some(last_message_created_at),
6314 )?;
6315 Ok(())
6316}
6317
6318fn collect_append_only_tail_messages<'a>(
6319 conv: &'a Conversation,
6320 existing_max_idx: i64,
6321 existing_max_created_at: i64,
6322) -> Option<ExistingConversationNewMessages<'a>> {
6323 if conv.messages.is_empty() {
6324 return Some(ExistingConversationNewMessages {
6325 messages: Vec::new(),
6326 new_chars: 0,
6327 idx_collision_count: 0,
6328 first_collision_idx: None,
6329 });
6330 }
6331
6332 let mut split_idx = None;
6333 let mut prev_idx = None;
6334 for (pos, msg) in conv.messages.iter().enumerate() {
6335 if prev_idx.is_some_and(|prev| msg.idx < prev) {
6336 return None;
6337 }
6338 prev_idx = Some(msg.idx);
6339 if split_idx.is_none() && msg.idx > existing_max_idx {
6340 split_idx = Some(pos);
6341 }
6342 }
6343 let split_idx = split_idx?;
6344
6345 let mut seen_tail_idx = HashSet::new();
6346 let mut seen_tail_replay = HashSet::new();
6347 let mut new_chars = 0i64;
6348 let mut messages = Vec::new();
6349 for msg in &conv.messages[split_idx..] {
6350 let created_at = msg.created_at?;
6351 if created_at <= existing_max_created_at {
6352 return None;
6353 }
6354
6355 if !seen_tail_idx.insert(msg.idx) {
6356 return None;
6357 }
6358
6359 let replay_fingerprint = message_replay_fingerprint(msg);
6360 if !seen_tail_replay.insert(replay_fingerprint) {
6361 return None;
6362 }
6363
6364 new_chars += msg.content.len() as i64;
6365 messages.push(msg);
6366 }
6367
6368 Some(ExistingConversationNewMessages {
6369 messages,
6370 new_chars,
6371 idx_collision_count: 0,
6372 first_collision_idx: None,
6373 })
6374}
6375
/// Absolute distance between two optional timestamps.
///
/// Returns `i64::MAX` when either side is `None` or when the distance does
/// not fit in an `i64`.
fn start_distance_ms(left: Option<i64>, right: Option<i64>) -> i64 {
    let (Some(lhs), Some(rhs)) = (left, right) else {
        return i64::MAX;
    };
    // `abs_diff` yields a u64, so the subtraction itself can never overflow.
    i64::try_from(lhs.abs_diff(rhs)).unwrap_or(i64::MAX)
}
6385
6386fn conversation_merge_evidence(
6387 incoming_exact: &HashSet<MessageMergeFingerprint>,
6388 incoming_replay: &HashSet<MessageReplayFingerprint>,
6389 existing_exact: &HashSet<MessageMergeFingerprint>,
6390 existing_replay: &HashSet<MessageReplayFingerprint>,
6391 incoming_started_at: Option<i64>,
6392 existing_started_at: Option<i64>,
6393) -> Option<ConversationMergeEvidence> {
6394 let exact_overlap = incoming_exact.intersection(existing_exact).count();
6395 let replay_overlap = incoming_replay.intersection(existing_replay).count();
6396 if exact_overlap == 0 && replay_overlap == 0 {
6397 return None;
6398 }
6399
6400 let smaller_replay_set = incoming_replay.len().min(existing_replay.len());
6401 let started_close = timestamps_within_tolerance(
6402 incoming_started_at,
6403 existing_started_at,
6404 SOURCE_PATH_MERGE_START_TOLERANCE_MS,
6405 );
6406 let full_replay_subset_match = smaller_replay_set >= 2 && replay_overlap == smaller_replay_set;
6407
6408 let merge_allowed = if started_close {
6409 exact_overlap >= 1 || replay_overlap >= 2
6410 } else {
6411 exact_overlap >= 2 || full_replay_subset_match
6412 };
6413
6414 merge_allowed.then_some(ConversationMergeEvidence {
6415 exact_overlap,
6416 replay_overlap,
6417 smaller_replay_set,
6418 started_close,
6419 start_distance_ms: start_distance_ms(incoming_started_at, existing_started_at),
6420 })
6421}
6422
/// Reports whether two timestamps are both present and at most
/// `tolerance_ms` apart (inclusive).
///
/// A missing timestamp on either side yields `false`, as does a negative
/// tolerance. The difference is computed in `i128` so it cannot overflow.
fn timestamps_within_tolerance(left: Option<i64>, right: Option<i64>, tolerance_ms: i64) -> bool {
    left.zip(right).is_some_and(|(lhs, rhs)| {
        let gap = (i128::from(lhs) - i128::from(rhs)).abs();
        gap <= i128::from(tolerance_ms)
    })
}
6431
6432fn conversation_merge_key(agent_id: i64, conv: &Conversation) -> PendingConversationKey {
6433 if let Some(external_id) = conv.external_id.clone() {
6434 PendingConversationKey::External {
6435 source_id: conv.source_id.clone(),
6436 agent_id,
6437 external_id,
6438 }
6439 } else {
6440 PendingConversationKey::SourcePath {
6441 source_id: conv.source_id.clone(),
6442 agent_id,
6443 source_path: path_to_string(&conv.source_path),
6444 started_at: conversation_effective_started_at(conv),
6445 }
6446 }
6447}
6448
/// A message together with the conversation-level fields carried alongside
/// it for embedding.
///
/// NOTE(review): field meanings below are taken from their names and types;
/// confirm against the query that populates this struct.
#[derive(Debug, Clone)]
pub struct MessageForEmbedding {
    /// Identifier of the message.
    pub message_id: i64,
    /// Message creation timestamp, if one is recorded.
    pub created_at: Option<i64>,
    /// Identifier of the agent.
    pub agent_id: i64,
    /// Identifier of the workspace, if any.
    pub workspace_id: Option<i64>,
    /// 32-bit hash of the source id (hash function not visible here).
    pub source_id_hash: u32,
    /// Message role as a string.
    pub role: String,
    /// Message text.
    pub content: String,
}
6459
6460impl FrankenStorage {
6465 pub fn ensure_agent(&self, agent: &Agent) -> Result<i64> {
6467 let cache_key = EnsuredAgentKey::from_agent(agent);
6468 if let Some(id) = self.cached_agent_id(&cache_key) {
6469 return Ok(id);
6470 }
6471
6472 let now = Self::now_millis();
6473 self.conn.execute_compat(
6474 "INSERT INTO agents(slug, name, version, kind, created_at, updated_at)
6475 VALUES(?1, ?2, ?3, ?4, ?5, ?6)
6476 ON CONFLICT(slug) DO UPDATE SET
6477 name = excluded.name,
6478 version = excluded.version,
6479 kind = excluded.kind,
6480 updated_at = excluded.updated_at
6481 WHERE NOT (
6482 agents.name IS excluded.name
6483 AND agents.version IS excluded.version
6484 AND agents.kind IS excluded.kind
6485 )",
6486 fparams![
6487 agent.slug.as_str(),
6488 agent.name.as_str(),
6489 agent.version.as_deref(),
6490 cache_key.kind.as_str(),
6491 now,
6492 now
6493 ],
6494 )?;
6495
6496 let id = self
6497 .conn
6498 .query_row_map(
6499 "SELECT id FROM agents WHERE slug = ?1 LIMIT 1",
6500 fparams![agent.slug.as_str()],
6501 |row| row.get_typed(0),
6502 )
6503 .with_context(|| format!("fetching agent id for {}", agent.slug))?;
6504 self.mark_agent_ensured(cache_key, id);
6505 Ok(id)
6506 }
6507
6508 pub fn ensure_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
6510 let path_str = path.to_string_lossy().to_string();
6511 let cache_key = EnsuredWorkspaceKey::new(path_str.clone(), display_name);
6512 if let Some(id) = self.cached_workspace_id(&cache_key) {
6513 return Ok(id);
6514 }
6515
6516 if let Some(display_name) = display_name {
6517 self.conn.execute_compat(
6518 "INSERT INTO workspaces(path, display_name)
6519 VALUES(?1, ?2)
6520 ON CONFLICT(path) DO UPDATE SET
6521 display_name = excluded.display_name
6522 WHERE NOT (workspaces.display_name IS excluded.display_name)",
6523 fparams![path_str.as_str(), display_name],
6524 )?;
6525 } else {
6526 self.conn.execute_compat(
6527 "INSERT OR IGNORE INTO workspaces(path, display_name) VALUES(?1, NULL)",
6528 fparams![path_str.as_str()],
6529 )?;
6530 }
6531
6532 let id = self
6533 .conn
6534 .query_row_map(
6535 "SELECT id FROM workspaces WHERE path = ?1 LIMIT 1",
6536 fparams![path_str.as_str()],
6537 |row| row.get_typed(0),
6538 )
6539 .with_context(|| format!("fetching workspace id for {path_str}"))?;
6540 self.mark_workspace_ensured(cache_key, id);
6541 Ok(id)
6542 }
6543
6544 pub fn now_millis() -> i64 {
6546 SystemTime::now()
6547 .duration_since(UNIX_EPOCH)
6548 .map(|d| i64::try_from(d.as_millis()).unwrap_or(i64::MAX))
6549 .unwrap_or(0)
6550 }
6551
6552 pub fn day_id_from_millis(timestamp_ms: i64) -> i64 {
6554 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6555 let secs = timestamp_ms.div_euclid(1000);
6556 (secs - EPOCH_2020_SECS).div_euclid(86400)
6557 }
6558
6559 pub fn hour_id_from_millis(timestamp_ms: i64) -> i64 {
6561 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6562 let secs = timestamp_ms.div_euclid(1000);
6563 (secs - EPOCH_2020_SECS).div_euclid(3600)
6564 }
6565
6566 pub fn millis_from_day_id(day_id: i64) -> i64 {
6568 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6569 (EPOCH_2020_SECS + day_id * 86400) * 1000
6570 }
6571
6572 pub fn millis_from_hour_id(hour_id: i64) -> i64 {
6574 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6575 (EPOCH_2020_SECS + hour_id * 3600) * 1000
6576 }
6577
6578 pub fn get_last_scan_ts(&self) -> Result<Option<i64>> {
6580 let result: Result<String, _> = self.conn.query_row_map(
6581 "SELECT value FROM meta WHERE key = 'last_scan_ts'",
6582 fparams![],
6583 |row| row.get_typed(0),
6584 );
6585 match result.optional() {
6586 Ok(Some(s)) => Ok(s.parse().ok()),
6587 Ok(None) => Ok(None),
6588 Err(e) => Err(e.into()),
6589 }
6590 }
6591
6592 pub fn set_last_scan_ts(&self, ts: i64) -> Result<()> {
6594 self.conn.execute_compat(
6595 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_scan_ts', ?1)",
6596 fparams![ts.to_string()],
6597 )?;
6598 Ok(())
6599 }
6600
6601 pub fn get_last_indexed_at(&self) -> Result<Option<i64>> {
6603 let result: Result<String, _> = self.conn.query_row_map(
6604 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
6605 fparams![],
6606 |row| row.get_typed(0),
6607 );
6608 match result.optional() {
6609 Ok(Some(s)) => Ok(s.parse().ok()),
6610 Ok(None) => Ok(None),
6611 Err(e) => Err(e.into()),
6612 }
6613 }
6614
6615 pub fn set_last_indexed_at(&self, ts: i64) -> Result<()> {
6617 self.conn.execute_compat(
6618 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_indexed_at', ?1)",
6619 fparams![ts.to_string()],
6620 )?;
6621 Ok(())
6622 }
6623
6624 pub fn list_agents(&self) -> Result<Vec<Agent>> {
6626 self.conn
6627 .query_map_collect(
6628 "SELECT id, slug, name, version, kind FROM agents ORDER BY slug",
6629 fparams![],
6630 |row| {
6631 let kind: String = row.get_typed(4)?;
6632 Ok(Agent {
6633 id: Some(row.get_typed(0)?),
6634 slug: row.get_typed(1)?,
6635 name: row.get_typed(2)?,
6636 version: row.get_typed(3)?,
6637 kind: match kind.as_str() {
6638 "cli" => AgentKind::Cli,
6639 "vscode" => AgentKind::VsCode,
6640 _ => AgentKind::Hybrid,
6641 },
6642 })
6643 },
6644 )
6645 .with_context(|| "listing agents")
6646 }
6647
6648 pub fn total_conversation_count(&self) -> Result<usize> {
6650 let count: i64 =
6651 self.conn
6652 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6653 row.get_typed(0)
6654 })?;
6655 Ok(count.max(0) as usize)
6656 }
6657
6658 pub fn total_message_count(&self) -> Result<usize> {
6660 let count: i64 =
6661 self.conn
6662 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
6663 row.get_typed(0)
6664 })?;
6665 Ok(count.max(0) as usize)
6666 }
6667
6668 pub fn purge_agent_archive_data(&self, agent_slug: &str) -> Result<AgentArchivePurgeResult> {
6673 let normalized = agent_slug.trim().to_ascii_lowercase();
6674 if normalized.is_empty() {
6675 return Err(anyhow!("agent slug cannot be empty"));
6676 }
6677
6678 let Some(agent_id) = self
6679 .conn
6680 .query_row_map(
6681 "SELECT id FROM agents WHERE slug = ?1",
6682 fparams![normalized.as_str()],
6683 |row| row.get_typed::<i64>(0),
6684 )
6685 .optional()?
6686 else {
6687 return Ok(AgentArchivePurgeResult::default());
6688 };
6689
6690 let conversations_deleted: i64 = self.conn.query_row_map(
6691 "SELECT COUNT(*) FROM conversations WHERE agent_id = ?1",
6692 fparams![agent_id],
6693 |row| row.get_typed(0),
6694 )?;
6695 if conversations_deleted == 0 {
6696 return Ok(AgentArchivePurgeResult::default());
6697 }
6698
6699 let messages_deleted: i64 = self.conn.query_row_map(
6700 "SELECT COUNT(*)
6701 FROM messages
6702 WHERE conversation_id IN (
6703 SELECT id FROM conversations WHERE agent_id = ?1
6704 )",
6705 fparams![agent_id],
6706 |row| row.get_typed(0),
6707 )?;
6708
6709 let mut tx = self.conn.transaction()?;
6710 tx.execute_compat(
6711 "DELETE FROM conversation_external_lookup
6712 WHERE conversation_id IN (
6713 SELECT id FROM conversations WHERE agent_id = ?1
6714 )",
6715 fparams![agent_id],
6716 )?;
6717 tx.execute_compat(
6718 "DELETE FROM conversation_external_tail_lookup
6719 WHERE conversation_id IN (
6720 SELECT id FROM conversations WHERE agent_id = ?1
6721 )",
6722 fparams![agent_id],
6723 )?;
6724 tx.execute_compat(
6725 "DELETE FROM conversations WHERE agent_id = ?1",
6726 fparams![agent_id],
6727 )?;
6728 tx.execute_compat(
6729 "DELETE FROM agents
6730 WHERE id = ?1
6731 AND NOT EXISTS (
6732 SELECT 1 FROM conversations WHERE agent_id = ?1
6733 )",
6734 fparams![agent_id],
6735 )?;
6736 tx.commit()?;
6737
6738 Ok(AgentArchivePurgeResult {
6739 conversations_deleted: conversations_deleted.max(0) as usize,
6740 messages_deleted: messages_deleted.max(0) as usize,
6741 })
6742 }
6743
6744 pub fn list_workspaces(&self) -> Result<Vec<crate::model::types::Workspace>> {
6746 self.conn
6747 .query_map_collect(
6748 "SELECT id, path, display_name FROM workspaces ORDER BY path",
6749 fparams![],
6750 |row| {
6751 let path_str: String = row.get_typed(1)?;
6752 Ok(crate::model::types::Workspace {
6753 id: Some(row.get_typed(0)?),
6754 path: Path::new(&path_str).to_path_buf(),
6755 display_name: row.get_typed(2)?,
6756 })
6757 },
6758 )
6759 .with_context(|| "listing workspaces")
6760 }
6761
6762 pub fn list_conversations(&self, limit: i64, offset: i64) -> Result<Vec<Conversation>> {
6764 self.conn
6771 .query_map_collect(
6772 r"SELECT c.id,
6773 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
6774 (SELECT w.path FROM workspaces w WHERE w.id = c.workspace_id),
6775 c.external_id, c.title, c.source_path,
6776 c.started_at,
6777 COALESCE(
6778 (SELECT ts.ended_at
6779 FROM conversation_tail_state ts
6780 WHERE ts.conversation_id = c.id),
6781 c.ended_at
6782 ),
6783 c.approx_tokens, c.metadata_json,
6784 c.source_id, c.origin_host, c.metadata_bin
6785 FROM conversations c
6786 ORDER BY CASE WHEN c.started_at IS NULL THEN 1 ELSE 0 END, c.started_at DESC, c.id DESC
6787 LIMIT ?1 OFFSET ?2",
6788 fparams![limit, offset],
6789 |row| {
6790 let workspace_path: Option<String> = row.get_typed(2)?;
6791 let source_path: String = row.get_typed(5)?;
6792 let raw_source_id: Option<String> = row.get_typed(10)?;
6793 let raw_origin_host: Option<String> = row.get_typed(11)?;
6794 let (source_id, _, origin_host) = normalized_storage_source_parts(
6795 raw_source_id.as_deref(),
6796 None,
6797 raw_origin_host.as_deref(),
6798 );
6799 Ok(Conversation {
6800 id: Some(row.get_typed(0)?),
6801 agent_slug: row.get_typed(1)?,
6802 workspace: workspace_path.map(|p| Path::new(&p).to_path_buf()),
6803 external_id: row.get_typed(3)?,
6804 title: row.get_typed(4)?,
6805 source_path: Path::new(&source_path).to_path_buf(),
6806 started_at: row.get_typed(6)?,
6807 ended_at: row.get_typed(7)?,
6808 approx_tokens: row.get_typed(8)?,
6809 metadata_json: franken_read_metadata_compat(row, 9, 12),
6810 messages: Vec::new(),
6811 source_id,
6812 origin_host,
6813 })
6814 },
6815 )
6816 .with_context(|| "listing conversations")
6817 }
6818
6819 pub fn build_lexical_rebuild_lookups(
6823 &self,
6824 ) -> Result<(HashMap<i64, String>, HashMap<i64, PathBuf>)> {
6825 let agents: HashMap<i64, String> = self
6826 .conn
6827 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
6828 Ok((row.get_typed::<i64>(0)?, row.get_typed::<String>(1)?))
6829 })
6830 .with_context(|| "loading agent lookup for lexical rebuild")?
6831 .into_iter()
6832 .collect();
6833 let workspaces: HashMap<i64, PathBuf> = self
6834 .conn
6835 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
6836 let path_str: String = row.get_typed(1)?;
6837 Ok((row.get_typed::<i64>(0)?, PathBuf::from(path_str)))
6838 })
6839 .with_context(|| "loading workspace lookup for lexical rebuild")?
6840 .into_iter()
6841 .collect();
6842 Ok((agents, workspaces))
6843 }
6844
6845 pub fn list_conversation_footprints_for_lexical_rebuild(
6858 &self,
6859 ) -> Result<Vec<LexicalRebuildConversationFootprintRow>> {
6860 let tail_state_rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
6861 "SELECT conversation_id, last_message_idx
6862 FROM conversation_tail_state
6863 ORDER BY conversation_id ASC",
6864 fparams![],
6865 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
6866 ) {
6867 Ok(rows) => rows,
6868 Err(err) if error_indicates_missing_table(&err) => Vec::new(),
6869 Err(err) => {
6870 return Err(err).with_context(|| "listing lexical rebuild tail-state estimates");
6871 }
6872 };
6873 let tail_state_by_conversation: HashMap<i64, Option<i64>> =
6874 tail_state_rows.into_iter().collect();
6875
6876 let rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
6877 "SELECT id, last_message_idx
6878 FROM conversations
6879 ORDER BY id ASC",
6880 fparams![],
6881 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
6882 ) {
6883 Ok(rows) => rows,
6884 Err(err) if error_indicates_missing_column(&err) => self
6885 .conn
6886 .query_map_collect(
6887 "SELECT id
6888 FROM conversations
6889 ORDER BY id ASC",
6890 fparams![],
6891 |row| Ok((row.get_typed::<i64>(0)?, None)),
6892 )
6893 .with_context(|| {
6894 "listing lexical rebuild conversation ids after missing tail column fallback"
6895 })?,
6896 Err(err) => {
6897 return Err(err)
6898 .with_context(|| "listing lexical rebuild conversation footprint estimates");
6899 }
6900 };
6901
6902 let mut footprints = Vec::with_capacity(rows.len());
6903 let mut missing_tail_positions = HashMap::new();
6904 for (conversation_id, conversation_last_message_idx) in rows {
6905 let last_message_idx = tail_state_by_conversation
6906 .get(&conversation_id)
6907 .copied()
6908 .flatten()
6909 .or(conversation_last_message_idx);
6910 let Some(message_count) = lexical_rebuild_message_count_from_tail_idx(last_message_idx)
6911 else {
6912 missing_tail_positions.insert(conversation_id, footprints.len());
6913 footprints.push(LexicalRebuildConversationFootprintRow {
6914 conversation_id,
6915 message_count: 0,
6916 message_bytes: 0,
6917 });
6918 continue;
6919 };
6920 footprints.push(lexical_rebuild_conversation_footprint_from_count(
6921 conversation_id,
6922 message_count,
6923 ));
6924 }
6925
6926 let every_footprint_was_missing_tail = missing_tail_positions.len() == footprints.len();
6927 if !missing_tail_positions.is_empty() {
6928 self.fill_missing_lexical_rebuild_footprint_tails(
6929 &mut footprints,
6930 &missing_tail_positions,
6931 )?;
6932 }
6933 if !every_footprint_was_missing_tail {
6934 self.raise_lexical_rebuild_footprints_to_exact_message_counts(&mut footprints)?;
6935 }
6936
6937 Ok(footprints)
6938 }
6939
6940 pub fn lexical_rebuild_has_tail_footprint_metadata(&self) -> Result<bool> {
6941 let total_conversations: i64 = self
6942 .conn
6943 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6944 row.get_typed(0)
6945 })
6946 .with_context(|| "counting conversations for lexical rebuild tail metadata coverage")?;
6947 let total_conversations = usize::try_from(total_conversations.max(0)).unwrap_or(usize::MAX);
6948 if total_conversations == 0 {
6949 return Ok(true);
6950 }
6951
6952 let conversation_columns = franken_table_column_names(&self.conn, "conversations")?;
6953 let conversations_have_tail_column = conversation_columns.contains("last_message_idx");
6954 let tail_state_has_tail_column =
6955 match franken_table_column_names(&self.conn, "conversation_tail_state") {
6956 Ok(columns) => columns.contains("last_message_idx"),
6957 Err(err) if error_indicates_missing_table(&err) => false,
6958 Err(err) => {
6959 return Err(err)
6960 .with_context(|| "reading lexical rebuild tail-state metadata columns");
6961 }
6962 };
6963 if !conversations_have_tail_column && !tail_state_has_tail_column {
6964 return Ok(false);
6965 }
6966
6967 let covered_sql = match (conversations_have_tail_column, tail_state_has_tail_column) {
6968 (true, true) => {
6969 "SELECT COUNT(*)
6970 FROM conversations c
6971 LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
6972 WHERE c.last_message_idx IS NOT NULL
6973 OR ts.last_message_idx IS NOT NULL"
6974 }
6975 (true, false) => {
6976 "SELECT COUNT(*)
6977 FROM conversations
6978 WHERE last_message_idx IS NOT NULL"
6979 }
6980 (false, true) => {
6981 "SELECT COUNT(*)
6982 FROM conversations c
6983 WHERE EXISTS (
6984 SELECT 1
6985 FROM conversation_tail_state ts
6986 WHERE ts.conversation_id = c.id
6987 AND ts.last_message_idx IS NOT NULL
6988 )"
6989 }
6990 (false, false) => unreachable!("checked before covered_sql selection"),
6991 };
6992 let covered_conversations: i64 = self
6993 .conn
6994 .query_row_map(covered_sql, fparams![], |row| row.get_typed(0))
6995 .with_context(
6996 || "counting conversations covered by lexical rebuild tail footprint metadata",
6997 )?;
6998 let covered_conversations =
6999 usize::try_from(covered_conversations.max(0)).unwrap_or(usize::MAX);
7000
7001 Ok(lexical_rebuild_tail_metadata_coverage_is_sufficient(
7002 total_conversations,
7003 covered_conversations,
7004 ))
7005 }
7006
7007 fn raise_lexical_rebuild_footprints_to_exact_message_counts(
7008 &self,
7009 footprints: &mut [LexicalRebuildConversationFootprintRow],
7010 ) -> Result<()> {
7011 if footprints.is_empty() {
7012 return Ok(());
7013 }
7014
7015 let positions_by_conversation: HashMap<i64, usize> = footprints
7016 .iter()
7017 .enumerate()
7018 .map(|(position, footprint)| (footprint.conversation_id, position))
7019 .collect();
7020 self.conn
7021 .query_with_params_for_each(
7022 "SELECT conversation_id, COUNT(*) AS message_count
7023 FROM messages
7024 GROUP BY conversation_id
7025 ORDER BY conversation_id ASC",
7026 &[] as &[SqliteValue],
7027 |row| {
7028 let conversation_id: i64 = row.get_typed(0)?;
7029 let exact_count: i64 = row.get_typed(1)?;
7030 let Some(position) = positions_by_conversation.get(&conversation_id) else {
7031 return Ok(());
7032 };
7033 let exact_count = usize::try_from(exact_count.max(0)).unwrap_or(usize::MAX);
7034 let footprint = &mut footprints[*position];
7035 if exact_count > footprint.message_count {
7036 footprint.message_count = exact_count;
7037 footprint.message_bytes =
7038 footprint.message_bytes.max(exact_count.saturating_mul(
7039 LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
7040 ));
7041 }
7042 Ok(())
7043 },
7044 )
7045 .with_context(|| "raising lexical rebuild footprints to exact message counts")?;
7046 Ok(())
7047 }
7048
7049 fn fill_missing_lexical_rebuild_footprint_tails(
7050 &self,
7051 footprints: &mut [LexicalRebuildConversationFootprintRow],
7052 missing_tail_positions: &HashMap<i64, usize>,
7053 ) -> Result<()> {
7054 if missing_tail_positions.len() <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT {
7055 for (conversation_id, position) in missing_tail_positions {
7056 let last_message_idx: Option<i64> = self
7057 .conn
7058 .query_row_map(
7059 "SELECT MAX(idx) FROM messages WHERE conversation_id = ?1",
7060 fparams![*conversation_id],
7061 |row| row.get_typed(0),
7062 )
7063 .with_context(|| {
7064 format!(
7065 "looking up missing lexical rebuild tail estimate for conversation {conversation_id}"
7066 )
7067 })?;
7068 if let Some(message_count) =
7069 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7070 {
7071 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7072 *conversation_id,
7073 message_count,
7074 );
7075 }
7076 }
7077 return Ok(());
7078 }
7079
7080 self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7081 footprints,
7082 missing_tail_positions,
7083 "SELECT conversation_id, MAX(idx) AS last_message_idx
7084 FROM messages INDEXED BY idx_messages_conv_idx
7085 GROUP BY conversation_id
7086 ORDER BY conversation_id ASC",
7087 )
7088 .or_else(|err| {
7089 if err
7090 .to_string()
7091 .contains("no such index: idx_messages_conv_idx")
7092 {
7093 return self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7094 footprints,
7095 missing_tail_positions,
7096 "SELECT conversation_id, MAX(idx) AS last_message_idx
7097 FROM messages
7098 GROUP BY conversation_id
7099 ORDER BY conversation_id ASC",
7100 );
7101 }
7102 Err(err)
7103 })
7104 .with_context(|| "grouping missing lexical rebuild tail estimates from messages")?;
7105
7106 Ok(())
7107 }
7108
7109 fn fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7110 &self,
7111 footprints: &mut [LexicalRebuildConversationFootprintRow],
7112 missing_tail_positions: &HashMap<i64, usize>,
7113 sql: &str,
7114 ) -> Result<()> {
7115 self.conn
7116 .query_with_params_for_each(sql, &[] as &[SqliteValue], |row| {
7117 let conversation_id: i64 = row.get_typed(0)?;
7118 let last_message_idx: Option<i64> = row.get_typed(1)?;
7119 let Some(position) = missing_tail_positions.get(&conversation_id) else {
7120 return Ok(());
7121 };
7122 if let Some(message_count) =
7123 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7124 {
7125 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7126 conversation_id,
7127 message_count,
7128 );
7129 }
7130 Ok(())
7131 })
7132 .with_context(|| "grouping lexical rebuild missing tail estimates")
7133 }
7134
7135 pub fn list_conversation_ids_for_lexical_rebuild(&self) -> Result<Vec<i64>> {
7137 self.conn
7138 .query_map_collect(
7139 "SELECT id FROM conversations ORDER BY id ASC",
7140 fparams![],
7141 |row| row.get_typed(0),
7142 )
7143 .with_context(|| "listing conversation ids for lexical rebuild")
7144 }
7145 pub fn list_conversations_for_lexical_rebuild_by_offset(
7150 &self,
7151 limit: i64,
7152 offset: i64,
7153 agent_slugs: &HashMap<i64, String>,
7154 workspace_paths: &HashMap<i64, PathBuf>,
7155 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7156 self.conn
7159 .query_map_collect(
7160 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7161 started_at,
7162 COALESCE(
7163 (SELECT ts.ended_at
7164 FROM conversation_tail_state ts
7165 WHERE ts.conversation_id = conversations.id),
7166 ended_at
7167 ),
7168 source_id, origin_host
7169 FROM conversations
7170 ORDER BY id ASC
7171 LIMIT ?1 OFFSET ?2",
7172 fparams![limit, offset],
7173 |row| {
7174 let agent_id: Option<i64> = row.get_typed(1)?;
7175 let workspace_id: Option<i64> = row.get_typed(2)?;
7176 let source_path: String = row.get_typed(5)?;
7177 let raw_source_id: Option<String> = row.get_typed(8)?;
7178 let raw_origin_host: Option<String> = row.get_typed(9)?;
7179 let (source_id, _, origin_host) = normalized_storage_source_parts(
7180 raw_source_id.as_deref(),
7181 None,
7182 raw_origin_host.as_deref(),
7183 );
7184 Ok(LexicalRebuildConversationRow {
7185 id: Some(row.get_typed(0)?),
7186 agent_slug: agent_id
7187 .and_then(|aid| agent_slugs.get(&aid).cloned())
7188 .unwrap_or_else(|| "unknown".to_string()),
7189 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7190 external_id: row.get_typed(3)?,
7191 title: row.get_typed(4)?,
7192 source_path: Path::new(&source_path).to_path_buf(),
7193 started_at: row.get_typed(6)?,
7194 ended_at: row.get_typed(7)?,
7195 source_id,
7196 origin_host,
7197 })
7198 },
7199 )
7200 .with_context(|| "listing conversations for lexical rebuild")
7201 }
7202
7203 pub fn list_conversations_for_lexical_rebuild_after_id(
7208 &self,
7209 limit: i64,
7210 after_conversation_id: i64,
7211 agent_slugs: &HashMap<i64, String>,
7212 workspace_paths: &HashMap<i64, PathBuf>,
7213 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7214 self.conn
7215 .query_map_collect(
7216 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7217 started_at,
7218 COALESCE(
7219 (SELECT ts.ended_at
7220 FROM conversation_tail_state ts
7221 WHERE ts.conversation_id = conversations.id),
7222 ended_at
7223 ),
7224 source_id, origin_host
7225 FROM conversations
7226 WHERE id > ?2
7227 ORDER BY id ASC
7228 LIMIT ?1",
7229 fparams![limit, after_conversation_id],
7230 |row| {
7231 let agent_id: Option<i64> = row.get_typed(1)?;
7232 let workspace_id: Option<i64> = row.get_typed(2)?;
7233 let source_path: String = row.get_typed(5)?;
7234 let raw_source_id: Option<String> = row.get_typed(8)?;
7235 let raw_origin_host: Option<String> = row.get_typed(9)?;
7236 let (source_id, _, origin_host) = normalized_storage_source_parts(
7237 raw_source_id.as_deref(),
7238 None,
7239 raw_origin_host.as_deref(),
7240 );
7241 Ok(LexicalRebuildConversationRow {
7242 id: Some(row.get_typed(0)?),
7243 agent_slug: agent_id
7244 .and_then(|aid| agent_slugs.get(&aid).cloned())
7245 .unwrap_or_else(|| "unknown".to_string()),
7246 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7247 external_id: row.get_typed(3)?,
7248 title: row.get_typed(4)?,
7249 source_path: Path::new(&source_path).to_path_buf(),
7250 started_at: row.get_typed(6)?,
7251 ended_at: row.get_typed(7)?,
7252 source_id,
7253 origin_host,
7254 })
7255 },
7256 )
7257 .with_context(|| {
7258 format!(
7259 "listing conversations for lexical rebuild after id {after_conversation_id}"
7260 )
7261 })
7262 }
7263
7264 pub fn list_conversations_for_lexical_rebuild_after_id_through_id(
7270 &self,
7271 limit: i64,
7272 after_conversation_id: i64,
7273 through_conversation_id: i64,
7274 agent_slugs: &HashMap<i64, String>,
7275 workspace_paths: &HashMap<i64, PathBuf>,
7276 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7277 if through_conversation_id <= after_conversation_id {
7278 return Ok(Vec::new());
7279 }
7280 self.conn
7281 .query_map_collect(
7282 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7283 started_at,
7284 COALESCE(
7285 (SELECT ts.ended_at
7286 FROM conversation_tail_state ts
7287 WHERE ts.conversation_id = conversations.id),
7288 ended_at
7289 ),
7290 source_id, origin_host
7291 FROM conversations
7292 WHERE id > ?2 AND id <= ?3
7293 ORDER BY id ASC
7294 LIMIT ?1",
7295 fparams![limit, after_conversation_id, through_conversation_id],
7296 |row| {
7297 let agent_id: Option<i64> = row.get_typed(1)?;
7298 let workspace_id: Option<i64> = row.get_typed(2)?;
7299 let source_path: String = row.get_typed(5)?;
7300 let raw_source_id: Option<String> = row.get_typed(8)?;
7301 let raw_origin_host: Option<String> = row.get_typed(9)?;
7302 let (source_id, _, origin_host) = normalized_storage_source_parts(
7303 raw_source_id.as_deref(),
7304 None,
7305 raw_origin_host.as_deref(),
7306 );
7307 Ok(LexicalRebuildConversationRow {
7308 id: Some(row.get_typed(0)?),
7309 agent_slug: agent_id
7310 .and_then(|aid| agent_slugs.get(&aid).cloned())
7311 .unwrap_or_else(|| "unknown".to_string()),
7312 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7313 external_id: row.get_typed(3)?,
7314 title: row.get_typed(4)?,
7315 source_path: Path::new(&source_path).to_path_buf(),
7316 started_at: row.get_typed(6)?,
7317 ended_at: row.get_typed(7)?,
7318 source_id,
7319 origin_host,
7320 })
7321 },
7322 )
7323 .with_context(|| {
7324 format!(
7325 "listing conversations for lexical rebuild after id {after_conversation_id} through id {through_conversation_id}"
7326 )
7327 })
7328 }
7329
7330 pub fn fetch_messages(&self, conversation_id: i64) -> Result<Vec<Message>> {
7332 let hinted_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7333 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7334 WHERE conversation_id = ?1 ORDER BY idx";
7335 let fallback_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7336 FROM messages \
7337 WHERE conversation_id = ?1 ORDER BY idx";
7338
7339 self.conn
7340 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7341 let role: String = row.get_typed(2)?;
7342 Ok(Message {
7343 id: Some(row.get_typed(0)?),
7344 idx: row.get_typed(1)?,
7345 role: match role.as_str() {
7346 "user" => MessageRole::User,
7347 "agent" | "assistant" => MessageRole::Agent,
7348 "tool" => MessageRole::Tool,
7349 "system" => MessageRole::System,
7350 other => MessageRole::Other(other.to_string()),
7351 },
7352 author: row.get_typed(3)?,
7353 created_at: row.get_typed(4)?,
7354 content: row.get_typed(5)?,
7355 extra_json: franken_read_message_extra_compat(row, 6, 7),
7356 snippets: Vec::new(),
7357 })
7358 })
7359 .or_else(|err| {
7360 if err
7361 .to_string()
7362 .contains("no such index: sqlite_autoindex_messages_1")
7363 {
7364 return self.conn.query_map_collect(
7365 fallback_sql,
7366 fparams![conversation_id],
7367 |row| {
7368 let role: String = row.get_typed(2)?;
7369 Ok(Message {
7370 id: Some(row.get_typed(0)?),
7371 idx: row.get_typed(1)?,
7372 role: match role.as_str() {
7373 "user" => MessageRole::User,
7374 "agent" | "assistant" => MessageRole::Agent,
7375 "tool" => MessageRole::Tool,
7376 "system" => MessageRole::System,
7377 other => MessageRole::Other(other.to_string()),
7378 },
7379 author: row.get_typed(3)?,
7380 created_at: row.get_typed(4)?,
7381 content: row.get_typed(5)?,
7382 extra_json: franken_read_message_extra_compat(row, 6, 7),
7383 snippets: Vec::new(),
7384 })
7385 },
7386 );
7387 }
7388 Err(err)
7389 })
7390 .with_context(|| format!("fetching messages for conversation {conversation_id}"))
7391 }
7392
7393 pub fn fetch_messages_for_lexical_rebuild(&self, conversation_id: i64) -> Result<Vec<Message>> {
7399 let hinted_sql = "SELECT id, idx, role, author, created_at, content \
7400 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7401 WHERE conversation_id = ?1 ORDER BY idx";
7402 let fallback_sql = "SELECT id, idx, role, author, created_at, content \
7403 FROM messages \
7404 WHERE conversation_id = ?1 ORDER BY idx";
7405
7406 self.conn
7407 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7408 let role: String = row.get_typed(2)?;
7409 Ok(Message {
7410 id: Some(row.get_typed(0)?),
7411 idx: row.get_typed(1)?,
7412 role: match role.as_str() {
7413 "user" => MessageRole::User,
7414 "agent" | "assistant" => MessageRole::Agent,
7415 "tool" => MessageRole::Tool,
7416 "system" => MessageRole::System,
7417 other => MessageRole::Other(other.to_string()),
7418 },
7419 author: row.get_typed(3)?,
7420 created_at: row.get_typed(4)?,
7421 content: row.get_typed(5)?,
7422 extra_json: serde_json::Value::Null,
7423 snippets: Vec::new(),
7424 })
7425 })
7426 .or_else(|err| {
7427 if err
7428 .to_string()
7429 .contains("no such index: sqlite_autoindex_messages_1")
7430 {
7431 return self.conn.query_map_collect(
7432 fallback_sql,
7433 fparams![conversation_id],
7434 |row| {
7435 let role: String = row.get_typed(2)?;
7436 Ok(Message {
7437 id: Some(row.get_typed(0)?),
7438 idx: row.get_typed(1)?,
7439 role: match role.as_str() {
7440 "user" => MessageRole::User,
7441 "agent" | "assistant" => MessageRole::Agent,
7442 "tool" => MessageRole::Tool,
7443 "system" => MessageRole::System,
7444 other => MessageRole::Other(other.to_string()),
7445 },
7446 author: row.get_typed(3)?,
7447 created_at: row.get_typed(4)?,
7448 content: row.get_typed(5)?,
7449 extra_json: serde_json::Value::Null,
7450 snippets: Vec::new(),
7451 })
7452 },
7453 );
7454 }
7455 Err(err)
7456 })
7457 .with_context(|| {
7458 format!("fetching messages for lexical rebuild of conversation {conversation_id}")
7459 })
7460 }
7461
7462 pub fn fetch_messages_for_lexical_rebuild_batch(
7467 &self,
7468 conversation_ids: &[i64],
7469 max_messages: Option<usize>,
7470 max_content_bytes: Option<usize>,
7471 ) -> Result<HashMap<i64, Vec<Message>>> {
7472 if conversation_ids.is_empty() {
7473 return Ok(HashMap::new());
7474 }
7475
7476 let mut grouped: HashMap<i64, Vec<Message>> =
7477 HashMap::with_capacity(conversation_ids.len());
7478 let mut fetched_conversation_ids = HashSet::with_capacity(conversation_ids.len());
7479 let mut total_messages = 0usize;
7480 let mut total_content_bytes = 0usize;
7481
7482 for conversation_id in conversation_ids {
7487 if !fetched_conversation_ids.insert(*conversation_id) {
7488 continue;
7489 }
7490
7491 let messages = self
7492 .fetch_messages_for_lexical_rebuild(*conversation_id)
7493 .with_context(|| {
7494 format!("fetching lexical rebuild messages for conversation {conversation_id}")
7495 })?;
7496 total_messages = total_messages.saturating_add(messages.len());
7497 if let Some(limit) = max_messages
7498 && total_messages > limit
7499 {
7500 return Err(anyhow!(
7501 "lexical rebuild batch fetch exceeded message guardrail: messages={total_messages} limit={limit} conversations={}",
7502 conversation_ids.len()
7503 ));
7504 }
7505
7506 let message_bytes = messages
7507 .iter()
7508 .map(|message| message.content.len())
7509 .sum::<usize>();
7510 total_content_bytes = total_content_bytes.saturating_add(message_bytes);
7511 if let Some(limit) = max_content_bytes
7512 && total_content_bytes > limit
7513 {
7514 return Err(anyhow!(
7515 "lexical rebuild batch fetch exceeded content-byte guardrail: bytes={total_content_bytes} limit={limit} conversations={}",
7516 conversation_ids.len()
7517 ));
7518 }
7519
7520 if !messages.is_empty() {
7521 grouped.insert(*conversation_id, messages);
7522 }
7523 }
7524
7525 Ok(grouped)
7526 }
7527
7528 pub fn stream_messages_for_lexical_rebuild_between_conversation_ids<F>(
7531 &self,
7532 start_conversation_id: i64,
7533 end_conversation_id: i64,
7534 mut f: F,
7535 ) -> Result<()>
7536 where
7537 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7538 {
7539 if end_conversation_id < start_conversation_id {
7540 return Ok(());
7541 }
7542
7543 let conversation_ids: Vec<i64> = self
7544 .conn
7545 .query_map_collect(
7546 "SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
7547 fparams![start_conversation_id, end_conversation_id],
7548 |row| row.get_typed(0),
7549 )
7550 .with_context(|| "listing conversation ids for streamed lexical rebuild")?;
7551
7552 for conversation_id in conversation_ids {
7553 let messages = self
7554 .fetch_messages_for_lexical_rebuild(conversation_id)
7555 .with_context(|| {
7556 format!("streaming lexical rebuild messages for conversation {conversation_id}")
7557 })?;
7558
7559 for message in messages {
7560 let message_id = message.id.ok_or_else(|| {
7561 anyhow!(
7562 "lexical rebuild message missing id for conversation {conversation_id} idx {}",
7563 message.idx
7564 )
7565 })?;
7566 f(LexicalRebuildMessageRow {
7567 conversation_id,
7568 id: message_id,
7569 idx: message.idx,
7570 role: role_str(&message.role),
7571 author: message.author,
7572 created_at: message.created_at,
7573 content: message.content,
7574 })?;
7575 }
7576 }
7577
7578 Ok(())
7579 }
7580
7581 pub fn stream_grouped_messages_for_lexical_rebuild_between_conversation_ids<F>(
7585 &self,
7586 start_conversation_id: i64,
7587 end_conversation_id: i64,
7588 mut f: F,
7589 ) -> Result<()>
7590 where
7591 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7592 {
7593 if end_conversation_id < start_conversation_id {
7594 return Ok(());
7595 }
7596
7597 let mut current_conversation_id: Option<i64> = None;
7598 let mut current_messages: LexicalRebuildGroupedMessageRows = SmallVec::new();
7599 let mut current_last_message_id = 0i64;
7600 let mut flush_current = |current_conversation_id: &mut Option<i64>,
7601 current_messages: &mut LexicalRebuildGroupedMessageRows,
7602 current_last_message_id: &mut i64|
7603 -> Result<()> {
7604 let Some(conversation_id) = current_conversation_id.take() else {
7605 return Ok(());
7606 };
7607 let messages = std::mem::take(current_messages);
7608 let last_message_id = std::mem::take(current_last_message_id);
7609 f(conversation_id, messages, last_message_id)
7610 };
7611
7612 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7613 start_conversation_id,
7614 end_conversation_id,
7615 |row| {
7616 if current_conversation_id != Some(row.conversation_id) {
7617 flush_current(
7618 &mut current_conversation_id,
7619 &mut current_messages,
7620 &mut current_last_message_id,
7621 )?;
7622 current_conversation_id = Some(row.conversation_id);
7623 }
7624 current_last_message_id = row.id;
7625 current_messages.push(LexicalRebuildGroupedMessageRow {
7626 idx: row.idx,
7627 is_tool_role: row.role == "tool",
7628 created_at: row.created_at,
7629 content: row.content,
7630 });
7631 Ok(())
7632 },
7633 )
7634 .with_context(|| "streaming grouped lexical rebuild messages")?;
7635
7636 flush_current(
7637 &mut current_conversation_id,
7638 &mut current_messages,
7639 &mut current_last_message_id,
7640 )
7641 .with_context(|| "flushing grouped lexical rebuild messages")
7642 }
7643
7644 pub fn stream_grouped_messages_for_lexical_rebuild_from_conversation_id<F>(
7647 &self,
7648 start_conversation_id: i64,
7649 f: F,
7650 ) -> Result<()>
7651 where
7652 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7653 {
7654 self.stream_grouped_messages_for_lexical_rebuild_between_conversation_ids(
7655 start_conversation_id,
7656 i64::MAX,
7657 f,
7658 )
7659 }
7660
7661 pub fn stream_messages_for_lexical_rebuild_from_conversation_id<F>(
7664 &self,
7665 start_conversation_id: i64,
7666 f: F,
7667 ) -> Result<()>
7668 where
7669 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7670 {
7671 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
7672 start_conversation_id,
7673 i64::MAX,
7674 f,
7675 )
7676 }
7677
7678 pub fn get_source(&self, id: &str) -> Result<Option<Source>> {
7680 let result = self.conn.query_row_map(
7681 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources WHERE id = ?1",
7682 fparams![id],
7683 |row| {
7684 let kind_str: String = row.get_typed(1)?;
7685 let config_json_str: Option<String> = row.get_typed(5)?;
7686 Ok(Source {
7687 id: row.get_typed(0)?,
7688 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7689 host_label: row.get_typed(2)?,
7690 machine_id: row.get_typed(3)?,
7691 platform: row.get_typed(4)?,
7692 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7693 created_at: row.get_typed(6)?,
7694 updated_at: row.get_typed(7)?,
7695 })
7696 },
7697 );
7698 Ok(result.optional()?)
7699 }
7700
7701 pub fn list_sources(&self) -> Result<Vec<Source>> {
7703 self.conn
7704 .query_map_collect(
7705 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources ORDER BY id",
7706 fparams![],
7707 |row| {
7708 let kind_str: String = row.get_typed(1)?;
7709 let config_json_str: Option<String> = row.get_typed(5)?;
7710 Ok(Source {
7711 id: row.get_typed(0)?,
7712 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
7713 host_label: row.get_typed(2)?,
7714 machine_id: row.get_typed(3)?,
7715 platform: row.get_typed(4)?,
7716 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
7717 created_at: row.get_typed(6)?,
7718 updated_at: row.get_typed(7)?,
7719 })
7720 },
7721 )
7722 .with_context(|| "listing sources")
7723 }
7724
7725 pub fn get_source_ids(&self) -> Result<Vec<String>> {
7727 self.conn
7728 .query_map_collect(
7729 "SELECT id FROM sources WHERE id != 'local' ORDER BY id",
7730 fparams![],
7731 |row| row.get_typed(0),
7732 )
7733 .with_context(|| "listing source ids")
7734 }
7735
7736 pub fn upsert_source(&self, source: &Source) -> Result<()> {
7738 self.invalidate_conversation_source_cache(source.id.as_str());
7739 let now = Self::now_millis();
7740 let kind_str = source.kind.to_string();
7741 let config_json_str = source
7742 .config_json
7743 .as_ref()
7744 .map(serde_json::to_string)
7745 .transpose()?;
7746
7747 self.conn.execute_compat(
7751 "INSERT INTO sources(id, kind, host_label, machine_id, platform, config_json, created_at, updated_at)
7752 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
7753 ON CONFLICT(id) DO UPDATE SET
7754 kind = excluded.kind,
7755 host_label = excluded.host_label,
7756 machine_id = excluded.machine_id,
7757 platform = excluded.platform,
7758 config_json = excluded.config_json,
7759 updated_at = excluded.updated_at
7760 WHERE NOT (
7761 sources.kind IS excluded.kind
7762 AND sources.host_label IS excluded.host_label
7763 AND sources.machine_id IS excluded.machine_id
7764 AND sources.platform IS excluded.platform
7765 AND sources.config_json IS excluded.config_json
7766 )",
7767 fparams![
7768 source.id.as_str(),
7769 kind_str.as_str(),
7770 source.host_label.as_deref(),
7771 source.machine_id.as_deref(),
7772 source.platform.as_deref(),
7773 config_json_str.as_deref(),
7774 source.created_at.unwrap_or(now),
7775 now
7776 ],
7777 )?;
7778 Ok(())
7779 }
7780
7781 fn historical_bundle_key_hash(
7782 version: u32,
7783 bundle: &HistoricalDatabaseBundle,
7784 include_bundle_stats: bool,
7785 ) -> String {
7786 let signature = if include_bundle_stats {
7787 format!(
7788 "{}:{}:{}:{}",
7789 version,
7790 bundle.root_path.display(),
7791 bundle.total_bytes,
7792 bundle.modified_at_ms
7793 )
7794 } else {
7795 format!("{}:{}", version, bundle.root_path.display())
7796 };
7797 blake3::hash(signature.as_bytes()).to_hex().to_string()
7798 }
7799
7800 fn historical_bundle_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7801 format!(
7802 "historical_bundle_salvaged:{}",
7803 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_LEDGER_VERSION, bundle, false)
7804 )
7805 }
7806
7807 fn historical_bundle_legacy_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
7808 let signature = format!(
7809 "{}:{}:{}:{}",
7810 HISTORICAL_SALVAGE_LEDGER_VERSION,
7811 bundle.root_path.display(),
7812 bundle.total_bytes,
7813 bundle.modified_at_ms
7814 );
7815 format!(
7816 "historical_bundle_salvaged:{}",
7817 blake3::hash(signature.as_bytes()).to_hex()
7818 )
7819 }
7820
7821 fn historical_bundle_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
7822 format!(
7823 "historical_bundle_progress:{}",
7824 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_PROGRESS_VERSION, bundle, false)
7825 )
7826 }
7827
7828 fn historical_bundle_legacy_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
7829 let signature = format!(
7830 "{}:{}:{}:{}",
7831 HISTORICAL_SALVAGE_PROGRESS_VERSION,
7832 bundle.root_path.display(),
7833 bundle.total_bytes,
7834 bundle.modified_at_ms
7835 );
7836 format!(
7837 "historical_bundle_progress:{}",
7838 blake3::hash(signature.as_bytes()).to_hex()
7839 )
7840 }
7841
7842 fn historical_bundle_already_imported(
7843 &self,
7844 bundle: &HistoricalDatabaseBundle,
7845 ) -> Result<bool> {
7846 for key in [
7847 Self::historical_bundle_meta_key(bundle),
7848 Self::historical_bundle_legacy_meta_key(bundle),
7849 ] {
7850 let existing: Option<String> = self
7851 .conn
7852 .query_row_map(
7853 "SELECT value FROM meta WHERE key = ?1",
7854 fparams![key.as_str()],
7855 |row| row.get_typed(0),
7856 )
7857 .optional()?;
7858 if existing.is_some() {
7859 return Ok(true);
7860 }
7861 }
7862 Ok(false)
7863 }
7864
7865 pub(crate) fn has_pending_historical_bundles(&self, canonical_db_path: &Path) -> Result<bool> {
7866 for bundle in discover_historical_database_bundles(canonical_db_path) {
7867 if !self.historical_bundle_already_imported(&bundle)? {
7868 return Ok(true);
7869 }
7870 }
7871 Ok(false)
7872 }
7873
7874 fn load_historical_bundle_progress(
7875 &self,
7876 bundle: &HistoricalDatabaseBundle,
7877 ) -> Result<Option<HistoricalBundleProgress>> {
7878 for key in [
7879 Self::historical_bundle_progress_key(bundle),
7880 Self::historical_bundle_legacy_progress_key(bundle),
7881 ] {
7882 let raw: Option<String> = self
7883 .conn
7884 .query_row_map(
7885 "SELECT value FROM meta WHERE key = ?1",
7886 fparams![key.as_str()],
7887 |row| row.get_typed(0),
7888 )
7889 .optional()?;
7890 let Some(raw) = raw else {
7891 continue;
7892 };
7893 let parsed: HistoricalBundleProgress =
7894 serde_json::from_str(&raw).with_context(|| {
7895 format!(
7896 "parsing historical salvage progress checkpoint for {}",
7897 bundle.root_path.display()
7898 )
7899 })?;
7900 if parsed.progress_version == HISTORICAL_SALVAGE_PROGRESS_VERSION {
7901 return Ok(Some(parsed));
7902 }
7903 }
7904 Ok(None)
7905 }
7906
7907 fn record_historical_bundle_progress(
7908 &self,
7909 bundle: &HistoricalDatabaseBundle,
7910 method: &str,
7911 last_completed_source_row_id: i64,
7912 conversations_imported: usize,
7913 messages_imported: usize,
7914 ) -> Result<()> {
7915 let key = Self::historical_bundle_progress_key(bundle);
7916 let value = HistoricalBundleProgress {
7917 progress_version: HISTORICAL_SALVAGE_PROGRESS_VERSION,
7918 path: bundle.root_path.display().to_string(),
7919 bytes: bundle.total_bytes,
7920 modified_at_ms: bundle.modified_at_ms,
7921 method: method.to_string(),
7922 last_completed_source_row_id,
7923 conversations_imported,
7924 messages_imported,
7925 updated_at_ms: Self::now_millis(),
7926 };
7927 let value_str = serde_json::to_string(&value)?;
7928 self.conn.execute_compat(
7929 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
7930 fparams![key.as_str(), value_str.as_str()],
7931 )?;
7932 Ok(())
7933 }
7934
7935 fn clear_historical_bundle_progress(&self, bundle: &HistoricalDatabaseBundle) -> Result<()> {
7936 for key in [
7937 Self::historical_bundle_progress_key(bundle),
7938 Self::historical_bundle_legacy_progress_key(bundle),
7939 ] {
7940 self.conn
7941 .execute_compat("DELETE FROM meta WHERE key = ?1", fparams![key.as_str()])?;
7942 }
7943 Ok(())
7944 }
7945
7946 fn record_historical_bundle_import(
7947 &self,
7948 bundle: &HistoricalDatabaseBundle,
7949 method: &str,
7950 conversations_imported: usize,
7951 messages_imported: usize,
7952 ) -> Result<()> {
7953 let key = Self::historical_bundle_meta_key(bundle);
7954 let value = serde_json::json!({
7955 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
7956 "path": bundle.root_path.display().to_string(),
7957 "bytes": bundle.total_bytes,
7958 "modified_at_ms": bundle.modified_at_ms,
7959 "method": method,
7960 "conversations_imported": conversations_imported,
7961 "messages_imported": messages_imported,
7962 "recorded_at_ms": Self::now_millis(),
7963 });
7964 let value_str = serde_json::to_string(&value)?;
7965 self.conn.execute_compat(
7966 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
7967 fparams![key.as_str(), value_str.as_str()],
7968 )?;
7969 Ok(())
7970 }
7971
7972 fn historical_import_error_is_split_retryable(err: &anyhow::Error) -> bool {
7973 const RETRYABLE_PATTERNS: &[&str] = &[
7974 "out of memory",
7975 "string or blob too big",
7976 "too many sql variables",
7977 ];
7978 err.chain().any(|cause| {
7979 let rendered = cause.to_string().to_ascii_lowercase();
7980 RETRYABLE_PATTERNS
7981 .iter()
7982 .any(|pattern| rendered.contains(pattern))
7983 })
7984 }
7985
7986 fn split_historical_batch_entry_messages(
7987 entry: &HistoricalBatchEntry,
7988 ) -> Option<(HistoricalBatchEntry, HistoricalBatchEntry)> {
7989 if entry.conversation.messages.len() < 2 {
7990 return None;
7991 }
7992 let split_at = entry.conversation.messages.len() / 2;
7993 if split_at == 0 || split_at >= entry.conversation.messages.len() {
7994 return None;
7995 }
7996
7997 let mut left = entry.clone();
7998 left.conversation.messages = entry.conversation.messages[..split_at].to_vec();
7999
8000 let mut right = entry.clone();
8001 right.conversation.messages = entry.conversation.messages[split_at..].to_vec();
8002
8003 Some((left, right))
8004 }
8005
8006 fn import_historical_batch_with_retry<F>(
8007 entries: &[HistoricalBatchEntry],
8008 insert_batch: &mut F,
8009 ) -> Result<HistoricalBatchImportTotals>
8010 where
8011 F: FnMut(&[HistoricalBatchEntry]) -> Result<HistoricalBatchImportTotals>,
8012 {
8013 match insert_batch(entries) {
8014 Ok(totals) => Ok(totals),
8015 Err(err) if Self::historical_import_error_is_split_retryable(&err) => {
8016 if entries.len() > 1 {
8017 let mid = entries.len() / 2;
8018 tracing::warn!(
8019 batch_entries = entries.len(),
8020 split_left = mid,
8021 split_right = entries.len() - mid,
8022 error = %err,
8023 "historical salvage batch failed; retrying in smaller sub-batches"
8024 );
8025 let left =
8026 Self::import_historical_batch_with_retry(&entries[..mid], insert_batch)?;
8027 let right =
8028 Self::import_historical_batch_with_retry(&entries[mid..], insert_batch)?;
8029 return Ok(HistoricalBatchImportTotals {
8030 inserted_source_rows: left.inserted_source_rows
8031 + right.inserted_source_rows,
8032 inserted_messages: left.inserted_messages + right.inserted_messages,
8033 });
8034 }
8035
8036 if let Some(entry) = entries.first()
8037 && let Some((left, right)) = Self::split_historical_batch_entry_messages(entry)
8038 {
8039 tracing::warn!(
8040 source_row_id = entry.source_row_id,
8041 message_count = entry.conversation.messages.len(),
8042 error = %err,
8043 "historical salvage conversation failed; retrying in smaller message slices"
8044 );
8045 let left_totals = Self::import_historical_batch_with_retry(
8046 std::slice::from_ref(&left),
8047 insert_batch,
8048 )?;
8049 let right_totals = Self::import_historical_batch_with_retry(
8050 std::slice::from_ref(&right),
8051 insert_batch,
8052 )?;
8053 return Ok(HistoricalBatchImportTotals {
8054 inserted_source_rows: usize::from(
8055 left_totals.inserted_source_rows > 0
8056 || right_totals.inserted_source_rows > 0,
8057 ),
8058 inserted_messages: left_totals
8059 .inserted_messages
8060 .saturating_add(right_totals.inserted_messages),
8061 });
8062 }
8063
8064 Err(err)
8065 }
8066 Err(err) => Err(err),
8067 }
8068 }
8069
8070 fn import_historical_sources(&self, source_conn: &FrankenConnection) -> Result<()> {
8071 let sources: Vec<Source> = match source_conn.query_map_collect(
8072 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at
8073 FROM sources",
8074 fparams![],
8075 |row| {
8076 let raw_source_id: String = row.get_typed(0)?;
8077 let kind_str: String = row.get_typed(1)?;
8078 let raw_host_label: Option<String> = row.get_typed(2)?;
8079 let config_json_raw: Option<String> = row.get_typed(5)?;
8080 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
8081 Some(raw_source_id.as_str()),
8082 Some(kind_str.as_str()),
8083 raw_host_label.as_deref(),
8084 );
8085 Ok(Source {
8086 id: source_id,
8087 kind: source_kind,
8088 host_label,
8089 machine_id: row.get_typed(3)?,
8090 platform: row.get_typed(4)?,
8091 config_json: config_json_raw.and_then(|raw| serde_json::from_str(&raw).ok()),
8092 created_at: row.get_typed(6)?,
8093 updated_at: row.get_typed(7)?,
8094 })
8095 },
8096 ) {
8097 Ok(rows) => rows,
8098 Err(err) => {
8099 tracing::warn!(error = %err, "historical sources table unavailable; skipping source import");
8100 return Ok(());
8101 }
8102 };
8103
8104 for source in sources {
8105 self.upsert_source(&source)?;
8106 }
8107 Ok(())
8108 }
8109
8110 fn import_historical_conversations(
8111 &self,
8112 bundle: &HistoricalDatabaseBundle,
8113 salvage_method: &str,
8114 source_conn: &FrankenConnection,
8115 ) -> Result<(usize, usize)> {
8116 let batch_limits = historical_import_batch_limits();
8117 let cache_enabled = IndexingCache::is_enabled();
8118 let mut indexing_cache = IndexingCache::new();
8119 let mut known_sources: HashSet<String> = self
8120 .list_sources()?
8121 .into_iter()
8122 .map(|source| source.id)
8123 .collect();
8124 let resume_progress = self.load_historical_bundle_progress(bundle)?;
8125 let resume_after_row_id = resume_progress
8126 .as_ref()
8127 .map(|progress| progress.last_completed_source_row_id)
8128 .filter(|row_id| *row_id > 0);
8129
8130 tracing::info!(
8131 target: "cass::historical_salvage",
8132 batch_conversations = batch_limits.conversations,
8133 batch_messages = batch_limits.messages,
8134 batch_payload_chars = batch_limits.payload_chars,
8135 cache_enabled,
8136 resume_after_row_id,
8137 "configured historical salvage batch limits"
8138 );
8139
8140 if let Some(progress) = &resume_progress {
8141 tracing::info!(
8142 target: "cass::historical_salvage",
8143 path = %bundle.root_path.display(),
8144 resume_after_row_id = progress.last_completed_source_row_id,
8145 prior_conversations_imported = progress.conversations_imported,
8146 prior_messages_imported = progress.messages_imported,
8147 "resuming historical salvage bundle from durable checkpoint"
8148 );
8149 }
8150
8151 let conv_sql = if resume_after_row_id.is_some() {
8157 "SELECT
8158 c.id,
8159 COALESCE(a.slug, 'unknown'),
8160 w.path,
8161 c.external_id,
8162 c.title,
8163 c.source_path,
8164 c.started_at,
8165 c.ended_at,
8166 c.approx_tokens,
8167 c.metadata_json,
8168 c.source_id,
8169 c.origin_host
8170 FROM conversations c
8171 LEFT JOIN agents a ON c.agent_id = a.id
8172 LEFT JOIN workspaces w ON c.workspace_id = w.id
8173 WHERE c.id > ?1
8174 ORDER BY c.id"
8175 } else {
8176 "SELECT
8177 c.id,
8178 COALESCE(a.slug, 'unknown'),
8179 w.path,
8180 c.external_id,
8181 c.title,
8182 c.source_path,
8183 c.started_at,
8184 c.ended_at,
8185 c.approx_tokens,
8186 c.metadata_json,
8187 c.source_id,
8188 c.origin_host
8189 FROM conversations c
8190 LEFT JOIN agents a ON c.agent_id = a.id
8191 LEFT JOIN workspaces w ON c.workspace_id = w.id
8192 ORDER BY c.id"
8193 };
8194 let conv_params: &[ParamValue] =
8195 if let Some(last_completed_source_row_id) = resume_after_row_id {
8196 &[ParamValue::from(last_completed_source_row_id)]
8197 } else {
8198 &[]
8199 };
8200
8201 #[allow(clippy::type_complexity)]
8202 let conv_rows: Vec<(
8203 i64,
8204 String,
8205 Option<String>,
8206 Option<String>,
8207 Option<String>,
8208 String,
8209 Option<i64>,
8210 Option<i64>,
8211 Option<i64>,
8212 Option<String>,
8213 Option<String>,
8214 Option<String>,
8215 )> = source_conn
8216 .query_map_collect(conv_sql, conv_params, |row| {
8217 Ok((
8218 row.get_typed::<i64>(0)?,
8219 row.get_typed::<String>(1)?,
8220 row.get_typed::<Option<String>>(2)?,
8221 row.get_typed::<Option<String>>(3)?,
8222 row.get_typed::<Option<String>>(4)?,
8223 row.get_typed::<String>(5)?,
8224 row.get_typed::<Option<i64>>(6)?,
8225 row.get_typed::<Option<i64>>(7)?,
8226 row.get_typed::<Option<i64>>(8)?,
8227 row.get_typed::<Option<String>>(9)?,
8228 row.get_typed::<Option<String>>(10)?,
8229 row.get_typed::<Option<String>>(11)?,
8230 ))
8231 })
8232 .context("querying historical conversations")?;
8233
8234 let msg_sql = "SELECT idx, role, author, created_at, content, extra_json
8235 FROM messages
8236 WHERE conversation_id = ?1
8237 ORDER BY idx";
8238
8239 let mut imported_conversations = resume_progress
8240 .as_ref()
8241 .map(|progress| progress.conversations_imported)
8242 .unwrap_or(0);
8243 let mut imported_messages = resume_progress
8244 .as_ref()
8245 .map(|progress| progress.messages_imported)
8246 .unwrap_or(0);
8247 let mut pending_batch: Vec<HistoricalBatchEntry> = Vec::new();
8248 let mut pending_batch_messages = 0usize;
8249 let mut pending_batch_chars = 0usize;
8250 let mut pending_batch_first_row_id: Option<i64> = None;
8251 let mut pending_batch_last_row_id: Option<i64> = None;
8252
8253 let flush_batch = |storage: &FrankenStorage,
8254 batch: &mut Vec<HistoricalBatchEntry>,
8255 pending_messages: &mut usize,
8256 pending_chars: &mut usize,
8257 first_row_id: &mut Option<i64>,
8258 last_row_id: &mut Option<i64>,
8259 imported_conversations: &mut usize,
8260 imported_messages: &mut usize|
8261 -> Result<()> {
8262 if batch.is_empty() {
8263 return Ok(());
8264 }
8265
8266 let batch_first_row_id = *first_row_id;
8267 let batch_last_row_id = *last_row_id;
8268 if historical_salvage_debug_enabled() {
8269 eprintln!(
8270 "[historical-salvage] flushing batch rows {:?}..{:?} conversations={} messages={} payload_chars={}",
8271 batch_first_row_id,
8272 batch_last_row_id,
8273 batch.len(),
8274 *pending_messages,
8275 *pending_chars
8276 );
8277 }
8278 tracing::info!(
8279 target: "cass::historical_salvage",
8280 batch_conversations = batch.len(),
8281 batch_messages = *pending_messages,
8282 batch_payload_chars = *pending_chars,
8283 first_source_row_id = batch_first_row_id,
8284 last_source_row_id = batch_last_row_id,
8285 "flushing historical salvage batch"
8286 );
8287
8288 let mut insert_batch =
8289 |entries: &[HistoricalBatchEntry]| -> Result<HistoricalBatchImportTotals> {
8290 let borrowed_batch: Vec<(i64, Option<i64>, &Conversation)> = entries
8291 .iter()
8292 .map(|entry| (entry.agent_id, entry.workspace_id, &entry.conversation))
8293 .collect();
8294 let outcomes = storage
8295 .insert_conversations_batched(&borrowed_batch)
8296 .with_context(|| {
8297 let first_source_row_id =
8298 entries.first().map(|entry| entry.source_row_id);
8299 let last_source_row_id =
8300 entries.last().map(|entry| entry.source_row_id);
8301 format!(
8302 "inserting historical salvage batch source rows {:?}..{:?}",
8303 first_source_row_id, last_source_row_id
8304 )
8305 })?;
8306 let mut totals = HistoricalBatchImportTotals::default();
8307 for outcome in outcomes {
8308 if !outcome.inserted_indices.is_empty() {
8309 totals.inserted_source_rows += 1;
8310 totals.inserted_messages += outcome.inserted_indices.len();
8311 }
8312 }
8313 Ok(totals)
8314 };
8315 let totals =
8316 Self::import_historical_batch_with_retry(batch.as_slice(), &mut insert_batch)?;
8317 *imported_conversations =
8318 (*imported_conversations).saturating_add(totals.inserted_source_rows);
8319 *imported_messages = (*imported_messages).saturating_add(totals.inserted_messages);
8320 if let Some(last_completed_row_id) = batch_last_row_id {
8321 storage.record_historical_bundle_progress(
8322 bundle,
8323 salvage_method,
8324 last_completed_row_id,
8325 *imported_conversations,
8326 *imported_messages,
8327 )?;
8328 }
8329 tracing::info!(
8330 target: "cass::historical_salvage",
8331 batch_conversations = batch.len(),
8332 batch_messages = *pending_messages,
8333 imported_conversations = *imported_conversations,
8334 imported_messages = *imported_messages,
8335 first_source_row_id = batch_first_row_id,
8336 last_source_row_id = batch_last_row_id,
8337 "historical salvage batch committed"
8338 );
8339 if historical_salvage_debug_enabled() {
8340 eprintln!(
8341 "[historical-salvage] committed batch rows {:?}..{:?} imported_conversations={} imported_messages={}",
8342 batch_first_row_id,
8343 batch_last_row_id,
8344 *imported_conversations,
8345 *imported_messages
8346 );
8347 }
8348 batch.clear();
8349 *pending_messages = 0;
8350 *pending_chars = 0;
8351 *first_row_id = None;
8352 *last_row_id = None;
8353 Ok(())
8354 };
8355
8356 for (
8357 conversation_row_id,
8358 agent_slug,
8359 workspace_path,
8360 external_id,
8361 title,
8362 source_path,
8363 started_at,
8364 ended_at,
8365 approx_tokens,
8366 metadata_json_raw,
8367 raw_source_id,
8368 raw_origin_host,
8369 ) in conv_rows
8370 {
8371 let source_id = crate::search::tantivy::normalized_index_source_id(
8372 raw_source_id.as_deref(),
8373 None,
8374 raw_origin_host.as_deref(),
8375 );
8376 let origin_host =
8377 crate::search::tantivy::normalized_index_origin_host(raw_origin_host.as_deref());
8378
8379 let messages: Vec<Message> = source_conn
8380 .query_map_collect(msg_sql, fparams![conversation_row_id], |msg_row| {
8381 let role: String = msg_row.get_typed(1)?;
8382 Ok(Message {
8383 id: None,
8384 idx: msg_row.get_typed(0)?,
8385 role: match role.as_str() {
8386 "user" => MessageRole::User,
8387 "agent" | "assistant" => MessageRole::Agent,
8388 "tool" => MessageRole::Tool,
8389 "system" => MessageRole::System,
8390 other => MessageRole::Other(other.to_string()),
8391 },
8392 author: msg_row.get_typed(2)?,
8393 created_at: msg_row.get_typed(3)?,
8394 content: msg_row.get_typed(4)?,
8395 extra_json: parse_historical_json_column(msg_row.get_typed(5)?),
8396 snippets: Vec::new(),
8397 })
8398 })
8399 .context("collecting historical message rows")?;
8400
8401 if messages.is_empty() {
8402 continue;
8403 }
8404
8405 let conversation_message_count = messages.len();
8406 let conversation_chars = messages
8407 .iter()
8408 .map(message_payload_size_hint)
8409 .sum::<usize>();
8410
8411 let conversation = Conversation {
8412 id: None,
8413 agent_slug: agent_slug.clone(),
8414 workspace: workspace_path.map(PathBuf::from),
8415 external_id,
8416 title,
8417 source_path: PathBuf::from(source_path),
8418 started_at,
8419 ended_at,
8420 approx_tokens,
8421 metadata_json: parse_json_column(metadata_json_raw),
8422 messages,
8423 source_id,
8424 origin_host,
8425 };
8426
8427 if !known_sources.contains(&conversation.source_id) {
8428 let placeholder = if conversation.source_id == LOCAL_SOURCE_ID {
8429 Source::local()
8430 } else {
8431 Source {
8432 id: conversation.source_id.clone(),
8433 kind: SourceKind::Ssh,
8434 host_label: conversation.origin_host.clone(),
8435 machine_id: None,
8436 platform: None,
8437 config_json: None,
8438 created_at: None,
8439 updated_at: None,
8440 }
8441 };
8442 self.upsert_source(&placeholder)?;
8443 known_sources.insert(conversation.source_id.clone());
8444 }
8445
8446 let agent = Agent {
8447 id: None,
8448 slug: agent_slug.clone(),
8449 name: agent_slug,
8450 version: None,
8451 kind: AgentKind::Cli,
8452 };
8453 let agent_id = if cache_enabled {
8454 indexing_cache.get_or_insert_agent(self, &agent)?
8455 } else {
8456 self.ensure_agent(&agent)?
8457 };
8458 let workspace_id = if let Some(workspace) = &conversation.workspace {
8459 if cache_enabled {
8460 Some(indexing_cache.get_or_insert_workspace(self, workspace, None)?)
8461 } else {
8462 Some(self.ensure_workspace(workspace, None)?)
8463 }
8464 } else {
8465 None
8466 };
8467
8468 let exceeds_pending_limits = !pending_batch.is_empty()
8469 && (pending_batch.len() >= batch_limits.conversations
8470 || pending_batch_messages.saturating_add(conversation_message_count)
8471 > batch_limits.messages
8472 || pending_batch_chars.saturating_add(conversation_chars)
8473 > batch_limits.payload_chars);
8474 if exceeds_pending_limits {
8475 flush_batch(
8476 self,
8477 &mut pending_batch,
8478 &mut pending_batch_messages,
8479 &mut pending_batch_chars,
8480 &mut pending_batch_first_row_id,
8481 &mut pending_batch_last_row_id,
8482 &mut imported_conversations,
8483 &mut imported_messages,
8484 )?;
8485 }
8486
8487 if pending_batch_first_row_id.is_none() {
8488 pending_batch_first_row_id = Some(conversation_row_id);
8489 }
8490 pending_batch_last_row_id = Some(conversation_row_id);
8491 pending_batch_messages =
8492 pending_batch_messages.saturating_add(conversation_message_count);
8493 pending_batch_chars = pending_batch_chars.saturating_add(conversation_chars);
8494 pending_batch.push(HistoricalBatchEntry {
8495 source_row_id: conversation_row_id,
8496 agent_id,
8497 workspace_id,
8498 conversation,
8499 });
8500
8501 if pending_batch.len() >= batch_limits.conversations
8502 || pending_batch_messages >= batch_limits.messages
8503 || pending_batch_chars >= batch_limits.payload_chars
8504 {
8505 flush_batch(
8506 self,
8507 &mut pending_batch,
8508 &mut pending_batch_messages,
8509 &mut pending_batch_chars,
8510 &mut pending_batch_first_row_id,
8511 &mut pending_batch_last_row_id,
8512 &mut imported_conversations,
8513 &mut imported_messages,
8514 )?;
8515 }
8516 }
8517
8518 flush_batch(
8519 self,
8520 &mut pending_batch,
8521 &mut pending_batch_messages,
8522 &mut pending_batch_chars,
8523 &mut pending_batch_first_row_id,
8524 &mut pending_batch_last_row_id,
8525 &mut imported_conversations,
8526 &mut imported_messages,
8527 )?;
8528
8529 if cache_enabled {
8530 let (hits, misses, hit_rate) = indexing_cache.stats();
8531 tracing::info!(
8532 target: "cass::historical_salvage",
8533 hits,
8534 misses,
8535 hit_rate = format!("{:.1}%", hit_rate * 100.0),
8536 agents = indexing_cache.agent_count(),
8537 workspaces = indexing_cache.workspace_count(),
8538 sources = known_sources.len(),
8539 "historical salvage cache stats"
8540 );
8541 }
8542
8543 Ok((imported_conversations, imported_messages))
8544 }
8545
8546 pub fn salvage_historical_databases(
8547 &self,
8548 canonical_db_path: &Path,
8549 ) -> Result<HistoricalSalvageOutcome> {
8550 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
8551 let mut outcome = HistoricalSalvageOutcome {
8552 bundles_considered: ordered_bundles.len(),
8553 ..HistoricalSalvageOutcome::default()
8554 };
8555
8556 for bundle in ordered_bundles {
8557 if self.historical_bundle_already_imported(&bundle)? {
8558 self.clear_historical_bundle_progress(&bundle)?;
8559 continue;
8560 }
8561
8562 let source = match open_historical_bundle_for_salvage(&bundle).with_context(|| {
8563 format!(
8564 "opening historical bundle {} for salvage",
8565 bundle.root_path.display()
8566 )
8567 }) {
8568 Ok(source) => source,
8569 Err(err) => {
8570 tracing::warn!(
8571 path = %bundle.root_path.display(),
8572 error = %err,
8573 "skipping unreadable historical cass database bundle during salvage"
8574 );
8575 self.clear_historical_bundle_progress(&bundle)?;
8576 continue;
8577 }
8578 };
8579
8580 if let Some(progress) = self.load_historical_bundle_progress(&bundle)? {
8588 let backup_max_conversation_id: i64 = source
8589 .conn
8590 .query_row_map(
8591 "SELECT COALESCE(MAX(id), 0) FROM conversations",
8592 fparams![],
8593 |row| row.get_typed(0),
8594 )
8595 .unwrap_or(0);
8596 if backup_max_conversation_id > 0
8597 && progress.last_completed_source_row_id >= backup_max_conversation_id
8598 {
8599 self.record_historical_bundle_import(
8600 &bundle,
8601 source.method,
8602 progress.conversations_imported,
8603 progress.messages_imported,
8604 )?;
8605 self.clear_historical_bundle_progress(&bundle)?;
8606 tracing::info!(
8607 path = %bundle.root_path.display(),
8608 last_completed_source_row_id = progress.last_completed_source_row_id,
8609 backup_max_conversation_id,
8610 conversations_imported = progress.conversations_imported,
8611 messages_imported = progress.messages_imported,
8612 "historical bundle already fully imported per checkpoint; marking salvaged and skipping O(n) re-scan"
8613 );
8614 continue;
8615 }
8616 }
8617
8618 self.import_historical_sources(&source.conn)?;
8619 let (imported_conversations, imported_messages) =
8620 self.import_historical_conversations(&bundle, source.method, &source.conn)?;
8621 self.record_historical_bundle_import(
8622 &bundle,
8623 source.method,
8624 imported_conversations,
8625 imported_messages,
8626 )?;
8627 self.clear_historical_bundle_progress(&bundle)?;
8628
8629 outcome.bundles_imported += 1;
8630 outcome.conversations_imported += imported_conversations;
8631 outcome.messages_imported += imported_messages;
8632
8633 tracing::info!(
8634 path = %bundle.root_path.display(),
8635 bytes = bundle.total_bytes,
8636 method = source.method,
8637 imported_conversations,
8638 imported_messages,
8639 "salvaged historical cass database bundle"
8640 );
8641 }
8642
8643 Ok(outcome)
8644 }
8645
8646 pub fn delete_source(&self, id: &str, _cascade: bool) -> Result<bool> {
8648 if id == LOCAL_SOURCE_ID {
8649 anyhow::bail!("cannot delete the local source");
8650 }
8651 let count = self
8652 .conn
8653 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams![id])?;
8654 if count > 0 {
8655 self.invalidate_conversation_source_cache(id);
8656 }
8657 Ok(count > 0)
8658 }
8659
8660 pub fn insert_conversation_tree(
8662 &self,
8663 agent_id: i64,
8664 workspace_id: Option<i64>,
8665 conv: &Conversation,
8666 ) -> Result<InsertOutcome> {
8667 let normalized_conv = normalized_conversation_for_storage(conv);
8668 let conv = normalized_conv.as_ref();
8669 self.ensure_source_for_conversation(conv)?;
8670 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
8671 let defer_analytics_updates = defer_analytics_updates_enabled();
8672 let conversation_key = conversation_merge_key(agent_id, conv);
8673 let mut tx = self.conn.transaction()?;
8674 let existing = franken_find_existing_conversation_with_tail_by_key(
8675 &tx,
8676 &conversation_key,
8677 Some(conv),
8678 )?;
8679 if let Some(existing) = existing {
8680 let outcome = self.franken_append_messages_with_tail_in_tx(
8681 &tx,
8682 agent_id,
8683 existing.id,
8684 conv,
8685 existing.tail_state,
8686 defer_lexical_updates,
8687 defer_analytics_updates,
8688 )?;
8689 tx.commit()?;
8690 return Ok(outcome);
8691 }
8692
8693 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
8694 &tx,
8695 agent_id,
8696 workspace_id,
8697 conv,
8698 &conversation_key,
8699 )? {
8700 ConversationInsertStatus::Inserted(conv_id) => conv_id,
8701 ConversationInsertStatus::Existing(existing_id) => {
8702 let ExistingMessageLookup {
8703 by_idx: mut existing_messages,
8704 replay: mut existing_replay_fingerprints,
8705 } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
8706 let ExistingConversationNewMessages {
8707 messages: new_messages,
8708 new_chars,
8709 idx_collision_count,
8710 first_collision_idx,
8711 } = collect_new_messages_for_existing_conversation(
8712 existing_id,
8713 conv,
8714 &mut existing_messages,
8715 &mut existing_replay_fingerprints,
8716 "skipping replay-equivalent recovered message with shifted idx",
8717 );
8718 let (inserted_last_idx, inserted_last_created_at) =
8719 borrowed_messages_tail_state(&new_messages);
8720 let mut inserted_indices = Vec::new();
8721 let mut fts_entries = Vec::new();
8722 let mut fts_pending_chars = 0usize;
8723 let mut _fts_inserted_total = 0usize;
8724 let inserted_message_ids =
8725 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
8726 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8727 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8728 if !defer_lexical_updates {
8729 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8730 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
8731 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8732 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8733 {
8734 flush_pending_fts_entries(
8735 self,
8736 &tx,
8737 &mut fts_entries,
8738 &mut fts_pending_chars,
8739 &mut _fts_inserted_total,
8740 )?;
8741 }
8742 }
8743 inserted_indices.push(msg.idx);
8744 }
8745
8746 if idx_collision_count > 0 {
8747 tracing::warn!(
8748 conversation_id = existing_id,
8749 collision_count = idx_collision_count,
8750 first_idx = first_collision_idx,
8751 source_path = %conv.source_path.display(),
8752 "message idx collisions encountered while merging recovered conversation; retaining canonical message variants"
8753 );
8754 }
8755
8756 if !defer_lexical_updates {
8757 flush_pending_fts_entries(
8758 self,
8759 &tx,
8760 &mut fts_entries,
8761 &mut fts_pending_chars,
8762 &mut _fts_inserted_total,
8763 )?;
8764 }
8765
8766 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
8767 franken_update_conversation_tail_state(
8768 &tx,
8769 existing_id,
8770 conv_last_ts,
8771 inserted_last_idx,
8772 inserted_last_created_at,
8773 )?;
8774 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
8775 {
8776 franken_update_external_conversation_tail_lookup_key(
8777 &tx,
8778 &lookup_key,
8779 conv_last_ts,
8780 inserted_last_idx,
8781 inserted_last_created_at,
8782 )?;
8783 }
8784
8785 if !defer_analytics_updates && !inserted_indices.is_empty() {
8786 franken_update_daily_stats_in_tx(
8787 self,
8788 &tx,
8789 &conv.agent_slug,
8790 &conv.source_id,
8791 conversation_effective_started_at(conv),
8792 StatsDelta {
8793 session_count_delta: 0,
8794 message_count_delta: inserted_indices.len() as i64,
8795 total_chars_delta: new_chars,
8796 },
8797 )?;
8798 }
8799
8800 tx.commit()?;
8801 return Ok(InsertOutcome {
8802 conversation_id: existing_id,
8803 conversation_inserted: false,
8804 inserted_indices,
8805 });
8806 }
8807 };
8808 let mut fts_entries = Vec::new();
8809 let mut fts_pending_chars = 0usize;
8810 let mut _fts_inserted_total = 0usize;
8811 let mut total_chars: i64 = 0;
8812 let mut inserted_indices = Vec::new();
8813 let mut pending_messages = HashMap::new();
8814 let mut pending_replay_fingerprints = HashSet::new();
8815 let mut idx_collision_count = 0usize;
8816 let mut first_collision_idx: Option<i64> = None;
8817 let mut new_messages = Vec::new();
8818 for msg in &conv.messages {
8819 let incoming_fingerprint = message_merge_fingerprint(msg);
8820 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
8821 if existing_fingerprint != &incoming_fingerprint {
8822 idx_collision_count = idx_collision_count.saturating_add(1);
8823 first_collision_idx.get_or_insert(msg.idx);
8824 }
8825 continue;
8826 }
8827 let incoming_replay = message_replay_fingerprint(msg);
8828 if pending_replay_fingerprints.contains(&incoming_replay) {
8829 tracing::debug!(
8830 conversation_id = conv_id,
8831 idx = msg.idx,
8832 source_path = %conv.source_path.display(),
8833 "skipping replay-equivalent duplicate message within new conversation insert"
8834 );
8835 continue;
8836 }
8837 pending_messages.insert(msg.idx, incoming_fingerprint);
8838 pending_replay_fingerprints.insert(incoming_replay);
8839 new_messages.push(msg);
8840 }
8841 let inserted_message_ids = franken_batch_insert_new_messages(&tx, conv_id, &new_messages)?;
8842 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
8843 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
8844 if !defer_lexical_updates {
8845 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
8846 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
8847 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
8848 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
8849 {
8850 flush_pending_fts_entries(
8851 self,
8852 &tx,
8853 &mut fts_entries,
8854 &mut fts_pending_chars,
8855 &mut _fts_inserted_total,
8856 )?;
8857 }
8858 }
8859 total_chars += msg.content.len() as i64;
8860 inserted_indices.push(msg.idx);
8861 }
8862 if idx_collision_count > 0 {
8863 tracing::warn!(
8864 conversation_id = conv_id,
8865 collision_count = idx_collision_count,
8866 first_idx = first_collision_idx,
8867 source_path = %conv.source_path.display(),
8868 "message idx collisions encountered while inserting a new conversation; retaining the first canonical variant per idx"
8869 );
8870 }
8871 if !defer_lexical_updates {
8872 flush_pending_fts_entries(
8873 self,
8874 &tx,
8875 &mut fts_entries,
8876 &mut fts_pending_chars,
8877 &mut _fts_inserted_total,
8878 )?;
8879 }
8880
8881 if !defer_analytics_updates {
8882 franken_update_daily_stats_in_tx(
8883 self,
8884 &tx,
8885 &conv.agent_slug,
8886 &conv.source_id,
8887 conversation_effective_started_at(conv),
8888 StatsDelta {
8889 session_count_delta: 1,
8890 message_count_delta: inserted_indices.len() as i64,
8891 total_chars_delta: total_chars,
8892 },
8893 )?;
8894 }
8895
8896 tx.commit()?;
8897 Ok(InsertOutcome {
8898 conversation_id: conv_id,
8899 conversation_inserted: true,
8900 inserted_indices,
8901 })
8902 }
8903
/// Test-only variant of the new-conversation insert path that records how long
/// each phase takes into `profile`.
///
/// The helper only supports the "fresh conversation" case: it returns an error
/// if a conversation matching the merge key already exists, or if the insert
/// reports that it reused an existing row. On success it also bumps
/// `profile.invocations`, `profile.messages` and `profile.inserted_messages`.
#[cfg(test)]
fn insert_conversation_tree_with_profile(
    &self,
    agent_id: i64,
    workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let started = Instant::now();
    let storage_conv = normalized_conversation_for_storage(conv);
    let conv = storage_conv.as_ref();

    let source_timer = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += source_timer.elapsed();

    let skip_fts = defer_storage_lexical_updates_enabled();
    let skip_stats = defer_analytics_updates_enabled();
    let merge_key = conversation_merge_key(agent_id, conv);

    let tx_timer = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += tx_timer.elapsed();

    let lookup_timer = Instant::now();
    let prior = franken_find_existing_conversation_by_key(&tx, &merge_key, Some(conv))?;
    profile.existing_lookup_duration += lookup_timer.elapsed();
    if let Some(existing_id) = prior {
        return Err(anyhow!(
            "profile helper expects new conversation path, found existing id {existing_id}"
        ));
    }

    let row_timer = Instant::now();
    let insert_status = franken_insert_conversation_or_get_existing_after_miss(
        &tx,
        agent_id,
        workspace_id,
        conv,
        &merge_key,
    )?;
    let conv_id = match insert_status {
        ConversationInsertStatus::Inserted(new_id) => new_id,
        ConversationInsertStatus::Existing(existing_id) => {
            return Err(anyhow!(
                "profile helper expected inserted conversation row, reused existing id {existing_id}"
            ));
        }
    };
    profile.conversation_row_duration += row_timer.elapsed();

    // De-duplicate incoming messages: first variant per idx wins, and
    // replay-equivalent duplicates are dropped.
    let mut fingerprint_by_idx = HashMap::new();
    let mut seen_replays = HashSet::new();
    let mut idx_collision_count = 0usize;
    let mut first_collision_idx: Option<i64> = None;
    let mut unique_messages = Vec::new();

    for message in &conv.messages {
        let fingerprint = message_merge_fingerprint(message);
        if let Some(kept_fingerprint) = fingerprint_by_idx.get(&message.idx) {
            if kept_fingerprint != &fingerprint {
                idx_collision_count = idx_collision_count.saturating_add(1);
                first_collision_idx.get_or_insert(message.idx);
            }
            continue;
        }

        // `insert` returns false when the replay fingerprint was already present.
        if !seen_replays.insert(message_replay_fingerprint(message)) {
            tracing::debug!(
                conversation_id = conv_id,
                idx = message.idx,
                source_path = %conv.source_path.display(),
                "skipping replay-equivalent duplicate message within profiled new conversation insert"
            );
            continue;
        }

        fingerprint_by_idx.insert(message.idx, fingerprint);
        unique_messages.push(message);
    }

    let mut inserted_indices = Vec::new();
    let mut total_chars: i64 = 0;
    let mut fts_batch = Vec::new();
    let mut fts_batch_chars = 0usize;
    let mut fts_written = 0usize;

    let insert_timer = Instant::now();
    let new_row_ids = franken_batch_insert_new_messages_with_profile(
        &tx,
        conv_id,
        &unique_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += insert_timer.elapsed();

    for (row_id, message) in new_row_ids.into_iter().zip(unique_messages) {
        let snippet_timer = Instant::now();
        franken_insert_snippets(&tx, row_id, &message.snippets)?;
        profile.snippet_insert_duration += snippet_timer.elapsed();

        if !skip_fts {
            let entry_timer = Instant::now();
            fts_batch.push(FtsEntry::from_message(row_id, message, conv));
            fts_batch_chars = fts_batch_chars.saturating_add(message.content.len());
            profile.fts_entry_duration += entry_timer.elapsed();

            let batch_full = fts_batch.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_batch_chars >= FTS_ENTRY_BATCH_MAX_CHARS;
            if batch_full {
                let flush_timer = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_batch,
                    &mut fts_batch_chars,
                    &mut fts_written,
                )?;
                profile.fts_flush_duration += flush_timer.elapsed();
            }
        }

        total_chars += message.content.len() as i64;
        inserted_indices.push(message.idx);
    }

    if idx_collision_count > 0 {
        tracing::warn!(
            conversation_id = conv_id,
            collision_count = idx_collision_count,
            first_idx = first_collision_idx,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling a new conversation insert; retaining the first canonical variant per idx"
        );
    }

    // Write out whatever is still buffered below the batch thresholds.
    if !skip_fts {
        let flush_timer = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_batch,
            &mut fts_batch_chars,
            &mut fts_written,
        )?;
        profile.fts_flush_duration += flush_timer.elapsed();
    }

    if !skip_stats {
        let stats_timer = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 1,
                message_count_delta: inserted_indices.len() as i64,
                total_chars_delta: total_chars,
            },
        )?;
        profile.analytics_duration += stats_timer.elapsed();
    }

    let commit_timer = Instant::now();
    tx.commit()?;
    profile.commit_duration += commit_timer.elapsed();
    profile.invocations += 1;
    profile.messages += conv.messages.len();
    profile.inserted_messages += inserted_indices.len();
    profile.total_duration += started.elapsed();

    Ok(InsertOutcome {
        conversation_id: conv_id,
        conversation_inserted: true,
        inserted_indices,
    })
}
9083
/// Test-only variant of the append path that records how long each phase takes
/// into `profile`.
///
/// The conversation must already be stored; otherwise an error is returned.
/// New messages are selected either from an append-only tail plan (when
/// `collect_append_only_tail_messages` yields one for the stored tail state) or
/// by comparing against the stored messages. `_workspace_id` is not used.
///
/// On success `profile.invocations`, `profile.messages` and
/// `profile.inserted_messages` are updated alongside the per-phase durations.
#[cfg(test)]
fn append_existing_conversation_with_profile(
    &self,
    agent_id: i64,
    _workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let started = Instant::now();
    let storage_conv = normalized_conversation_for_storage(conv);
    let conv = storage_conv.as_ref();

    let source_timer = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += source_timer.elapsed();

    let skip_fts = defer_storage_lexical_updates_enabled();
    let skip_stats = defer_analytics_updates_enabled();
    let conversation_key = conversation_merge_key(agent_id, conv);

    let tx_timer = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += tx_timer.elapsed();

    let lookup_timer = Instant::now();
    let lookup = franken_find_existing_conversation_with_tail_by_key(
        &tx,
        &conversation_key,
        Some(conv),
    )?;
    profile.existing_lookup_duration += lookup_timer.elapsed();
    let Some(stored) = lookup else {
        return Err(anyhow!(
            "append profile helper expects existing conversation for {conversation_key:?}"
        ));
    };
    let existing_id = stored.id;

    let plan_timer = Instant::now();
    let stored_tail = stored.tail_state;
    let stored_ended_at = stored_tail.and_then(|state| state.ended_at);
    let tail_plan = stored_tail.as_ref().and_then(|state| {
        collect_append_only_tail_messages(
            conv,
            state.last_message_idx,
            state.last_message_created_at,
        )
    });
    let used_append_tail_plan = tail_plan.is_some();
    profile.existing_idx_lookup_duration += plan_timer.elapsed();

    let dedupe_timer = Instant::now();
    let ExistingConversationNewMessages {
        messages: fresh_messages,
        new_chars,
        idx_collision_count,
        first_collision_idx,
    } = match tail_plan {
        Some(plan) => plan,
        // No usable tail plan: compare against what is already stored.
        None => {
            let ExistingMessageLookup {
                by_idx: mut stored_by_idx,
                replay: mut stored_replays,
            } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
            collect_new_messages_for_existing_conversation(
                existing_id,
                conv,
                &mut stored_by_idx,
                &mut stored_replays,
                "skipping replay-equivalent profiled append message with shifted idx",
            )
        }
    };
    profile.dedupe_filter_duration += dedupe_timer.elapsed();

    let (inserted_last_idx, inserted_last_created_at) =
        borrowed_messages_tail_state(&fresh_messages);
    let mut inserted_indices = Vec::new();
    let mut fts_batch = Vec::new();
    let mut fts_batch_chars = 0usize;
    let mut fts_written = 0usize;

    let insert_timer = Instant::now();
    let new_row_ids = franken_append_insert_new_messages_with_profile(
        &tx,
        existing_id,
        &fresh_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += insert_timer.elapsed();

    for (row_id, message) in new_row_ids.into_iter().zip(fresh_messages) {
        let snippet_timer = Instant::now();
        franken_insert_snippets(&tx, row_id, &message.snippets)?;
        profile.snippet_insert_duration += snippet_timer.elapsed();

        if !skip_fts {
            let entry_timer = Instant::now();
            fts_batch.push(FtsEntry::from_message(row_id, message, conv));
            fts_batch_chars = fts_batch_chars.saturating_add(message.content.len());
            profile.fts_entry_duration += entry_timer.elapsed();

            let batch_full = fts_batch.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_batch_chars >= FTS_ENTRY_BATCH_MAX_CHARS;
            if batch_full {
                let flush_timer = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_batch,
                    &mut fts_batch_chars,
                    &mut fts_written,
                )?;
                profile.fts_flush_duration += flush_timer.elapsed();
            }
        }

        inserted_indices.push(message.idx);
    }

    if idx_collision_count > 0 {
        tracing::warn!(
            conversation_id = existing_id,
            collision_count = idx_collision_count,
            first_idx = first_collision_idx,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling append merge; retaining canonical message variants"
        );
    }

    // Write out whatever is still buffered below the batch thresholds.
    if !skip_fts {
        let flush_timer = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_batch,
            &mut fts_batch_chars,
            &mut fts_written,
        )?;
        profile.fts_flush_duration += flush_timer.elapsed();
    }

    let row_timer = Instant::now();
    let exact_append_tail_set = if !used_append_tail_plan {
        let newest_incoming_ts = conv
            .messages
            .iter()
            .filter_map(|message| message.created_at)
            .max();
        franken_update_conversation_tail_state(
            &tx,
            existing_id,
            newest_incoming_ts,
            inserted_last_idx,
            inserted_last_created_at,
        )?;
        false
    } else if let (Some(last_message_idx), Some(last_message_created_at)) =
        (inserted_last_idx, inserted_last_created_at)
    {
        // The exact setter is only used when the stored `ended_at` is absent
        // or not later than the newly appended tail message.
        let tail_is_latest =
            stored_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at);
        if tail_is_latest {
            franken_set_conversation_tail_state_after_append(
                &tx,
                existing_id,
                last_message_created_at,
                last_message_idx,
                last_message_created_at,
            )?;
        } else {
            franken_update_conversation_tail_state(
                &tx,
                existing_id,
                Some(last_message_created_at),
                inserted_last_idx,
                inserted_last_created_at,
            )?;
        }
        tail_is_latest
    } else {
        // The tail plan yielded no complete (idx, timestamp) pair, so the
        // stored tail state is left untouched.
        false
    };
    franken_update_external_conversation_tail_after_append(
        &tx,
        agent_id,
        conv,
        used_append_tail_plan,
        exact_append_tail_set,
        inserted_last_idx,
        inserted_last_created_at,
    )?;
    profile.conversation_row_duration += row_timer.elapsed();

    if !skip_stats && !inserted_indices.is_empty() {
        let stats_timer = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 0,
                message_count_delta: inserted_indices.len() as i64,
                total_chars_delta: new_chars,
            },
        )?;
        profile.analytics_duration += stats_timer.elapsed();
    }

    let commit_timer = Instant::now();
    tx.commit()?;
    profile.commit_duration += commit_timer.elapsed();
    profile.invocations += 1;
    profile.messages += conv.messages.len();
    profile.inserted_messages += inserted_indices.len();
    profile.total_duration += started.elapsed();

    Ok(InsertOutcome {
        conversation_id: existing_id,
        conversation_inserted: false,
        inserted_indices,
    })
}
9299
9300 #[allow(clippy::too_many_arguments)]
9302 fn franken_append_messages_with_tail_in_tx(
9303 &self,
9304 tx: &FrankenTransaction<'_>,
9305 agent_id: i64,
9306 conversation_id: i64,
9307 conv: &Conversation,
9308 append_tail_state: Option<ExistingConversationTailState>,
9309 defer_lexical_updates: bool,
9310 defer_analytics_updates: bool,
9311 ) -> Result<InsertOutcome> {
9312 let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
9313 let append_plan = append_tail_state.as_ref().and_then(|state| {
9314 collect_append_only_tail_messages(
9315 conv,
9316 state.last_message_idx,
9317 state.last_message_created_at,
9318 )
9319 });
9320 let used_append_tail_plan = append_plan.is_some();
9321 let ExistingConversationNewMessages {
9322 messages: new_messages,
9323 new_chars,
9324 idx_collision_count,
9325 first_collision_idx,
9326 } = if let Some(append_plan) = append_plan {
9327 append_plan
9328 } else {
9329 let ExistingMessageLookup {
9330 by_idx: mut existing_messages,
9331 replay: mut existing_replay_fingerprints,
9332 } = franken_existing_message_lookup(tx, conversation_id, &conv.messages)?;
9333 collect_new_messages_for_existing_conversation(
9334 conversation_id,
9335 conv,
9336 &mut existing_messages,
9337 &mut existing_replay_fingerprints,
9338 "skipping replay-equivalent recovered message with shifted idx",
9339 )
9340 };
9341
9342 let mut inserted_indices = Vec::new();
9343 let mut fts_entries = Vec::new();
9344 let mut fts_pending_chars = 0usize;
9345 let mut _fts_inserted_total = 0usize;
9346 let (inserted_last_idx, inserted_last_created_at) =
9347 borrowed_messages_tail_state(&new_messages);
9348 let inserted_message_ids =
9349 franken_append_insert_new_messages(tx, conversation_id, &new_messages)?;
9350 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9351 franken_insert_snippets(tx, msg_id, &msg.snippets)?;
9352 if !defer_lexical_updates {
9353 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9354 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9355 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9356 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9357 {
9358 flush_pending_fts_entries(
9359 self,
9360 tx,
9361 &mut fts_entries,
9362 &mut fts_pending_chars,
9363 &mut _fts_inserted_total,
9364 )?;
9365 }
9366 }
9367 inserted_indices.push(msg.idx);
9368 }
9369
9370 if idx_collision_count > 0 {
9371 tracing::warn!(
9372 conversation_id,
9373 collision_count = idx_collision_count,
9374 first_idx = first_collision_idx,
9375 source_path = %conv.source_path.display(),
9376 "message idx collisions encountered while appending to an existing conversation; retaining canonical message variants"
9377 );
9378 }
9379
9380 if !defer_lexical_updates {
9381 flush_pending_fts_entries(
9382 self,
9383 tx,
9384 &mut fts_entries,
9385 &mut fts_pending_chars,
9386 &mut _fts_inserted_total,
9387 )?;
9388 }
9389
9390 let mut exact_append_tail_set = false;
9391 if used_append_tail_plan {
9392 if let (Some(last_message_idx), Some(last_message_created_at)) =
9393 (inserted_last_idx, inserted_last_created_at)
9394 {
9395 if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
9396 franken_set_conversation_tail_state_after_append(
9397 tx,
9398 conversation_id,
9399 last_message_created_at,
9400 last_message_idx,
9401 last_message_created_at,
9402 )?;
9403 exact_append_tail_set = true;
9404 } else {
9405 franken_update_conversation_tail_state(
9406 tx,
9407 conversation_id,
9408 Some(last_message_created_at),
9409 inserted_last_idx,
9410 inserted_last_created_at,
9411 )?;
9412 }
9413 }
9414 } else {
9415 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
9416 franken_update_conversation_tail_state(
9417 tx,
9418 conversation_id,
9419 conv_last_ts,
9420 inserted_last_idx,
9421 inserted_last_created_at,
9422 )?;
9423 }
9424 franken_update_external_conversation_tail_after_append(
9425 tx,
9426 agent_id,
9427 conv,
9428 used_append_tail_plan,
9429 exact_append_tail_set,
9430 inserted_last_idx,
9431 inserted_last_created_at,
9432 )?;
9433
9434 if !defer_analytics_updates && !inserted_indices.is_empty() {
9435 let message_count = inserted_indices.len() as i64;
9436 franken_update_daily_stats_in_tx(
9437 self,
9438 tx,
9439 &conv.agent_slug,
9440 &conv.source_id,
9441 conversation_effective_started_at(conv),
9442 StatsDelta {
9443 session_count_delta: 0,
9444 message_count_delta: message_count,
9445 total_chars_delta: new_chars,
9446 },
9447 )?;
9448 }
9449
9450 Ok(InsertOutcome {
9451 conversation_id,
9452 conversation_inserted: false,
9453 inserted_indices,
9454 })
9455 }
9456
9457 pub fn rebuild_fts(&self) -> Result<()> {
9459 self.rebuild_fts_via_frankensqlite().map(|_| ())
9460 }
9461
9462 pub(crate) fn ensure_search_fallback_fts_consistency(&self) -> Result<FtsConsistencyRepair> {
9467 self.ensure_fts_consistency_via_frankensqlite()
9468 }
9469
9470 pub(crate) fn fallback_fts_is_known_healthy_for_archive_fingerprint(
9471 &self,
9472 archive_fingerprint: &str,
9473 ) -> Result<bool> {
9474 Ok(
9475 self.read_fts_franken_rebuild_generation()? == Some(FTS_FRANKEN_REBUILD_GENERATION)
9476 && self
9477 .read_fts_franken_rebuild_archive_fingerprint()?
9478 .as_deref()
9479 == Some(archive_fingerprint),
9480 )
9481 }
9482
9483 pub(crate) fn record_search_fallback_fts_archive_fingerprint(
9484 &self,
9485 archive_fingerprint: &str,
9486 ) -> Result<()> {
9487 self.conn
9488 .execute_compat(
9489 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9490 fparams![
9491 FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY,
9492 archive_fingerprint.to_string()
9493 ],
9494 )
9495 .with_context(|| "recording frankensqlite FTS archive fingerprint")?;
9496 Ok(())
9497 }
9498
9499 pub(crate) fn daily_stats_is_known_healthy_for_archive_fingerprint(
9500 &self,
9501 archive_fingerprint: &str,
9502 ) -> Result<bool> {
9503 Ok(
9504 self.read_daily_stats_health_generation()? == Some(DAILY_STATS_HEALTH_GENERATION)
9505 && self.read_daily_stats_archive_fingerprint()?.as_deref()
9506 == Some(archive_fingerprint),
9507 )
9508 }
9509
9510 pub(crate) fn record_daily_stats_archive_fingerprint(
9511 &self,
9512 archive_fingerprint: &str,
9513 ) -> Result<()> {
9514 self.conn
9515 .execute_compat(
9516 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9517 fparams![
9518 DAILY_STATS_HEALTH_GENERATION_META_KEY,
9519 DAILY_STATS_HEALTH_GENERATION.to_string()
9520 ],
9521 )
9522 .with_context(|| "recording daily_stats health generation")?;
9523 self.conn
9524 .execute_compat(
9525 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9526 fparams![DAILY_STATS_HEALTH_META_KEY, archive_fingerprint.to_string()],
9527 )
9528 .with_context(|| "recording daily_stats archive fingerprint")?;
9529 Ok(())
9530 }
9531
9532 fn read_fts_franken_rebuild_generation(&self) -> Result<Option<i64>> {
9533 let value: Option<String> = self
9534 .conn
9535 .query_row_map(
9536 "SELECT value FROM meta WHERE key = ?1",
9537 fparams![FTS_FRANKEN_REBUILD_META_KEY],
9538 |row| row.get_typed(0),
9539 )
9540 .optional()?;
9541 Ok(value.and_then(|v| v.parse::<i64>().ok()))
9542 }
9543
9544 fn read_fts_franken_rebuild_archive_fingerprint(&self) -> Result<Option<String>> {
9545 Ok(self
9546 .conn
9547 .query_row_map(
9548 "SELECT value FROM meta WHERE key = ?1",
9549 fparams![FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY],
9550 |row| row.get_typed(0),
9551 )
9552 .optional()?)
9553 }
9554
9555 fn read_daily_stats_health_generation(&self) -> Result<Option<i64>> {
9556 let value: Option<String> = self
9557 .conn
9558 .query_row_map(
9559 "SELECT value FROM meta WHERE key = ?1",
9560 fparams![DAILY_STATS_HEALTH_GENERATION_META_KEY],
9561 |row| row.get_typed(0),
9562 )
9563 .optional()?;
9564 Ok(value.and_then(|value| value.parse::<i64>().ok()))
9565 }
9566
9567 fn read_daily_stats_archive_fingerprint(&self) -> Result<Option<String>> {
9568 Ok(self
9569 .conn
9570 .query_row_map(
9571 "SELECT value FROM meta WHERE key = ?1",
9572 fparams![DAILY_STATS_HEALTH_META_KEY],
9573 |row| row.get_typed(0),
9574 )
9575 .optional()?)
9576 }
9577
9578 fn record_fts_franken_rebuild_generation(&self) -> Result<()> {
9579 self.conn
9580 .execute_compat(
9581 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9582 fparams![
9583 FTS_FRANKEN_REBUILD_META_KEY,
9584 FTS_FRANKEN_REBUILD_GENERATION.to_string()
9585 ],
9586 )
9587 .with_context(|| "recording frankensqlite FTS rebuild generation")?;
9588 Ok(())
9589 }
9590
9591 fn ensure_fts_consistency_via_frankensqlite(&self) -> Result<FtsConsistencyRepair> {
9592 if self.read_fts_franken_rebuild_generation()? != Some(FTS_FRANKEN_REBUILD_GENERATION) {
9593 let fts_already_healthy = (|| -> Result<bool> {
9598 let fts_exists: i64 = self.conn.query_row_map(
9599 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9600 fparams![],
9601 |row| row.get_typed(0),
9602 )?;
9603 if fts_exists != 1 {
9604 return Ok(false);
9605 }
9606 let total: i64 = self.conn.query_row_map(
9607 "SELECT COUNT(*) FROM messages",
9608 fparams![],
9609 |row| row.get_typed(0),
9610 )?;
9611 if total == 0 {
9612 return Ok(false);
9613 }
9614 let indexed: i64 = self.conn.query_row_map(
9615 "SELECT COUNT(*) FROM fts_messages",
9616 fparams![],
9617 |row| row.get_typed(0),
9618 )?;
9619 Ok(indexed > 0 && indexed * 100 >= total * 90)
9621 })()
9622 .unwrap_or(false);
9623
9624 if fts_already_healthy {
9625 tracing::info!(
9626 target: "cass::fts_rebuild",
9627 "FTS already populated and consistent; setting generation marker without rebuild"
9628 );
9629 self.record_fts_franken_rebuild_generation()?;
9630 self.set_fts_messages_present_cache(true);
9631 } else {
9632 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9633 self.record_fts_franken_rebuild_generation()?;
9634 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9635 }
9636 }
9637
9638 let inspection = (|| -> Result<(i64, bool)> {
9639 let fts_schema_rows = self.conn.query_row_map(
9640 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9641 fparams![],
9642 |row| row.get_typed::<i64>(0),
9643 )?;
9644 let fts_queryable = fts_schema_rows == 1
9645 && self.conn.query("SELECT COUNT(*) FROM fts_messages").is_ok();
9646 Ok((fts_schema_rows, fts_queryable))
9647 })();
9648
9649 let (fts_schema_rows, fts_queryable) = match inspection {
9650 Ok(result) => result,
9651 Err(err) => {
9652 tracing::warn!(
9653 error = %err,
9654 "frankensqlite FTS consistency probe failed; rebuilding authoritative FTS"
9655 );
9656 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9657 self.record_fts_franken_rebuild_generation()?;
9658 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9659 }
9660 };
9661
9662 if fts_schema_rows != 1 || !fts_queryable {
9663 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9664 self.record_fts_franken_rebuild_generation()?;
9665 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9666 }
9667
9668 let total_messages =
9669 self.conn
9670 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
9671 row.get_typed::<i64>(0)
9672 })?;
9673 let indexed_messages =
9674 self.conn
9675 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9676 row.get_typed::<i64>(0)
9677 })?;
9678
9679 if indexed_messages == total_messages {
9680 self.set_fts_messages_present_cache(true);
9681 return Ok(FtsConsistencyRepair::AlreadyHealthy {
9682 rows: usize::try_from(total_messages.max(0)).unwrap_or(usize::MAX),
9683 });
9684 }
9685
9686 if indexed_messages > total_messages {
9687 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9688 self.record_fts_franken_rebuild_generation()?;
9689 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9690 }
9691
9692 let inserted_rows = self
9693 .stream_fts_rows_via_frankensqlite(true)
9694 .with_context(|| "incrementally repairing missing FTS rows via frankensqlite")?;
9695 let repaired_rows =
9696 self.conn
9697 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
9698 row.get_typed::<i64>(0)
9699 })?;
9700 if repaired_rows == total_messages {
9701 self.set_fts_messages_present_cache(true);
9702 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9703 inserted_rows,
9704 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9705 });
9706 }
9707
9708 if inserted_rows == 0 {
9716 tracing::debug!(
9717 target: "cass::fts_rebuild",
9718 indexed_messages = repaired_rows,
9719 total_messages,
9720 un_indexable_gap = total_messages.saturating_sub(repaired_rows),
9721 "FTS catch-up inserted 0 rows; remaining gap is un-indexable (likely orphaned messages with dangling conversation_id)"
9722 );
9723 self.set_fts_messages_present_cache(true);
9724 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
9725 inserted_rows: 0,
9726 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
9727 });
9728 }
9729
9730 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9733 self.record_fts_franken_rebuild_generation()?;
9734 Ok(FtsConsistencyRepair::Rebuilt { inserted_rows })
9735 }
9736
9737 pub(crate) fn rebuild_fts_via_frankensqlite(&self) -> Result<usize> {
9738 self.invalidate_fts_messages_present_cache();
9739 self.conn
9740 .execute("DROP TABLE IF EXISTS fts_messages;")
9741 .with_context(|| "dropping derived fts_messages before frankensqlite rebuild")?;
9742 self.conn
9743 .execute_compat(FTS5_REGISTER_SQL, fparams![])
9744 .with_context(|| "creating derived fts_messages via frankensqlite rebuild")?;
9745 self.set_fts_messages_present_cache(true);
9746
9747 self.stream_fts_rows_via_frankensqlite(false)
9748 }
9749
/// Streams `messages` rows in ascending rowid order, in batches, and inserts
/// the corresponding entries into the FTS table.
///
/// When `missing_only` is `true`, the set of rowids already present in
/// `fts_messages` is loaded up front and any message whose id is in that set
/// is skipped; otherwise every message with a known conversation is inserted.
///
/// Messages whose `conversation_id` has no entry in the conversation
/// projection map are counted as orphans and skipped.
///
/// Returns the accumulated count reported by
/// `franken_batch_insert_fts_on_connection`.
fn stream_fts_rows_via_frankensqlite(&self, missing_only: bool) -> Result<usize> {
    // Clamp to at least one row per fetch so the loop always makes progress.
    let batch_size = fts_rebuild_batch_size().max(1);
    let batch_limit = i64::try_from(batch_size).unwrap_or(i64::MAX);
    let mut total_inserted: usize = 0;
    let mut total_skipped_orphans: usize = 0;
    let mut total_skipped_existing: usize = 0;
    // Keyset-pagination cursor: each fetch asks for rows with rowid > last_rowid.
    let mut last_rowid: i64 = 0;
    // Lookup tables are loaded once so the per-message work needs no joins.
    let conversation_by_id = self.load_fts_conversation_projection_map()?;
    let agent_slug_by_id = self.load_fts_agent_slug_map()?;
    let workspace_path_by_id = self.load_fts_workspace_path_map()?;
    let existing_fts_rowids = if missing_only {
        Some(self.load_fts_message_rowid_set()?)
    } else {
        None
    };
    // Pending FTS entries and their cumulative content size in bytes.
    let mut entries = Vec::new();
    let mut pending_chars = 0usize;

    loop {
        let rows = self.fetch_fts_rebuild_message_rows(last_rowid, batch_limit)?;
        let fetched_count = rows.len();
        if fetched_count == 0 {
            break;
        }

        // Snapshots used only to compute per-batch deltas for the debug log.
        let inserted_before_batch = total_inserted;
        let skipped_before_batch = total_skipped_orphans;
        let existing_before_batch = total_skipped_existing;

        for row in rows {
            // Advance the cursor even for skipped rows so they are not refetched.
            last_rowid = row.rowid;
            // NOTE(review): this compares the message id against FTS rowids,
            // which presumes FTS rows are keyed by message id — confirm.
            if existing_fts_rowids
                .as_ref()
                .is_some_and(|rowids| rowids.contains(&row.message_id))
            {
                total_skipped_existing = total_skipped_existing.saturating_add(1);
                continue;
            }
            let Some(conversation) = conversation_by_id.get(&row.conversation_id) else {
                total_skipped_orphans = total_skipped_orphans.saturating_add(1);
                continue;
            };
            // Missing agent, unknown agent id, or empty slug all map to "unknown".
            let agent = conversation
                .agent_id
                .and_then(|agent_id| agent_slug_by_id.get(&agent_id))
                .filter(|slug| !slug.is_empty())
                .cloned()
                .unwrap_or_else(|| "unknown".to_string());
            // Missing workspace or unknown workspace id maps to an empty path.
            let workspace = conversation
                .workspace_id
                .and_then(|workspace_id| workspace_path_by_id.get(&workspace_id))
                .cloned()
                .unwrap_or_default();
            pending_chars = pending_chars.saturating_add(row.content.len());
            entries.push(FtsEntry {
                content: row.content,
                title: conversation.title.clone(),
                agent,
                workspace,
                source_path: conversation.source_path.clone(),
                created_at: row.created_at,
                message_id: row.message_id,
            });
            // Flush as soon as either the document or the byte budget is hit.
            if entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
            {
                total_inserted = total_inserted.saturating_add(
                    franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
                );
                entries.clear();
                pending_chars = 0;
            }
        }

        // Flush whatever is left from this fetch before logging and fetching again.
        if !entries.is_empty() {
            total_inserted = total_inserted.saturating_add(
                franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
            );
            entries.clear();
            pending_chars = 0;
        }

        tracing::debug!(
            target: "cass::fts_rebuild",
            batch_rows = fetched_count,
            batch_inserted = total_inserted.saturating_sub(inserted_before_batch),
            batch_skipped_orphans = total_skipped_orphans.saturating_sub(skipped_before_batch),
            batch_skipped_existing = total_skipped_existing.saturating_sub(existing_before_batch),
            total_inserted,
            total_skipped_orphans,
            total_skipped_existing,
            last_rowid,
            missing_only,
            "FTS streaming maintenance batch complete"
        );

        // A short fetch means the table is exhausted; skip the extra empty query.
        if fetched_count < batch_size {
            break;
        }
    }

    Ok(total_inserted)
}
9853
9854 fn fetch_fts_rebuild_message_rows(
9855 &self,
9856 last_rowid: i64,
9857 batch_limit: i64,
9858 ) -> Result<Vec<FtsRebuildMessageRow>> {
9859 self.conn
9860 .query_map_collect(
9861 "SELECT m.rowid, m.id, m.conversation_id, m.content, m.created_at
9862 FROM messages m
9863 WHERE m.rowid > ?1
9864 ORDER BY m.rowid
9865 LIMIT ?2",
9866 fparams![last_rowid, batch_limit],
9867 |row| {
9868 Ok(FtsRebuildMessageRow {
9869 rowid: row.get_typed(0)?,
9870 message_id: row.get_typed(1)?,
9871 conversation_id: row.get_typed(2)?,
9872 content: row.get_typed::<Option<String>>(3)?.unwrap_or_default(),
9873 created_at: row.get_typed(4)?,
9874 })
9875 },
9876 )
9877 .with_context(|| format!("fetching FTS maintenance messages after rowid {last_rowid}"))
9878 }
9879
9880 fn load_fts_message_rowid_set(&self) -> Result<HashSet<i64>> {
9881 let rows: Vec<i64> = self
9882 .conn
9883 .query_map_collect("SELECT rowid FROM fts_messages", fparams![], |row| {
9884 row.get_typed(0)
9885 })
9886 .with_context(|| "loading existing FTS message rowids")?;
9887 Ok(rows.into_iter().collect())
9888 }
9889
9890 fn load_fts_conversation_projection_map(
9891 &self,
9892 ) -> Result<HashMap<i64, FtsConversationProjection>> {
9893 let rows: Vec<(i64, FtsConversationProjection)> = self
9894 .conn
9895 .query_map_collect(
9896 "SELECT id, title, agent_id, workspace_id, source_path
9897 FROM conversations",
9898 fparams![],
9899 |row| {
9900 Ok((
9901 row.get_typed(0)?,
9902 FtsConversationProjection {
9903 title: row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
9904 agent_id: row.get_typed(2)?,
9905 workspace_id: row.get_typed(3)?,
9906 source_path: row.get_typed::<Option<String>>(4)?.unwrap_or_default(),
9907 },
9908 ))
9909 },
9910 )
9911 .with_context(|| "loading FTS conversation projection map")?;
9912 Ok(rows.into_iter().collect())
9913 }
9914
9915 fn load_fts_agent_slug_map(&self) -> Result<HashMap<i64, String>> {
9916 let rows: Vec<(i64, String)> = self
9917 .conn
9918 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
9919 Ok((
9920 row.get_typed(0)?,
9921 row.get_typed::<Option<String>>(1)?
9922 .unwrap_or_else(|| "unknown".to_string()),
9923 ))
9924 })
9925 .with_context(|| "loading FTS agent slug map")?;
9926 Ok(rows.into_iter().collect())
9927 }
9928
9929 fn load_fts_workspace_path_map(&self) -> Result<HashMap<i64, String>> {
9930 let rows: Vec<(i64, String)> = self
9931 .conn
9932 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
9933 Ok((
9934 row.get_typed(0)?,
9935 row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
9936 ))
9937 })
9938 .with_context(|| "loading FTS workspace path map")?;
9939 Ok(rows.into_iter().collect())
9940 }
9941
9942 pub fn fetch_messages_for_embedding(&self) -> Result<Vec<MessageForEmbedding>> {
9944 self.conn
9949 .query_map_collect(
9950 "SELECT m.id, m.created_at, COALESCE(c.agent_id, 0), c.workspace_id, c.source_id, m.role, m.content
9951 FROM messages m
9952 JOIN conversations c ON m.conversation_id = c.id
9953 ORDER BY m.id",
9954 fparams![],
9955 |row| {
9956 let source_id: String = row.get_typed::<Option<String>>(4)?
9957 .unwrap_or_else(|| "local".to_string());
9958 Ok(MessageForEmbedding {
9959 message_id: row.get_typed(0)?,
9960 created_at: row.get_typed(1)?,
9961 agent_id: row.get_typed(2)?,
9962 workspace_id: row.get_typed(3)?,
9963 source_id_hash: crc32fast::hash(source_id.as_bytes()),
9964 role: row.get_typed(5)?,
9965 content: row.get_typed(6)?,
9966 })
9967 },
9968 )
9969 .with_context(|| "fetching messages for embedding")
9970 }
9971
9972 pub fn get_last_embedded_message_id(&self) -> Result<Option<i64>> {
9974 let result: Result<String, _> = self.conn.query_row_map(
9975 "SELECT value FROM meta WHERE key = 'last_embedded_message_id'",
9976 fparams![],
9977 |row| row.get_typed(0),
9978 );
9979 match result.optional() {
9980 Ok(Some(s)) => Ok(s.parse().ok()),
9981 Ok(None) => Ok(None),
9982 Err(e) => Err(e.into()),
9983 }
9984 }
9985
9986 pub fn set_last_embedded_message_id(&self, id: i64) -> Result<()> {
9988 self.conn.execute_compat(
9989 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_embedded_message_id', ?1)",
9990 fparams![id.to_string()],
9991 )?;
9992 Ok(())
9993 }
9994
9995 pub fn get_embedding_jobs(&self, db_path: &str) -> Result<Vec<EmbeddingJobRow>> {
9997 self.conn
9998 .query_map_collect(
9999 "SELECT id, db_path, model_id, status, total_docs, completed_docs, error_message, created_at, started_at, completed_at
10000 FROM embedding_jobs WHERE db_path = ?1 ORDER BY id DESC",
10001 fparams![db_path],
10002 |row| {
10003 Ok(EmbeddingJobRow {
10004 id: row.get_typed(0)?,
10005 db_path: row.get_typed(1)?,
10006 model_id: row.get_typed(2)?,
10007 status: row.get_typed(3)?,
10008 total_docs: row.get_typed(4)?,
10009 completed_docs: row.get_typed(5)?,
10010 error_message: row.get_typed(6)?,
10011 created_at: row.get_typed(7)?,
10012 started_at: row.get_typed(8)?,
10013 completed_at: row.get_typed(9)?,
10014 })
10015 },
10016 )
10017 .with_context(|| format!("fetching embedding jobs for {db_path}"))
10018 }
10019
10020 pub fn upsert_embedding_job(
10022 &self,
10023 db_path: &str,
10024 model_id: &str,
10025 total_docs: i64,
10026 ) -> Result<i64> {
10027 let updated = self.conn.execute_compat(
10028 "UPDATE embedding_jobs
10029 SET total_docs = ?3
10030 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10031 fparams![db_path, model_id, total_docs],
10032 )?;
10033 if updated == 0 {
10034 let insert_result = self.conn.execute_compat(
10035 "INSERT INTO embedding_jobs(db_path, model_id, total_docs) VALUES(?1,?2,?3)",
10036 fparams![db_path, model_id, total_docs],
10037 );
10038 if let Err(err) = insert_result {
10039 if !matches!(err, frankensqlite::FrankenError::UniqueViolation { .. }) {
10040 return Err(err.into());
10041 }
10042 self.conn.execute_compat(
10043 "UPDATE embedding_jobs
10044 SET total_docs = ?3
10045 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10046 fparams![db_path, model_id, total_docs],
10047 )?;
10048 }
10049 }
10050 self.conn
10051 .query_row_map(
10052 "SELECT id FROM embedding_jobs
10053 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')
10054 ORDER BY id DESC
10055 LIMIT 1",
10056 fparams![db_path, model_id],
10057 |row| row.get_typed(0),
10058 )
10059 .with_context(|| "resolving embedding job id after upsert")
10060 }
10061
10062 pub fn start_embedding_job(&self, job_id: i64) -> Result<()> {
10064 self.conn.execute_compat(
10065 "UPDATE embedding_jobs SET status = 'running', started_at = datetime('now') WHERE id = ?1",
10066 fparams![job_id],
10067 )?;
10068 Ok(())
10069 }
10070
10071 pub fn complete_embedding_job(&self, job_id: i64) -> Result<()> {
10073 self.conn.execute_compat(
10074 "UPDATE embedding_jobs SET status = 'completed', completed_at = datetime('now') WHERE id = ?1",
10075 fparams![job_id],
10076 )?;
10077 Ok(())
10078 }
10079
10080 pub fn fail_embedding_job(&self, job_id: i64, error: &str) -> Result<()> {
10082 self.conn.execute_compat(
10083 "UPDATE embedding_jobs SET status = 'failed', error_message = ?2, completed_at = datetime('now') WHERE id = ?1",
10084 fparams![job_id, error],
10085 )?;
10086 Ok(())
10087 }
10088
10089 pub fn cancel_embedding_jobs(&self, db_path: &str, model_id: Option<&str>) -> Result<usize> {
10091 if let Some(mid) = model_id {
10092 Ok(self.conn.execute_compat(
10093 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10094 fparams![db_path, mid],
10095 )?)
10096 } else {
10097 Ok(self.conn.execute_compat(
10098 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND status IN ('pending', 'running')",
10099 fparams![db_path],
10100 )?)
10101 }
10102 }
10103
10104 pub fn update_job_progress(&self, job_id: i64, completed_docs: i64) -> Result<()> {
10106 self.conn.execute_compat(
10107 "UPDATE embedding_jobs SET completed_docs = ?2 WHERE id = ?1",
10108 fparams![job_id, completed_docs],
10109 )?;
10110 Ok(())
10111 }
10112
10113 pub fn count_sessions_in_range(
10122 &self,
10123 start_ts_ms: Option<i64>,
10124 end_ts_ms: Option<i64>,
10125 agent_slug: Option<&str>,
10126 source_id: Option<&str>,
10127 ) -> Result<(i64, bool)> {
10128 let agent = agent_slug.unwrap_or("all");
10129 let source = source_id.unwrap_or("all");
10130
10131 let stats_count: i64 = self
10133 .conn
10134 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10135 row.get_typed(0)
10136 })
10137 .unwrap_or(0);
10138
10139 if stats_count == 0 {
10140 return self.count_sessions_direct(start_ts_ms, end_ts_ms, agent_slug, source_id);
10141 }
10142
10143 let start_day = start_ts_ms.map(Self::day_id_from_millis);
10145 let end_day = end_ts_ms.map(Self::day_id_from_millis);
10146
10147 let count: i64 = match (start_day, end_day) {
10148 (Some(start), Some(end)) => self.conn.query_row_map(
10149 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10150 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4",
10151 fparams![start, end, agent, source],
10152 |row| row.get_typed(0),
10153 )?,
10154 (Some(start), None) => self.conn.query_row_map(
10155 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10156 WHERE day_id >= ?1 AND agent_slug = ?2 AND source_id = ?3",
10157 fparams![start, agent, source],
10158 |row| row.get_typed(0),
10159 )?,
10160 (None, Some(end)) => self.conn.query_row_map(
10161 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10162 WHERE day_id <= ?1 AND agent_slug = ?2 AND source_id = ?3",
10163 fparams![end, agent, source],
10164 |row| row.get_typed(0),
10165 )?,
10166 (None, None) => self.conn.query_row_map(
10167 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10168 WHERE agent_slug = ?1 AND source_id = ?2",
10169 fparams![agent, source],
10170 |row| row.get_typed(0),
10171 )?,
10172 };
10173
10174 Ok((count, true))
10175 }
10176
10177 fn count_sessions_direct(
10179 &self,
10180 start_ts_ms: Option<i64>,
10181 end_ts_ms: Option<i64>,
10182 agent_slug: Option<&str>,
10183 source_id: Option<&str>,
10184 ) -> Result<(i64, bool)> {
10185 let mut sql = "SELECT COUNT(*) FROM conversations c WHERE 1=1".to_string();
10192 let mut param_values: Vec<ParamValue> = Vec::new();
10193 let mut idx = 1;
10194
10195 if let Some(start) = start_ts_ms {
10196 sql.push_str(&format!(" AND c.started_at >= ?{idx}"));
10197 param_values.push(ParamValue::from(start));
10198 idx += 1;
10199 }
10200 if let Some(end) = end_ts_ms {
10201 sql.push_str(&format!(" AND c.started_at <= ?{idx}"));
10202 param_values.push(ParamValue::from(end));
10203 idx += 1;
10204 }
10205 if let Some(agent) = agent_slug
10206 && agent != "all"
10207 {
10208 sql.push_str(&format!(
10209 " AND EXISTS (SELECT 1 FROM agents a WHERE a.id = c.agent_id AND a.slug = ?{idx})"
10210 ));
10211 param_values.push(ParamValue::from(agent));
10212 idx += 1;
10213 }
10214 if let Some(source) = source_id
10215 && source != "all"
10216 {
10217 sql.push_str(&format!(" AND c.source_id = ?{idx}"));
10218 param_values.push(ParamValue::from(source));
10219 let _ = idx; }
10221
10222 let count: i64 = self
10223 .conn
10224 .query_row_map(&sql, ¶m_values, |row| row.get_typed(0))?;
10225 Ok((count, false))
10226 }
10227
10228 pub fn get_daily_histogram(
10230 &self,
10231 start_ts_ms: i64,
10232 end_ts_ms: i64,
10233 agent_slug: Option<&str>,
10234 source_id: Option<&str>,
10235 ) -> Result<Vec<DailyCount>> {
10236 let start_day = Self::day_id_from_millis(start_ts_ms);
10237 let end_day = Self::day_id_from_millis(end_ts_ms);
10238 let agent = agent_slug.unwrap_or("all");
10239 let source = source_id.unwrap_or("all");
10240
10241 let rows = self.conn.query_map_collect(
10242 "SELECT day_id, session_count, message_count, total_chars
10243 FROM daily_stats
10244 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4
10245 ORDER BY day_id",
10246 fparams![start_day, end_day, agent, source],
10247 |row| {
10248 Ok(DailyCount {
10249 day_id: row.get_typed(0)?,
10250 sessions: row.get_typed(1)?,
10251 messages: row.get_typed(2)?,
10252 chars: row.get_typed(3)?,
10253 })
10254 },
10255 )?;
10256
10257 Ok(rows)
10258 }
10259
10260 pub fn daily_stats_health(&self) -> Result<DailyStatsHealth> {
10262 let row_count: i64 =
10263 self.conn
10264 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10265 row.get_typed(0)
10266 })?;
10267
10268 let oldest_update: Option<i64> = self.conn.query_row_map(
10269 "SELECT MIN(last_updated) FROM daily_stats",
10270 fparams![],
10271 |row| row.get_typed(0),
10272 )?;
10273
10274 let conversation_count: i64 =
10275 self.conn
10276 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
10277 row.get_typed(0)
10278 })?;
10279
10280 let materialized_total: i64 = self.conn.query_row_map(
10281 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10282 WHERE agent_slug = 'all' AND source_id = 'all'",
10283 fparams![],
10284 |row| row.get_typed(0),
10285 )?;
10286
10287 Ok(DailyStatsHealth {
10288 populated: row_count > 0,
10289 row_count,
10290 oldest_update_ms: oldest_update,
10291 conversation_count,
10292 materialized_total,
10293 drift: (conversation_count - materialized_total).abs(),
10294 })
10295 }
10296
10297 pub fn insert_conversations_batched(
10301 &self,
10302 conversations: &[(i64, Option<i64>, &Conversation)],
10303 ) -> Result<Vec<InsertOutcome>> {
10304 if conversations.is_empty() {
10305 return Ok(Vec::new());
10306 }
10307
10308 self.ensure_sources_for_batch(conversations)?;
10309
10310 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
10311 let defer_analytics_updates = defer_analytics_updates_enabled();
10312
10313 let pricing_table = PricingTable::franken_load(&self.conn).unwrap_or_else(|e| {
10314 tracing::warn!(target: "cass::analytics::pricing", error = %e, "failed to load pricing table");
10315 PricingTable { entries: Vec::new() }
10316 });
10317 let mut pricing_diag = PricingDiagnostics::default();
10318
10319 let mut tx = self.conn.transaction()?;
10320
10321 ensure_agents_in_tx(&tx, conversations)?;
10328 ensure_workspaces_in_tx(&tx, conversations)?;
10329 ensure_sources_in_tx(&tx, conversations)?;
10330
10331 let mut outcomes = Vec::with_capacity(conversations.len());
10332 let mut fts_entries = Vec::new();
10333 let mut fts_pending_chars = 0usize;
10334 let mut fts_inserted_total = 0usize;
10335 let mut fts_count_total = 0usize;
10336 let mut stats = StatsAggregator::new();
10337 let mut token_stats = TokenStatsAggregator::new();
10338 let mut token_entries: Vec<TokenUsageEntry> = Vec::new();
10339 let mut metrics_entries: Vec<MessageMetricsEntry> = Vec::new();
10340 let mut rollup_agg = AnalyticsRollupAggregator::new();
10341 let mut conv_ids_to_summarize: Vec<i64> = Vec::new();
10342 let mut pending_conversation_ids: HashMap<PendingConversationKey, i64> = HashMap::new();
10343 let mut pending_message_fingerprints: HashMap<i64, HashMap<i64, MessageMergeFingerprint>> =
10344 HashMap::new();
10345 let mut pending_message_replay_fingerprints: HashMap<
10346 i64,
10347 HashSet<MessageReplayFingerprint>,
10348 > = HashMap::new();
10349
10350 for &(agent_id, workspace_id, raw_conv) in conversations {
10351 let normalized_conv = normalized_conversation_for_storage(raw_conv);
10352 let conv = normalized_conv.as_ref();
10353 let mut total_chars: i64 = 0;
10354 let mut inserted_indices = Vec::with_capacity(conv.messages.len());
10355 let mut inserted_messages: Vec<(i64, &Message)> =
10356 Vec::with_capacity(conv.messages.len());
10357 let mut session_count_delta = 1_i64;
10358 let conversation_key = conversation_merge_key(agent_id, conv);
10359
10360 let existing_conv_id = if let Some(existing_id) =
10361 pending_conversation_ids.get(&conversation_key)
10362 {
10363 Some(*existing_id)
10364 } else {
10365 let existing_id =
10366 franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
10367 if let Some(existing_id) = existing_id {
10368 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10369 }
10370 existing_id
10371 };
10372
10373 let conv_id = if let Some(existing_id) = existing_conv_id {
10374 session_count_delta = 0;
10375 let ExistingMessageLookup {
10376 by_idx: mut existing_messages,
10377 replay: mut existing_replay_fingerprints,
10378 } = franken_existing_message_lookup_with_pending(
10379 &tx,
10380 existing_id,
10381 &conv.messages,
10382 &mut pending_message_fingerprints,
10383 &mut pending_message_replay_fingerprints,
10384 )?;
10385 let ExistingConversationNewMessages {
10386 messages: new_messages,
10387 new_chars,
10388 idx_collision_count,
10389 first_collision_idx,
10390 } = collect_new_messages_for_existing_conversation(
10391 existing_id,
10392 conv,
10393 &mut existing_messages,
10394 &mut existing_replay_fingerprints,
10395 "skipping replay-equivalent recovered message with shifted idx during batched merge",
10396 );
10397 let (inserted_last_idx, inserted_last_created_at) =
10398 borrowed_messages_tail_state(&new_messages);
10399 let inserted_message_ids =
10400 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10401 total_chars += new_chars;
10402 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10403 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10404 if !defer_lexical_updates {
10405 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10406 fts_count_total += 1;
10407 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
10408 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10409 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10410 {
10411 flush_pending_fts_entries(
10412 self,
10413 &tx,
10414 &mut fts_entries,
10415 &mut fts_pending_chars,
10416 &mut fts_inserted_total,
10417 )?;
10418 }
10419 }
10420 inserted_indices.push(msg.idx);
10421 inserted_messages.push((msg_id, msg));
10422 }
10423
10424 if idx_collision_count > 0 {
10425 tracing::warn!(
10426 conversation_id = existing_id,
10427 collision_count = idx_collision_count,
10428 first_idx = first_collision_idx,
10429 source_path = %conv.source_path.display(),
10430 "message idx collisions encountered during batched conversation merge; retaining canonical message variants"
10431 );
10432 }
10433
10434 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10435 franken_update_conversation_tail_state(
10436 &tx,
10437 existing_id,
10438 conv_last_ts,
10439 inserted_last_idx,
10440 inserted_last_created_at,
10441 )?;
10442 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
10443 {
10444 franken_update_external_conversation_tail_lookup_key(
10445 &tx,
10446 &lookup_key,
10447 conv_last_ts,
10448 inserted_last_idx,
10449 inserted_last_created_at,
10450 )?;
10451 }
10452
10453 pending_message_fingerprints.insert(existing_id, existing_messages);
10454 pending_message_replay_fingerprints
10455 .insert(existing_id, existing_replay_fingerprints);
10456
10457 existing_id
10458 } else {
10459 match franken_insert_conversation_or_get_existing(
10460 &tx,
10461 agent_id,
10462 workspace_id,
10463 conv,
10464 )? {
10465 ConversationInsertStatus::Inserted(new_conv_id) => {
10466 pending_conversation_ids.insert(conversation_key.clone(), new_conv_id);
10467 let pending_messages =
10468 pending_message_fingerprints.entry(new_conv_id).or_default();
10469 let pending_replay_fingerprints = pending_message_replay_fingerprints
10470 .entry(new_conv_id)
10471 .or_default();
10472 let mut new_messages = Vec::new();
10473 for msg in &conv.messages {
10474 let incoming_replay = message_replay_fingerprint(msg);
10475 if pending_messages.contains_key(&msg.idx)
10476 || pending_replay_fingerprints.contains(&incoming_replay)
10477 {
10478 continue;
10479 }
10480 pending_messages.insert(msg.idx, message_merge_fingerprint(msg));
10481 pending_replay_fingerprints.insert(incoming_replay);
10482 new_messages.push(msg);
10483 }
10484 let inserted_message_ids =
10485 franken_batch_insert_new_messages(&tx, new_conv_id, &new_messages)?;
10486 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10487 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10488 if !defer_lexical_updates {
10489 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10490 fts_count_total += 1;
10491 fts_pending_chars =
10492 fts_pending_chars.saturating_add(msg.content.len());
10493 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10494 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10495 {
10496 flush_pending_fts_entries(
10497 self,
10498 &tx,
10499 &mut fts_entries,
10500 &mut fts_pending_chars,
10501 &mut fts_inserted_total,
10502 )?;
10503 }
10504 }
10505 total_chars += msg.content.len() as i64;
10506 inserted_indices.push(msg.idx);
10507 inserted_messages.push((msg_id, msg));
10508 }
10509 new_conv_id
10510 }
10511 ConversationInsertStatus::Existing(existing_id) => {
10512 session_count_delta = 0;
10513 pending_conversation_ids.insert(conversation_key.clone(), existing_id);
10514 let ExistingMessageLookup {
10515 by_idx: mut existing_messages,
10516 replay: mut existing_replay_fingerprints,
10517 } = franken_existing_message_lookup_with_pending(
10518 &tx,
10519 existing_id,
10520 &conv.messages,
10521 &mut pending_message_fingerprints,
10522 &mut pending_message_replay_fingerprints,
10523 )?;
10524 let ExistingConversationNewMessages {
10525 messages: new_messages,
10526 new_chars,
10527 idx_collision_count,
10528 first_collision_idx,
10529 } = collect_new_messages_for_existing_conversation(
10530 existing_id,
10531 conv,
10532 &mut existing_messages,
10533 &mut existing_replay_fingerprints,
10534 "skipping replay-equivalent recovered message with shifted idx after duplicate conversation recovery",
10535 );
10536 let (inserted_last_idx, inserted_last_created_at) =
10537 borrowed_messages_tail_state(&new_messages);
10538 let inserted_message_ids =
10539 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
10540 total_chars += new_chars;
10541 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
10542 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
10543 if !defer_lexical_updates {
10544 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
10545 fts_count_total += 1;
10546 fts_pending_chars =
10547 fts_pending_chars.saturating_add(msg.content.len());
10548 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10549 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10550 {
10551 flush_pending_fts_entries(
10552 self,
10553 &tx,
10554 &mut fts_entries,
10555 &mut fts_pending_chars,
10556 &mut fts_inserted_total,
10557 )?;
10558 }
10559 }
10560 inserted_indices.push(msg.idx);
10561 inserted_messages.push((msg_id, msg));
10562 }
10563
10564 if idx_collision_count > 0 {
10565 tracing::warn!(
10566 conversation_id = existing_id,
10567 collision_count = idx_collision_count,
10568 first_idx = first_collision_idx,
10569 source_path = %conv.source_path.display(),
10570 "message idx collisions encountered after duplicate conversation recovery; retaining canonical message variants"
10571 );
10572 }
10573
10574 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
10575 franken_update_conversation_tail_state(
10576 &tx,
10577 existing_id,
10578 conv_last_ts,
10579 inserted_last_idx,
10580 inserted_last_created_at,
10581 )?;
10582 if let Some(lookup_key) =
10583 conversation_external_lookup_key_for_conv(agent_id, conv)
10584 {
10585 franken_update_external_conversation_tail_lookup_key(
10586 &tx,
10587 &lookup_key,
10588 conv_last_ts,
10589 inserted_last_idx,
10590 inserted_last_created_at,
10591 )?;
10592 }
10593
10594 pending_message_fingerprints.insert(existing_id, existing_messages);
10595 pending_message_replay_fingerprints
10596 .insert(existing_id, existing_replay_fingerprints);
10597
10598 existing_id
10599 }
10600 }
10601 };
10602
10603 if !defer_analytics_updates {
10604 let delta = StatsDelta {
10605 session_count_delta,
10606 message_count_delta: inserted_messages.len() as i64,
10607 total_chars_delta: total_chars,
10608 };
10609
10610 let effective_started_at = conversation_effective_started_at(conv);
10611 let day_id = effective_started_at
10612 .map(FrankenStorage::day_id_from_millis)
10613 .unwrap_or(0);
10614 stats.record_delta(
10615 &conv.agent_slug,
10616 &conv.source_id,
10617 day_id,
10618 delta.session_count_delta,
10619 delta.message_count_delta,
10620 delta.total_chars_delta,
10621 );
10622
10623 let conv_day_id = day_id;
10624 let mut session_model_family = String::from("unknown");
10625 let mut has_any_tokens = false;
10626
10627 for &(message_id, msg) in &inserted_messages {
10628 let role_s = role_str(&msg.role);
10629 let usage = if historical_raw_json(&msg.extra_json).is_some() {
10630 crate::connectors::extract_tokens_for_agent(
10631 &conv.agent_slug,
10632 &serde_json::Value::Null,
10633 &msg.content,
10634 &role_s,
10635 )
10636 } else {
10637 crate::connectors::extract_tokens_for_agent(
10638 &conv.agent_slug,
10639 &msg.extra_json,
10640 &msg.content,
10641 &role_s,
10642 )
10643 };
10644
10645 let msg_ts = msg
10646 .created_at
10647 .or(conversation_effective_started_at(conv))
10648 .unwrap_or(0);
10649 let msg_day_id = if msg_ts > 0 {
10650 FrankenStorage::day_id_from_millis(msg_ts)
10651 } else {
10652 conv_day_id
10653 };
10654
10655 let model_info = usage
10656 .model_name
10657 .as_deref()
10658 .map(crate::connectors::normalize_model);
10659
10660 let model_family = model_info
10661 .as_ref()
10662 .map(|i| i.family.clone())
10663 .unwrap_or_else(|| "unknown".into());
10664 let model_tier = model_info
10665 .as_ref()
10666 .map(|i| i.tier.clone())
10667 .unwrap_or_else(|| "unknown".into());
10668 let provider = usage
10669 .provider
10670 .clone()
10671 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
10672 .unwrap_or_else(|| "unknown".into());
10673
10674 if model_family != "unknown" {
10675 session_model_family = model_family.clone();
10676 }
10677
10678 let estimated_cost = pricing_table.compute_cost(
10679 usage.model_name.as_deref(),
10680 msg_day_id,
10681 usage.input_tokens,
10682 usage.output_tokens,
10683 usage.cache_read_tokens,
10684 usage.cache_creation_tokens,
10685 );
10686 if estimated_cost.is_some() {
10687 pricing_diag.record_priced();
10688 } else if usage.has_token_data() {
10689 pricing_diag.record_unpriced(usage.model_name.as_deref());
10690 }
10691
10692 token_stats.record(
10693 &conv.agent_slug,
10694 &conv.source_id,
10695 msg_day_id,
10696 &model_family,
10697 &role_s,
10698 &usage,
10699 msg.content.len() as i64,
10700 estimated_cost.unwrap_or(0.0),
10701 );
10702
10703 if usage.has_token_data() {
10704 has_any_tokens = true;
10705 }
10706
10707 let content_chars = msg.content.len() as i64;
10708 let content_tokens_est = content_chars / 4;
10709 let msg_hour_id = FrankenStorage::hour_id_from_millis(msg_ts);
10710 let has_plan = has_plan_for_role(&role_s, &msg.content);
10711
10712 token_entries.push(TokenUsageEntry {
10713 message_id,
10714 conversation_id: conv_id,
10715 agent_id,
10716 workspace_id,
10717 source_id: conv.source_id.clone(),
10718 timestamp_ms: msg_ts,
10719 day_id: msg_day_id,
10720 model_name: usage.model_name.clone(),
10721 model_family: Some(model_family.clone()),
10722 model_tier: Some(model_tier.clone()),
10723 service_tier: usage.service_tier.clone(),
10724 provider: Some(provider.clone()),
10725 input_tokens: usage.input_tokens,
10726 output_tokens: usage.output_tokens,
10727 cache_read_tokens: usage.cache_read_tokens,
10728 cache_creation_tokens: usage.cache_creation_tokens,
10729 thinking_tokens: usage.thinking_tokens,
10730 total_tokens: usage.total_tokens(),
10731 estimated_cost_usd: estimated_cost,
10732 role: role_s.to_string(),
10733 content_chars,
10734 has_tool_calls: usage.has_tool_calls,
10735 tool_call_count: usage.tool_call_count,
10736 data_source: usage.data_source.as_str().to_string(),
10737 });
10738
10739 let mm = MessageMetricsEntry {
10740 message_id,
10741 created_at_ms: msg_ts,
10742 hour_id: msg_hour_id,
10743 day_id: msg_day_id,
10744 agent_slug: conv.agent_slug.clone(),
10745 workspace_id: workspace_id.unwrap_or(0),
10746 source_id: conv.source_id.clone(),
10747 role: role_s.to_string(),
10748 content_chars,
10749 content_tokens_est,
10750 model_name: usage.model_name.clone(),
10751 model_family: model_family.clone(),
10752 model_tier: model_tier.clone(),
10753 provider,
10754 api_input_tokens: usage.input_tokens,
10755 api_output_tokens: usage.output_tokens,
10756 api_cache_read_tokens: usage.cache_read_tokens,
10757 api_cache_creation_tokens: usage.cache_creation_tokens,
10758 api_thinking_tokens: usage.thinking_tokens,
10759 api_service_tier: usage.service_tier.clone(),
10760 api_data_source: usage.data_source.as_str().to_string(),
10761 tool_call_count: usage.tool_call_count as i64,
10762 has_tool_calls: usage.has_tool_calls,
10763 has_plan,
10764 };
10765 rollup_agg.record(&mm);
10766 metrics_entries.push(mm);
10767 }
10768
10769 if session_count_delta > 0 {
10770 token_stats.record_session(
10771 &conv.agent_slug,
10772 &conv.source_id,
10773 conv_day_id,
10774 &session_model_family,
10775 );
10776 }
10777
10778 if has_any_tokens {
10779 conv_ids_to_summarize.push(conv_id);
10780 }
10781 }
10782
10783 outcomes.push(InsertOutcome {
10784 conversation_id: conv_id,
10785 conversation_inserted: session_count_delta > 0,
10786 inserted_indices,
10787 });
10788 }
10789
10790 if !defer_lexical_updates {
10792 flush_pending_fts_entries(
10793 self,
10794 &tx,
10795 &mut fts_entries,
10796 &mut fts_pending_chars,
10797 &mut fts_inserted_total,
10798 )?;
10799 }
10800 if !defer_lexical_updates && fts_count_total > 0 {
10801 tracing::debug!(
10802 target: "cass::perf::fts5",
10803 total = fts_count_total,
10804 inserted = fts_inserted_total,
10805 conversations = conversations.len(),
10806 "franken_batch_fts_insert_complete"
10807 );
10808 }
10809
10810 if !defer_analytics_updates && !stats.is_empty() {
10812 let entries = stats.expand();
10813 let affected = franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
10814 tracing::debug!(
10815 target: "cass::perf::daily_stats",
10816 raw = stats.raw_entry_count(),
10817 expanded = entries.len(),
10818 affected = affected,
10819 "franken_batched_stats_update_complete"
10820 );
10821 }
10822
10823 if !defer_analytics_updates && !token_entries.is_empty() {
10825 let token_count = token_entries.len();
10826 let inserted = franken_insert_token_usage_batched_in_tx(&tx, &token_entries)?;
10827 tracing::debug!(
10828 target: "cass::perf::token_usage",
10829 total = token_count,
10830 inserted = inserted,
10831 "franken_batch_token_usage_insert_complete"
10832 );
10833 }
10834
10835 if !defer_analytics_updates && !token_stats.is_empty() {
10837 let entries = token_stats.expand();
10838 let affected = franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
10839 tracing::debug!(
10840 target: "cass::perf::token_daily_stats",
10841 raw = token_stats.raw_entry_count(),
10842 expanded = entries.len(),
10843 affected = affected,
10844 "franken_batched_token_stats_update_complete"
10845 );
10846 }
10847
10848 if !defer_analytics_updates && !metrics_entries.is_empty() {
10850 let mm_count = metrics_entries.len();
10851 let inserted = franken_insert_message_metrics_batched_in_tx(&tx, &metrics_entries)?;
10852 tracing::debug!(
10853 target: "cass::perf::message_metrics",
10854 total = mm_count,
10855 inserted = inserted,
10856 "franken_batch_message_metrics_insert_complete"
10857 );
10858 }
10859
10860 if !defer_analytics_updates && !rollup_agg.is_empty() {
10862 let (hourly, daily, models_daily) =
10863 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
10864 tracing::debug!(
10865 target: "cass::perf::usage_rollups",
10866 hourly_buckets = rollup_agg.hourly_entry_count(),
10867 daily_buckets = rollup_agg.daily_entry_count(),
10868 models_daily_buckets = rollup_agg.models_daily_entry_count(),
10869 hourly_affected = hourly,
10870 daily_affected = daily,
10871 models_daily_affected = models_daily,
10872 "franken_batched_usage_rollups_complete"
10873 );
10874 }
10875
10876 if !defer_analytics_updates {
10878 for conv_id in &conv_ids_to_summarize {
10879 franken_update_conversation_token_summaries_in_tx(&tx, *conv_id)?;
10880 }
10881 }
10882
10883 tx.commit()?;
10884
10885 pricing_diag.log_summary();
10886
10887 Ok(outcomes)
10888 }
10889}
10890
10891fn normalized_storage_source_parts(
10892 source_id: Option<&str>,
10893 origin_kind: Option<&str>,
10894 origin_host: Option<&str>,
10895) -> (String, SourceKind, Option<String>) {
10896 let host_label = crate::search::tantivy::normalized_index_origin_host(origin_host);
10897 let source_id = crate::search::tantivy::normalized_index_source_id(
10898 source_id,
10899 origin_kind,
10900 host_label.as_deref(),
10901 );
10902
10903 if source_id == LOCAL_SOURCE_ID {
10904 (source_id, SourceKind::Local, None)
10905 } else {
10906 (source_id, SourceKind::Ssh, host_label)
10907 }
10908}
10909
10910fn normalized_source_for_conversation(conv: &Conversation) -> Source {
10911 let (id, kind, host_label) = normalized_storage_source_parts(
10912 Some(conv.source_id.as_str()),
10913 None,
10914 conv.origin_host.as_deref(),
10915 );
10916 Source {
10917 id,
10918 kind,
10919 host_label,
10920 machine_id: None,
10921 platform: None,
10922 config_json: None,
10923 created_at: None,
10924 updated_at: None,
10925 }
10926}
10927
10928fn is_bootstrap_local_source(source: &Source) -> bool {
10929 source.id == LOCAL_SOURCE_ID
10930 && matches!(source.kind, SourceKind::Local)
10931 && source.host_label.is_none()
10932 && source.machine_id.is_none()
10933 && source.platform.is_none()
10934 && source.config_json.is_none()
10935 && source.created_at.is_none()
10936 && source.updated_at.is_none()
10937}
10938
10939fn normalized_conversation_for_storage<'a>(conv: &'a Conversation) -> Cow<'a, Conversation> {
10940 let normalized_source = normalized_source_for_conversation(conv);
10941 if normalized_source.id == conv.source_id && normalized_source.host_label == conv.origin_host {
10942 Cow::Borrowed(conv)
10943 } else {
10944 let mut normalized = conv.clone();
10945 normalized.source_id = normalized_source.id;
10946 normalized.origin_host = normalized_source.host_label;
10947 Cow::Owned(normalized)
10948 }
10949}
10950
10951impl FrankenStorage {
10952 fn ensure_source_for_conversation(&self, conv: &Conversation) -> Result<()> {
10953 let source = normalized_source_for_conversation(conv);
10954 if is_bootstrap_local_source(&source) {
10955 return Ok(());
10958 }
10959 let cache_key = EnsuredConversationSourceKey::from_source(&source);
10960 if self.conversation_source_already_ensured(&cache_key) {
10961 return Ok(());
10962 }
10963 self.upsert_source(&source)?;
10964 self.mark_conversation_source_ensured(cache_key);
10965 Ok(())
10966 }
10967
10968 fn ensure_sources_for_batch(
10969 &self,
10970 conversations: &[(i64, Option<i64>, &Conversation)],
10971 ) -> Result<()> {
10972 let mut seen = HashSet::with_capacity(conversations.len());
10973 for &(_, _, conv) in conversations {
10974 let source = normalized_source_for_conversation(conv);
10975 if seen.insert(source.id.clone()) {
10976 if is_bootstrap_local_source(&source) {
10977 continue;
10978 }
10979 self.upsert_source(&source)?;
10980 self.mark_conversation_source_ensured(EnsuredConversationSourceKey::from_source(
10981 &source,
10982 ));
10983 }
10984 }
10985 Ok(())
10986 }
10987}
10988
10989fn franken_last_rowid(tx: &FrankenTransaction<'_>) -> Result<i64> {
10995 tx.last_insert_rowid()
10996 .ok()
10997 .filter(|&id| id > 0)
10998 .with_context(|| "last_insert_rowid() returned NULL or 0 after INSERT")
10999}
11000
11001fn ensure_agents_in_tx(
11007 tx: &FrankenTransaction<'_>,
11008 conversations: &[(i64, Option<i64>, &Conversation)],
11009) -> Result<()> {
11010 let mut seen = HashSet::new();
11011 let now = FrankenStorage::now_millis();
11012 for &(agent_id, _, conv) in conversations {
11013 if !seen.insert(agent_id) {
11014 continue;
11015 }
11016 let exists: i64 = tx.query_row_map(
11017 "SELECT COUNT(*) FROM agents WHERE id = ?1",
11018 fparams![agent_id],
11019 |row| row.get_typed(0),
11020 )?;
11021 if exists == 0 {
11022 tracing::debug!(
11023 target: "cass::fk_guard",
11024 agent_id,
11025 slug = %conv.agent_slug,
11026 "inserting agent row inside transaction to satisfy FK constraint"
11027 );
11028 tx.execute_compat(
11032 "INSERT OR IGNORE INTO agents(id, slug, name, kind, created_at, updated_at)
11033 VALUES(?1, ?2, ?3, 'cli', ?4, ?5)",
11034 fparams![
11035 agent_id,
11036 conv.agent_slug.as_str(),
11037 conv.agent_slug.as_str(),
11038 now,
11039 now
11040 ],
11041 )?;
11042 }
11043 }
11044 Ok(())
11045}
11046
11047fn ensure_workspaces_in_tx(
11050 tx: &FrankenTransaction<'_>,
11051 conversations: &[(i64, Option<i64>, &Conversation)],
11052) -> Result<()> {
11053 let mut seen = HashSet::new();
11054 for &(_, workspace_id, conv) in conversations {
11055 let ws_id = match workspace_id {
11056 Some(id) => id,
11057 None => continue,
11058 };
11059 if !seen.insert(ws_id) {
11060 continue;
11061 }
11062 let exists: i64 = tx.query_row_map(
11063 "SELECT COUNT(*) FROM workspaces WHERE id = ?1",
11064 fparams![ws_id],
11065 |row| row.get_typed(0),
11066 )?;
11067 if exists == 0 {
11068 let path_str = conv
11069 .workspace
11070 .as_ref()
11071 .map(|p| p.to_string_lossy().to_string())
11072 .unwrap_or_default();
11073 tracing::debug!(
11074 target: "cass::fk_guard",
11075 workspace_id = ws_id,
11076 path = %path_str,
11077 "inserting workspace row inside transaction to satisfy FK constraint"
11078 );
11079 tx.execute_compat(
11080 "INSERT OR IGNORE INTO workspaces(id, path) VALUES(?1, ?2)",
11081 fparams![ws_id, path_str.as_str()],
11082 )?;
11083 }
11084 }
11085 Ok(())
11086}
11087
11088fn ensure_sources_in_tx(
11092 tx: &FrankenTransaction<'_>,
11093 conversations: &[(i64, Option<i64>, &Conversation)],
11094) -> Result<()> {
11095 let mut seen = HashSet::new();
11096 for &(_, _, conv) in conversations {
11097 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
11098 Some(conv.source_id.as_str()),
11099 None,
11100 conv.origin_host.as_deref(),
11101 );
11102 if !seen.insert(source_id.clone()) {
11103 continue;
11104 }
11105 let exists: i64 = tx.query_row_map(
11106 "SELECT COUNT(*) FROM sources WHERE id = ?1",
11107 fparams![source_id.as_str()],
11108 |row| row.get_typed(0),
11109 )?;
11110 if exists == 0 {
11111 let kind_str = source_kind.to_string();
11112 let now = FrankenStorage::now_millis();
11113 tracing::debug!(
11114 target: "cass::fk_guard",
11115 source_id = %source_id,
11116 kind = kind_str.as_str(),
11117 "inserting source row inside transaction to satisfy FK constraint"
11118 );
11119 tx.execute_compat(
11120 "INSERT OR IGNORE INTO sources(id, kind, host_label, created_at, updated_at)
11121 VALUES(?1, ?2, ?3, ?4, ?5)",
11122 fparams![
11123 source_id.as_str(),
11124 kind_str.as_str(),
11125 host_label.as_deref(),
11126 now,
11127 now
11128 ],
11129 )?;
11130 }
11131 }
11132 Ok(())
11133}
11134
11135fn env_flag_enabled(name: &str) -> bool {
11136 dotenvy::var(name)
11137 .map(|v| !(v == "0" || v.eq_ignore_ascii_case("false")))
11138 .unwrap_or(false)
11139}
11140
11141fn defer_storage_lexical_updates_enabled() -> bool {
11142 env_flag_enabled("CASS_DEFER_LEXICAL_UPDATES")
11143}
11144
11145fn defer_analytics_updates_enabled() -> bool {
11146 env_flag_enabled("CASS_DEFER_ANALYTICS_UPDATES")
11147}
11148
/// Result of trying to insert a conversation row: either a fresh row was
/// written or an already-stored conversation was matched instead.
enum ConversationInsertStatus {
    /// A new conversation row was created; carries its id.
    Inserted(i64),
    /// A matching conversation already existed; carries the existing row's id.
    Existing(i64),
}
11153
11154fn franken_find_external_conversation_tail_lookup(
11155 tx: &FrankenTransaction<'_>,
11156 lookup_key: &str,
11157) -> Result<Option<ExistingConversationWithTail>> {
11158 let params = [SqliteValue::from(lookup_key)];
11159 let row = tx
11160 .query_row_with_params(
11161 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
11162 FROM conversation_external_tail_lookup
11163 WHERE lookup_key = ?1",
11164 ¶ms,
11165 )
11166 .optional()?;
11167 let Some(row) = row else {
11168 return Ok(None);
11169 };
11170 let id = row.get_typed(0)?;
11171 let ended_at = row.get_typed(1)?;
11172 let last_message_idx = row.get_typed(2)?;
11173 let last_message_created_at = row.get_typed(3)?;
11174 Ok(Some(ExistingConversationWithTail {
11175 id,
11176 tail_state: existing_conversation_tail_state_from_cached(
11177 last_message_idx,
11178 last_message_created_at,
11179 ended_at,
11180 ),
11181 }))
11182}
11183
11184fn franken_find_external_conversation_lookup(
11185 tx: &FrankenTransaction<'_>,
11186 lookup_key: &str,
11187) -> Result<Option<i64>> {
11188 Ok(franken_find_external_conversation_tail_lookup(tx, lookup_key)?.map(|existing| existing.id))
11189}
11190
11191fn franken_insert_external_conversation_tail_lookup_key(
11192 tx: &FrankenTransaction<'_>,
11193 lookup_key: &str,
11194 conversation_id: i64,
11195 ended_at: Option<i64>,
11196 last_message_idx: Option<i64>,
11197 last_message_created_at: Option<i64>,
11198) -> Result<()> {
11199 let params = [
11200 SqliteValue::from(lookup_key),
11201 SqliteValue::from(conversation_id),
11202 SqliteValue::from(ended_at),
11203 SqliteValue::from(last_message_idx),
11204 SqliteValue::from(last_message_created_at),
11205 ];
11206 tx.execute_with_params(
11207 "INSERT OR REPLACE INTO conversation_external_tail_lookup(
11208 lookup_key, conversation_id, ended_at, last_message_idx, last_message_created_at
11209 ) VALUES(?1, ?2, ?3, ?4, ?5)",
11210 ¶ms,
11211 )?;
11212 Ok(())
11213}
11214
11215fn franken_insert_external_conversation_tail_lookup(
11216 tx: &FrankenTransaction<'_>,
11217 source_id: &str,
11218 agent_id: i64,
11219 external_id: &str,
11220 existing: ExistingConversationWithTail,
11221) -> Result<()> {
11222 let lookup_key = conversation_external_lookup_key(source_id, agent_id, external_id);
11223 let ended_at = existing.tail_state.and_then(|state| state.ended_at);
11224 let last_message_idx = existing.tail_state.map(|state| state.last_message_idx);
11225 let last_message_created_at = existing
11226 .tail_state
11227 .map(|state| state.last_message_created_at);
11228 franken_insert_external_conversation_tail_lookup_key(
11229 tx,
11230 &lookup_key,
11231 existing.id,
11232 ended_at,
11233 last_message_idx,
11234 last_message_created_at,
11235 )
11236}
11237
11238fn franken_update_external_conversation_tail_lookup_key(
11239 tx: &FrankenTransaction<'_>,
11240 lookup_key: &str,
11241 ended_at_candidate: Option<i64>,
11242 last_message_idx_candidate: Option<i64>,
11243 last_message_created_at_candidate: Option<i64>,
11244) -> Result<()> {
11245 if ended_at_candidate.is_none()
11246 && last_message_idx_candidate.is_none()
11247 && last_message_created_at_candidate.is_none()
11248 {
11249 return Ok(());
11250 }
11251 tx.execute_compat(
11252 "UPDATE conversation_external_tail_lookup
11253 SET ended_at = CASE
11254 WHEN ?1 IS NULL THEN ended_at
11255 ELSE MAX(IFNULL(ended_at, 0), ?1)
11256 END,
11257 last_message_idx = CASE
11258 WHEN ?2 IS NULL THEN last_message_idx
11259 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
11260 ELSE last_message_idx
11261 END,
11262 last_message_created_at = CASE
11263 WHEN ?3 IS NULL THEN last_message_created_at
11264 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
11265 ELSE last_message_created_at
11266 END
11267 WHERE lookup_key = ?4",
11268 fparams![
11269 ended_at_candidate,
11270 last_message_idx_candidate,
11271 last_message_created_at_candidate,
11272 lookup_key
11273 ],
11274 )?;
11275 Ok(())
11276}
11277
11278fn franken_set_external_conversation_tail_lookup_after_append(
11279 tx: &FrankenTransaction<'_>,
11280 lookup_key: &str,
11281 ended_at: i64,
11282 last_message_idx: i64,
11283 last_message_created_at: i64,
11284) -> Result<()> {
11285 tx.execute_compat(
11286 "UPDATE conversation_external_tail_lookup
11287 SET ended_at = ?1,
11288 last_message_idx = ?2,
11289 last_message_created_at = ?3
11290 WHERE lookup_key = ?4",
11291 fparams![
11292 ended_at,
11293 last_message_idx,
11294 last_message_created_at,
11295 lookup_key
11296 ],
11297 )?;
11298 Ok(())
11299}
11300
11301fn franken_update_external_conversation_tail_after_append(
11302 tx: &FrankenTransaction<'_>,
11303 agent_id: i64,
11304 conv: &Conversation,
11305 used_append_tail_plan: bool,
11306 exact_append_set: bool,
11307 inserted_last_idx: Option<i64>,
11308 inserted_last_created_at: Option<i64>,
11309) -> Result<()> {
11310 let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv) else {
11311 return Ok(());
11312 };
11313
11314 if exact_append_set
11315 && let (Some(last_message_idx), Some(last_message_created_at)) =
11316 (inserted_last_idx, inserted_last_created_at)
11317 {
11318 return franken_set_external_conversation_tail_lookup_after_append(
11319 tx,
11320 &lookup_key,
11321 last_message_created_at,
11322 last_message_idx,
11323 last_message_created_at,
11324 );
11325 }
11326
11327 let ended_at_candidate = if used_append_tail_plan {
11328 inserted_last_created_at
11329 } else {
11330 conv.messages.iter().filter_map(|m| m.created_at).max()
11331 };
11332 franken_update_external_conversation_tail_lookup_key(
11333 tx,
11334 &lookup_key,
11335 ended_at_candidate,
11336 inserted_last_idx,
11337 inserted_last_created_at,
11338 )
11339}
11340
11341fn franken_find_existing_conversation_by_key(
11342 tx: &FrankenTransaction<'_>,
11343 key: &PendingConversationKey,
11344 conv: Option<&Conversation>,
11345) -> Result<Option<i64>> {
11346 franken_find_existing_conversation_by_key_impl(tx, key, conv, false)
11347}
11348
11349fn franken_find_existing_conversation_by_key_after_conflict(
11350 tx: &FrankenTransaction<'_>,
11351 key: &PendingConversationKey,
11352 conv: Option<&Conversation>,
11353) -> Result<Option<i64>> {
11354 franken_find_existing_conversation_by_key_impl(tx, key, conv, true)
11355}
11356
11357fn franken_find_existing_conversation_by_key_impl(
11358 tx: &FrankenTransaction<'_>,
11359 key: &PendingConversationKey,
11360 conv: Option<&Conversation>,
11361 allow_legacy_external_scan: bool,
11362) -> Result<Option<i64>> {
11363 match key {
11364 PendingConversationKey::External {
11365 source_id,
11366 agent_id,
11367 external_id,
11368 } => {
11369 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
11370 if let Some(existing_id) = franken_find_external_conversation_lookup(tx, &lookup_key)? {
11371 return Ok(Some(existing_id));
11372 }
11373 if !allow_legacy_external_scan {
11374 return Ok(None);
11375 }
11376
11377 let existing_id = tx
11378 .query_row_map(
11379 "SELECT id
11380 FROM conversations
11381 WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
11382 fparams![source_id.as_str(), *agent_id, external_id.as_str()],
11383 |row| row.get_typed(0),
11384 )
11385 .optional()?;
11386 if let Some(existing_id) = existing_id {
11387 let tail_state = franken_existing_conversation_append_tail_state(tx, existing_id)?;
11388 franken_insert_external_conversation_tail_lookup_key(
11389 tx,
11390 &lookup_key,
11391 existing_id,
11392 tail_state.and_then(|state| state.ended_at),
11393 tail_state.map(|state| state.last_message_idx),
11394 tail_state.map(|state| state.last_message_created_at),
11395 )?;
11396 Ok(Some(existing_id))
11397 } else {
11398 Ok(None)
11399 }
11400 }
11401 PendingConversationKey::SourcePath {
11402 source_id,
11403 agent_id,
11404 source_path,
11405 started_at,
11406 } => {
11407 let exact_match = tx
11408 .query_row_map(
11409 "SELECT c.id
11410 FROM conversations c
11411 WHERE c.source_id = ?1
11412 AND c.agent_id = ?2
11413 AND c.source_path = ?3
11414 AND ((
11415 COALESCE(
11416 c.started_at,
11417 (SELECT MIN(created_at)
11418 FROM messages
11419 WHERE conversation_id = c.id
11420 AND created_at IS NOT NULL)
11421 ) IS NULL
11422 AND ?4 IS NULL
11423 ) OR COALESCE(
11424 c.started_at,
11425 (SELECT MIN(created_at)
11426 FROM messages
11427 WHERE conversation_id = c.id
11428 AND created_at IS NOT NULL)
11429 ) = ?4)
11430 ORDER BY c.id
11431 LIMIT 1",
11432 fparams![
11433 source_id.as_str(),
11434 *agent_id,
11435 source_path.as_str(),
11436 *started_at
11437 ],
11438 |row| row.get_typed(0),
11439 )
11440 .optional()?;
11441 if exact_match.is_some() {
11442 return Ok(exact_match);
11443 }
11444
11445 let Some(conv) = conv else {
11446 return Ok(None);
11447 };
11448 let incoming_fingerprints = conversation_message_fingerprints(conv);
11449 if incoming_fingerprints.is_empty() {
11450 return Ok(None);
11451 }
11452 let incoming_replay_fingerprints = conversation_message_replay_fingerprints(conv);
11453
11454 let candidates: Vec<(i64, Option<i64>)> = tx.query_map_collect(
11455 "SELECT
11456 c.id,
11457 COALESCE(
11458 c.started_at,
11459 (SELECT MIN(created_at)
11460 FROM messages
11461 WHERE conversation_id = c.id
11462 AND created_at IS NOT NULL)
11463 ) AS effective_started_at
11464 FROM conversations c
11465 WHERE c.source_id = ?1
11466 AND c.agent_id = ?2
11467 AND c.source_path = ?3
11468 ORDER BY c.id",
11469 fparams![source_id.as_str(), *agent_id, source_path.as_str()],
11470 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
11471 )?;
11472
11473 let mut best_candidate: Option<(i64, ConversationMergeEvidence)> = None;
11474 for (candidate_id, candidate_started_at) in candidates {
11475 let existing_fingerprints =
11476 franken_existing_message_fingerprints(tx, candidate_id)?;
11477 let existing_replay_fingerprints =
11478 replay_fingerprints_from_merge_set(&existing_fingerprints);
11479 let Some(evidence) = conversation_merge_evidence(
11480 &incoming_fingerprints,
11481 &incoming_replay_fingerprints,
11482 &existing_fingerprints,
11483 &existing_replay_fingerprints,
11484 *started_at,
11485 candidate_started_at,
11486 ) else {
11487 continue;
11488 };
11489
11490 let candidate_key = (
11491 evidence.exact_overlap,
11492 evidence.replay_overlap,
11493 evidence.started_close,
11494 evidence.smaller_replay_set,
11495 std::cmp::Reverse(evidence.start_distance_ms),
11496 );
11497 let should_replace = best_candidate
11498 .as_ref()
11499 .map(|(_, best_evidence)| {
11500 candidate_key
11501 > (
11502 best_evidence.exact_overlap,
11503 best_evidence.replay_overlap,
11504 best_evidence.started_close,
11505 best_evidence.smaller_replay_set,
11506 std::cmp::Reverse(best_evidence.start_distance_ms),
11507 )
11508 })
11509 .unwrap_or(true);
11510
11511 if should_replace {
11512 best_candidate = Some((candidate_id, evidence));
11513 }
11514 }
11515
11516 Ok(best_candidate.map(|(candidate_id, _)| candidate_id))
11517 }
11518 }
11519}
11520
11521fn franken_insert_conversation_or_get_existing(
11522 tx: &FrankenTransaction<'_>,
11523 agent_id: i64,
11524 workspace_id: Option<i64>,
11525 conv: &Conversation,
11526) -> Result<ConversationInsertStatus> {
11527 let conversation_key = conversation_merge_key(agent_id, conv);
11528 if let Some(existing_id) =
11529 franken_find_existing_conversation_by_key(tx, &conversation_key, Some(conv))?
11530 {
11531 return Ok(ConversationInsertStatus::Existing(existing_id));
11532 }
11533
11534 franken_insert_conversation_or_get_existing_after_miss(
11535 tx,
11536 agent_id,
11537 workspace_id,
11538 conv,
11539 &conversation_key,
11540 )
11541}
11542
11543fn franken_insert_conversation_or_get_existing_after_miss(
11544 tx: &FrankenTransaction<'_>,
11545 agent_id: i64,
11546 workspace_id: Option<i64>,
11547 conv: &Conversation,
11548 conversation_key: &PendingConversationKey,
11549) -> Result<ConversationInsertStatus> {
11550 match franken_insert_conversation(tx, agent_id, workspace_id, conv) {
11551 Ok(Some(conv_id)) => Ok(ConversationInsertStatus::Inserted(conv_id)),
11552 Ok(None) => {
11553 let existing_id =
11556 franken_find_existing_conversation_by_key_after_conflict(
11557 tx,
11558 conversation_key,
11559 Some(conv),
11560 )?
11561 .with_context(|| {
11562 format!(
11563 "conversation INSERT produced a duplicate conflict but existing row was not found for source_id={} agent_id={} external_id={:?} source_path={}",
11564 conv.source_id,
11565 agent_id,
11566 conv.external_id,
11567 conv.source_path.display()
11568 )
11569 })?;
11570 tracing::warn!(
11571 source_id = %conv.source_id,
11572 agent_id,
11573 external_id = ?conv.external_id,
11574 existing_id,
11575 source_path = %conv.source_path.display(),
11576 "conversation INSERT: duplicate gracefully recovered, reusing existing row"
11577 );
11578 Ok(ConversationInsertStatus::Existing(existing_id))
11579 }
11580 Err(error) => {
11581 tracing::error!(
11582 source_id = %conv.source_id,
11583 agent_id,
11584 external_id = ?conv.external_id,
11585 error = %error,
11586 source_path = %conv.source_path.display(),
11587 "franken_insert_conversation failed"
11588 );
11589 Err(error)
11590 }
11591 }
11592}
11593
11594fn franken_insert_conversation(
11600 tx: &FrankenTransaction<'_>,
11601 agent_id: i64,
11602 workspace_id: Option<i64>,
11603 conv: &Conversation,
11604) -> Result<Option<i64>> {
11605 let (metadata_json_str, metadata_bin) = franken_metadata_insert_payload(&conv.metadata_json)?;
11606 let (last_message_idx, last_message_created_at) = conversation_tail_state(conv);
11607 let metadata_bin_bytes = metadata_bin.as_deref();
11608
11609 match tx.execute_compat(
11610 "INSERT INTO conversations(
11611 agent_id, workspace_id, source_id, external_id, title, source_path,
11612 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin,
11613 last_message_idx, last_message_created_at
11614 ) VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14)",
11615 fparams![
11616 agent_id,
11617 workspace_id,
11618 conv.source_id.as_str(),
11619 conv.external_id.as_deref(),
11620 conv.title.as_deref(),
11621 path_to_string(&conv.source_path),
11622 conv.started_at,
11623 conv.ended_at,
11624 conv.approx_tokens,
11625 metadata_json_str.as_deref(),
11626 conv.origin_host.as_deref(),
11627 metadata_bin_bytes,
11628 last_message_idx,
11629 last_message_created_at
11630 ],
11631 ) {
11632 Ok(_) => {
11633 let conv_id = franken_last_rowid(tx)?;
11634 franken_insert_conversation_tail_state(
11635 tx,
11636 conv_id,
11637 conv.ended_at,
11638 last_message_idx,
11639 last_message_created_at,
11640 )?;
11641 if let Some(external_id) = conv.external_id.as_deref() {
11642 franken_insert_external_conversation_tail_lookup(
11643 tx,
11644 conv.source_id.as_str(),
11645 agent_id,
11646 external_id,
11647 ExistingConversationWithTail {
11648 id: conv_id,
11649 tail_state: existing_conversation_tail_state_from_cached(
11650 last_message_idx,
11651 last_message_created_at,
11652 conv.ended_at,
11653 ),
11654 },
11655 )?;
11656 }
11657 Ok(Some(conv_id))
11658 }
11659 Err(frankensqlite::FrankenError::UniqueViolation { .. }) => {
11660 tracing::debug!(
11661 source_id = %conv.source_id,
11662 agent_id,
11663 external_id = ?conv.external_id,
11664 source_path = %conv.source_path.display(),
11665 "conversation INSERT: duplicate provenance conflict"
11666 );
11667 Ok(None)
11668 }
11669 Err(error) => Err(error.into()),
11670 }
11671}
11672
11673type MetadataInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11674
11675fn franken_metadata_insert_payload(value: &serde_json::Value) -> Result<MetadataInsertPayload<'_>> {
11676 if let Some(raw) = historical_raw_json(value) {
11677 Ok((Some(Cow::Borrowed(raw)), None))
11678 } else if value.is_null() {
11679 Ok((Some(Cow::Borrowed("null")), None))
11680 } else if value.as_object().is_some_and(|object| object.is_empty()) {
11681 Ok((None, None))
11682 } else if let Some(metadata_bin) = serialize_json_to_msgpack(value) {
11683 Ok((None, Some(metadata_bin)))
11684 } else {
11685 Ok((Some(Cow::Owned(serde_json::to_string(value)?)), None))
11686 }
11687}
11688
11689fn franken_insert_new_message(
11690 tx: &FrankenTransaction<'_>,
11691 conversation_id: i64,
11692 msg: &Message,
11693) -> Result<i64> {
11694 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
11695 let extra_bin_bytes = extra_bin.as_deref();
11696
11697 tx.execute_compat(
11698 "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
11699 VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
11700 fparams![
11701 conversation_id,
11702 msg.idx,
11703 role_as_str(&msg.role),
11704 msg.author.as_deref(),
11705 msg.created_at,
11706 msg.content.as_str(),
11707 extra_json_str.as_deref(),
11708 extra_bin_bytes
11709 ],
11710 )?;
11711 franken_last_rowid(tx)
11712}
11713
11714type MessageInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
11715
11716fn franken_message_insert_payload(msg: &Message) -> Result<MessageInsertPayload<'_>> {
11717 if let Some(raw) = historical_raw_json(&msg.extra_json) {
11718 Ok((Some(Cow::Borrowed(raw)), None))
11719 } else if msg.extra_json.is_null() {
11720 Ok((None, None))
11721 } else {
11722 let extra_bin = serialize_json_to_msgpack(&msg.extra_json);
11723 if extra_bin.is_some() {
11724 Ok((None, extra_bin))
11725 } else {
11726 Ok((
11727 Some(Cow::Owned(serde_json::to_string(&msg.extra_json)?)),
11728 None,
11729 ))
11730 }
11731 }
11732}
11733
/// Rows per multi-row INSERT used by `franken_batch_insert_new_messages`.
const MESSAGE_INSERT_BATCH_SIZE: usize = 100;

/// Rows per multi-row INSERT used by `franken_append_insert_new_messages`.
const APPEND_MESSAGE_INSERT_BATCH_SIZE: usize = 50;

/// Returns the cached multi-row `INSERT INTO messages` statement for
/// `row_count` rows, each row using eight consecutive numbered placeholders.
///
/// The statements for every row count up to the larger of the two batch-size
/// constants are built once, on first use. Index 0 holds an empty string.
///
/// # Panics
/// When `row_count` exceeds the largest cached batch size.
fn message_insert_batch_sql(row_count: usize) -> &'static str {
    const INSERT_PREFIX: &str = "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin) VALUES ";
    const COLUMNS_PER_ROW: usize = 8;

    static SQL_BY_ROW_COUNT: std::sync::OnceLock<Vec<String>> = std::sync::OnceLock::new();

    let table = SQL_BY_ROW_COUNT.get_or_init(|| {
        let largest_batch = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
        let mut table = Vec::with_capacity(largest_batch + 1);
        // Slot 0 keeps the index equal to the row count.
        table.push(String::new());

        // Grow the tuple list by one row per iteration; each prefix of it is
        // exactly the VALUES clause for that row count.
        let mut tuples = String::new();
        for row in 0..largest_batch {
            if row > 0 {
                tuples.push(',');
            }
            let first_slot = row * COLUMNS_PER_ROW + 1;
            let slots = (first_slot..first_slot + COLUMNS_PER_ROW)
                .map(|slot| format!("?{slot}"))
                .collect::<Vec<_>>()
                .join(",");
            tuples.push('(');
            tuples.push_str(&slots);
            tuples.push(')');
            table.push(format!("{INSERT_PREFIX}{tuples}"));
        }
        table
    });

    let Some(sql) = table.get(row_count) else {
        panic!("message insert batch size must be covered by the cached SQL table");
    };
    sql.as_str()
}
11784
11785fn franken_batch_insert_new_messages(
11786 tx: &FrankenTransaction<'_>,
11787 conversation_id: i64,
11788 messages: &[&Message],
11789) -> Result<Vec<i64>> {
11790 franken_batch_insert_new_messages_with_batch_size(
11791 tx,
11792 conversation_id,
11793 messages,
11794 MESSAGE_INSERT_BATCH_SIZE,
11795 )
11796}
11797
11798fn franken_append_insert_new_messages(
11799 tx: &FrankenTransaction<'_>,
11800 conversation_id: i64,
11801 messages: &[&Message],
11802) -> Result<Vec<i64>> {
11803 franken_batch_insert_new_messages_with_batch_size(
11804 tx,
11805 conversation_id,
11806 messages,
11807 APPEND_MESSAGE_INSERT_BATCH_SIZE,
11808 )
11809}
11810
11811fn franken_batch_insert_new_messages_with_batch_size(
11812 tx: &FrankenTransaction<'_>,
11813 conversation_id: i64,
11814 messages: &[&Message],
11815 batch_size: usize,
11816) -> Result<Vec<i64>> {
11817 let batch_size = batch_size.max(1);
11818 let mut inserted_ids = Vec::with_capacity(messages.len());
11819 for chunk in messages.chunks(batch_size) {
11820 if chunk.len() == 1 {
11821 inserted_ids.push(franken_insert_new_message(tx, conversation_id, chunk[0])?);
11822 continue;
11823 }
11824 let sql = message_insert_batch_sql(chunk.len());
11825
11826 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
11827 for msg in chunk {
11828 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
11829 param_values.push(SqliteValue::from(conversation_id));
11830 param_values.push(SqliteValue::from(msg.idx));
11831 param_values.push(SqliteValue::from(role_as_str(&msg.role)));
11832 param_values.push(SqliteValue::from(msg.author.as_deref()));
11833 param_values.push(SqliteValue::from(msg.created_at));
11834 param_values.push(SqliteValue::from(msg.content.as_str()));
11835 param_values.push(SqliteValue::from(extra_json_str.as_deref()));
11836 param_values.push(SqliteValue::from(extra_bin.as_deref()));
11837 }
11838
11839 tx.execute_with_params(sql, ¶m_values)?;
11840
11841 let last_id = franken_last_rowid(tx)?;
11842 let first_id = last_id
11843 .checked_sub((chunk.len() - 1) as i64)
11844 .with_context(|| {
11845 format!(
11846 "inferring rowid range for {}-row message batch ending at {last_id}",
11847 chunk.len()
11848 )
11849 })?;
11850 inserted_ids.extend((0..chunk.len()).map(|offset| first_id + offset as i64));
11851 }
11852
11853 Ok(inserted_ids)
11854}
11855
/// Test-only twin of `franken_insert_new_message` that also records timings.
///
/// Counts one single-row call and one row in `profile`, then accumulates the
/// time spent building the payload, executing the INSERT, and reading the
/// rowid into the corresponding `profile` durations.
///
/// # Errors
/// Propagates payload-encoding, INSERT, and rowid-lookup failures.
#[cfg(test)]
fn franken_insert_new_message_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    msg: &Message,
    profile: &mut MessageInsertSubstageProfile,
) -> Result<i64> {
    const INSERT_MESSAGE_SQL: &str = "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES(?1,?2,?3,?4,?5,?6,?7,?8)";

    profile.single_row_calls += 1;
    profile.batch_rows += 1;

    let payload_timer = Instant::now();
    let (extra_json, extra_bin) = franken_message_insert_payload(msg)?;
    profile.payload_duration += payload_timer.elapsed();
    let extra_bin = extra_bin.as_deref();

    let execute_timer = Instant::now();
    tx.execute_compat(
        INSERT_MESSAGE_SQL,
        fparams![
            conversation_id,
            msg.idx,
            role_as_str(&msg.role),
            msg.author.as_deref(),
            msg.created_at,
            msg.content.as_str(),
            extra_json.as_deref(),
            extra_bin
        ],
    )?;
    profile.execute_duration += execute_timer.elapsed();

    let rowid_timer = Instant::now();
    let new_rowid = franken_last_rowid(tx)?;
    profile.rowid_duration += rowid_timer.elapsed();

    Ok(new_rowid)
}
11893
/// Test-only profiled batch insert using `MESSAGE_INSERT_BATCH_SIZE` rows per
/// statement; timings and counters are accumulated into `profile`.
///
/// # Errors
/// Propagates any failure from the underlying profiled batched insert.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
11909
/// Test-only profiled append insert using `APPEND_MESSAGE_INSERT_BATCH_SIZE`
/// rows per statement; timings and counters are accumulated into `profile`.
///
/// # Errors
/// Propagates any failure from the underlying profiled batched insert.
#[cfg(test)]
fn franken_append_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = APPEND_MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
11925
/// Test-only twin of `franken_batch_insert_new_messages_with_batch_size` that
/// records per-stage counters and timings in `profile`.
///
/// `batch_size` is clamped to `1..=` the largest row count covered by
/// `message_insert_batch_sql`, matching the non-profiled function, so an
/// oversized value cannot panic inside the SQL cache lookup. One-message
/// chunks go through `franken_insert_new_message_with_profile`.
///
/// # Errors
/// Propagates payload-encoding, INSERT, and rowid-lookup failures, and fails
/// if the inferred first rowid of a batch would underflow.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile_batch_size(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
    batch_size: usize,
) -> Result<Vec<i64>> {
    // `message_insert_batch_sql` only caches statements up to this many rows.
    let max_cached_rows = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
    let batch_size = batch_size.clamp(1, max_cached_rows);

    let mut inserted_ids = Vec::with_capacity(messages.len());
    for chunk in messages.chunks(batch_size) {
        if let [only] = chunk {
            inserted_ids.push(franken_insert_new_message_with_profile(
                tx,
                conversation_id,
                *only,
                profile,
            )?);
            continue;
        }

        profile.batch_calls += 1;
        profile.batch_rows += chunk.len();

        let sql_build_start = Instant::now();
        let sql = message_insert_batch_sql(chunk.len());
        profile.sql_build_duration += sql_build_start.elapsed();

        let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
        for msg in chunk {
            let payload_start = Instant::now();
            let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
            profile.payload_duration += payload_start.elapsed();

            let param_build_start = Instant::now();
            param_values.extend([
                SqliteValue::from(conversation_id),
                SqliteValue::from(msg.idx),
                SqliteValue::from(role_as_str(&msg.role)),
                SqliteValue::from(msg.author.as_deref()),
                SqliteValue::from(msg.created_at),
                SqliteValue::from(msg.content.as_str()),
                SqliteValue::from(extra_json_str.as_deref()),
                SqliteValue::from(extra_bin.as_deref()),
            ]);
            profile.param_build_duration += param_build_start.elapsed();
        }

        let execute_start = Instant::now();
        tx.execute_with_params(sql, &param_values)?;
        profile.execute_duration += execute_start.elapsed();

        let rowid_start = Instant::now();
        // Only the last rowid is reported; the rest of the batch is inferred.
        // NOTE(review): this assumes one statement allocates consecutive
        // rowids — confirm against the storage engine's guarantees.
        let last_id = franken_last_rowid(tx)?;
        // Lossless: chunk.len() is bounded by the clamped batch size.
        let preceding_rows = (chunk.len() - 1) as i64;
        let first_id = last_id.checked_sub(preceding_rows).with_context(|| {
            format!(
                "inferring rowid range for {}-row message batch ending at {last_id}",
                chunk.len()
            )
        })?;
        inserted_ids.extend(first_id..=last_id);
        profile.rowid_duration += rowid_start.elapsed();
    }

    Ok(inserted_ids)
}
11992
11993fn franken_insert_snippets(
11995 tx: &FrankenTransaction<'_>,
11996 message_id: i64,
11997 snippets: &[Snippet],
11998) -> Result<()> {
11999 for snip in snippets {
12000 let file_path_str = snip.file_path.as_ref().map(path_to_string);
12001 tx.execute_compat(
12002 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text)
12003 VALUES(?1,?2,?3,?4,?5,?6)",
12004 fparams![
12005 message_id,
12006 file_path_str.as_deref(),
12007 snip.start_line,
12008 snip.end_line,
12009 snip.language.as_deref(),
12010 snip.snippet_text.as_deref()
12011 ],
12012 )?;
12013 }
12014 Ok(())
12015}
12016
12017fn franken_existing_message_fingerprints(
12018 tx: &FrankenTransaction<'_>,
12019 conversation_id: i64,
12020) -> Result<HashSet<MessageMergeFingerprint>> {
12021 let rows = tx.query_params(
12022 "SELECT idx, role, author, created_at, content
12023 FROM messages
12024 WHERE conversation_id = ?1",
12025 fparams![conversation_id],
12026 )?;
12027 let mut fingerprints = HashSet::with_capacity(rows.len());
12028 for row in rows {
12029 let role: String = row.get_typed(1)?;
12030 let content: String = row.get_typed(4)?;
12031 fingerprints.insert(MessageMergeFingerprint {
12032 idx: row.get_typed(0)?,
12033 created_at: row.get_typed(3)?,
12034 role: role_from_str(&role),
12035 author: row.get_typed(2)?,
12036 content_hash: *blake3::hash(content.as_bytes()).as_bytes(),
12037 });
12038 }
12039 Ok(fingerprints)
12040}
12041
12042struct ExistingMessageLookup {
12043 by_idx: HashMap<i64, MessageMergeFingerprint>,
12044 replay: HashSet<MessageReplayFingerprint>,
12045}
12046
12047fn franken_existing_message_lookup(
12048 tx: &FrankenTransaction<'_>,
12049 conversation_id: i64,
12050 incoming_messages: &[Message],
12051) -> Result<ExistingMessageLookup> {
12052 if incoming_messages.is_empty() {
12053 return Ok(ExistingMessageLookup {
12054 by_idx: HashMap::new(),
12055 replay: HashSet::new(),
12056 });
12057 }
12058
12059 let min_idx = incoming_messages
12060 .iter()
12061 .map(|msg| msg.idx)
12062 .min()
12063 .unwrap_or(0);
12064 let max_idx = incoming_messages
12065 .iter()
12066 .map(|msg| msg.idx)
12067 .max()
12068 .unwrap_or(min_idx);
12069 let requires_full_scan = incoming_messages.iter().any(|msg| msg.created_at.is_none());
12070 let created_bounds = incoming_messages
12071 .iter()
12072 .filter_map(|msg| msg.created_at)
12073 .fold(None, |bounds: Option<(i64, i64)>, created_at| {
12074 Some(match bounds {
12075 Some((min_created_at, max_created_at)) => (
12076 min_created_at.min(created_at),
12077 max_created_at.max(created_at),
12078 ),
12079 None => (created_at, created_at),
12080 })
12081 });
12082
12083 let mut indexed_by_idx = HashMap::with_capacity(incoming_messages.len());
12084 let mut indexed_replay = HashSet::with_capacity(incoming_messages.len());
12085 let mut exact_idx_match = true;
12086 for msg in incoming_messages {
12087 record_message_lookup_exact_idx_probe();
12088 let Some((role, author, created_at, content)) = tx
12089 .query_row_map(
12090 "SELECT role, author, created_at, content
12091 FROM messages INDEXED BY sqlite_autoindex_messages_1
12092 WHERE conversation_id = ?1 AND idx = ?2
12093 LIMIT 1",
12094 fparams![conversation_id, msg.idx],
12095 |row| {
12096 Ok((
12097 row.get_typed::<String>(0)?,
12098 row.get_typed::<Option<String>>(1)?,
12099 row.get_typed::<Option<i64>>(2)?,
12100 row.get_typed::<String>(3)?,
12101 ))
12102 },
12103 )
12104 .optional()?
12105 else {
12106 exact_idx_match = false;
12107 break;
12108 };
12109 let role = role_from_str(&role);
12110 let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
12111 let fingerprint = MessageMergeFingerprint {
12112 idx: msg.idx,
12113 created_at,
12114 role: role.clone(),
12115 author: author.clone(),
12116 content_hash,
12117 };
12118 if fingerprint != message_merge_fingerprint(msg) {
12119 exact_idx_match = false;
12120 break;
12121 }
12122 indexed_by_idx.insert(msg.idx, fingerprint);
12123 indexed_replay.insert(MessageReplayFingerprint {
12124 created_at,
12125 role,
12126 author,
12127 content_hash,
12128 });
12129 }
12130
12131 if exact_idx_match {
12132 return Ok(ExistingMessageLookup {
12133 by_idx: indexed_by_idx,
12134 replay: indexed_replay,
12135 });
12136 }
12137
12138 let (rows, replay_full_scan) = if requires_full_scan {
12139 let rows = tx.query_params(
12140 "SELECT idx, role, author, created_at, content
12141 FROM messages INDEXED BY sqlite_autoindex_messages_1
12142 WHERE conversation_id = ?1",
12143 fparams![conversation_id],
12144 )?;
12145 record_message_lookup_full_scan_query(rows.len());
12146 (rows, true)
12147 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12148 let mut rows = tx.query_params(
12149 "SELECT idx, role, author, created_at, content
12150 FROM messages INDEXED BY sqlite_autoindex_messages_1
12151 WHERE conversation_id = ?1
12152 AND idx >= ?2
12153 AND idx <= ?3",
12154 fparams![conversation_id, min_idx, max_idx],
12155 )?;
12156 rows.extend(tx.query_params(
12157 "SELECT idx, role, author, created_at, content
12158 FROM messages INDEXED BY sqlite_autoindex_messages_1
12159 WHERE conversation_id = ?1
12160 AND created_at IS NOT NULL
12161 AND created_at >= ?2
12162 AND created_at <= ?3",
12163 fparams![conversation_id, min_created_at, max_created_at],
12164 )?);
12165 record_message_lookup_bounded_queries(2, rows.len());
12166 (rows, false)
12167 } else {
12168 let rows = tx.query_params(
12169 "SELECT idx, role, author, created_at, content
12170 FROM messages INDEXED BY sqlite_autoindex_messages_1
12171 WHERE conversation_id = ?1",
12172 fparams![conversation_id],
12173 )?;
12174 record_message_lookup_full_scan_query(rows.len());
12175 (rows, true)
12176 };
12177
12178 let mut by_idx = HashMap::with_capacity(rows.len());
12179 let mut replay = HashSet::with_capacity(rows.len());
12180 for row in rows {
12181 let idx: i64 = row.get_typed(0)?;
12182 let role: String = row.get_typed(1)?;
12183 let author: Option<String> = row.get_typed(2)?;
12184 let created_at: Option<i64> = row.get_typed(3)?;
12185 let content: String = row.get_typed(4)?;
12186 let role = role_from_str(&role);
12187 let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
12188
12189 if idx >= min_idx && idx <= max_idx {
12190 by_idx.insert(
12191 idx,
12192 MessageMergeFingerprint {
12193 idx,
12194 created_at,
12195 role: role.clone(),
12196 author: author.clone(),
12197 content_hash,
12198 },
12199 );
12200 }
12201
12202 let replay_matches = if replay_full_scan {
12203 true
12204 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12205 created_at.is_some_and(|ts| ts >= min_created_at && ts <= max_created_at)
12206 } else {
12207 true
12208 };
12209 if replay_matches {
12210 replay.insert(MessageReplayFingerprint {
12211 created_at,
12212 role,
12213 author,
12214 content_hash,
12215 });
12216 }
12217 }
12218
12219 Ok(ExistingMessageLookup { by_idx, replay })
12220}
12221
12222fn franken_existing_message_lookup_with_pending(
12223 tx: &FrankenTransaction<'_>,
12224 conversation_id: i64,
12225 incoming_messages: &[Message],
12226 pending_message_fingerprints: &mut HashMap<i64, HashMap<i64, MessageMergeFingerprint>>,
12227 pending_message_replay_fingerprints: &mut HashMap<i64, HashSet<MessageReplayFingerprint>>,
12228) -> Result<ExistingMessageLookup> {
12229 if let (Some(by_idx), Some(replay)) = (
12230 pending_message_fingerprints.get(&conversation_id),
12231 pending_message_replay_fingerprints.get(&conversation_id),
12232 ) {
12233 if incoming_messages.iter().all(|msg| {
12234 by_idx.contains_key(&msg.idx) || replay.contains(&message_replay_fingerprint(msg))
12235 }) {
12236 return Ok(ExistingMessageLookup {
12237 by_idx: by_idx.clone(),
12238 replay: replay.clone(),
12239 });
12240 }
12241
12242 let fresh = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12243 let mut merged_by_idx = by_idx.clone();
12244 let mut merged_replay = replay.clone();
12245 merged_by_idx.extend(fresh.by_idx);
12246 merged_replay.extend(fresh.replay);
12247 pending_message_fingerprints.insert(conversation_id, merged_by_idx.clone());
12248 pending_message_replay_fingerprints.insert(conversation_id, merged_replay.clone());
12249 return Ok(ExistingMessageLookup {
12250 by_idx: merged_by_idx,
12251 replay: merged_replay,
12252 });
12253 }
12254
12255 let lookup = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12256 pending_message_fingerprints.insert(conversation_id, lookup.by_idx.clone());
12257 pending_message_replay_fingerprints.insert(conversation_id, lookup.replay.clone());
12258 Ok(lookup)
12259}
12260
12261fn franken_batch_insert_fts(tx: &FrankenTransaction<'_>, entries: &[FtsEntry]) -> Result<usize> {
12263 if entries.is_empty() {
12264 return Ok(0);
12265 }
12266
12267 let mut inserted = 0;
12268
12269 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12270 let placeholders: String = chunk
12271 .iter()
12272 .enumerate()
12273 .map(|(i, _)| {
12274 let base = i * 7 + 1; format!(
12276 "(?{},?{},?{},?{},?{},?{},?{})",
12277 base,
12278 base + 1,
12279 base + 2,
12280 base + 3,
12281 base + 4,
12282 base + 5,
12283 base + 6
12284 )
12285 })
12286 .collect::<Vec<_>>()
12287 .join(",");
12288
12289 let sql = format!(
12290 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12291 );
12292
12293 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12294 for entry in chunk {
12295 param_values.push(SqliteValue::from(entry.message_id));
12296 param_values.push(SqliteValue::from(entry.content.as_str()));
12297 param_values.push(SqliteValue::from(entry.title.as_str()));
12298 param_values.push(SqliteValue::from(entry.agent.as_str()));
12299 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12300 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12301 param_values.push(SqliteValue::from(entry.created_at));
12302 }
12303
12304 match tx.execute_with_params(&sql, ¶m_values) {
12305 Ok(_) => {
12306 inserted += chunk.len();
12307 }
12308 Err(err) => {
12309 tracing::warn!(
12310 error = %err,
12311 chunk_docs = chunk.len(),
12312 "frankensqlite FTS batch insert failed; skipping db-resident FTS maintenance because Tantivy is authoritative"
12313 );
12314 return Ok(inserted);
12315 }
12316 }
12317 }
12318
12319 Ok(inserted)
12320}
12321
12322fn franken_batch_insert_fts_on_connection(
12323 conn: &FrankenConnection,
12324 entries: &[FtsEntry],
12325) -> Result<usize> {
12326 if entries.is_empty() {
12327 return Ok(0);
12328 }
12329
12330 let mut inserted = 0;
12331
12332 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12333 let placeholders: String = chunk
12334 .iter()
12335 .enumerate()
12336 .map(|(i, _)| {
12337 let base = i * 7 + 1;
12338 format!(
12339 "(?{},?{},?{},?{},?{},?{},?{})",
12340 base,
12341 base + 1,
12342 base + 2,
12343 base + 3,
12344 base + 4,
12345 base + 5,
12346 base + 6
12347 )
12348 })
12349 .collect::<Vec<_>>()
12350 .join(",");
12351
12352 let sql = format!(
12353 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12354 );
12355
12356 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12357 for entry in chunk {
12358 param_values.push(SqliteValue::from(entry.message_id));
12359 param_values.push(SqliteValue::from(entry.content.as_str()));
12360 param_values.push(SqliteValue::from(entry.title.as_str()));
12361 param_values.push(SqliteValue::from(entry.agent.as_str()));
12362 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12363 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12364 param_values.push(SqliteValue::from(entry.created_at));
12365 }
12366
12367 conn.execute_with_params(&sql, ¶m_values)
12368 .with_context(|| {
12369 format!(
12370 "inserting {} rows into fts_messages during streaming FTS maintenance",
12371 chunk.len()
12372 )
12373 })?;
12374 inserted += chunk.len();
12375 }
12376
12377 Ok(inserted)
12378}
12379
12380fn franken_update_daily_stats_in_tx(
12382 storage: &FrankenStorage,
12383 tx: &FrankenTransaction<'_>,
12384 agent_slug: &str,
12385 source_id: &str,
12386 started_at: Option<i64>,
12387 delta: StatsDelta,
12388) -> Result<()> {
12389 let day_id = started_at
12390 .map(FrankenStorage::day_id_from_millis)
12391 .unwrap_or(0);
12392 let now = FrankenStorage::now_millis();
12393
12394 let targets = [
12395 DailyStatsTarget {
12396 day_id,
12397 agent_slug,
12398 source_id,
12399 },
12400 DailyStatsTarget {
12401 day_id,
12402 agent_slug: "all",
12403 source_id,
12404 },
12405 DailyStatsTarget {
12406 day_id,
12407 agent_slug,
12408 source_id: "all",
12409 },
12410 DailyStatsTarget {
12411 day_id,
12412 agent_slug: "all",
12413 source_id: "all",
12414 },
12415 ];
12416
12417 if agent_slug != "all"
12418 && source_id != "all"
12419 && franken_update_ensured_daily_stats_targets_in_tx(storage, tx, &targets, now, delta)?
12420 {
12421 return Ok(());
12422 }
12423
12424 for target in targets {
12425 franken_apply_daily_stats_delta_in_tx(storage, tx, target, now, delta)?;
12426 }
12427
12428 Ok(())
12429}
12430
12431#[derive(Clone, Copy)]
12432struct DailyStatsTarget<'a> {
12433 day_id: i64,
12434 agent_slug: &'a str,
12435 source_id: &'a str,
12436}
12437
12438fn franken_update_ensured_daily_stats_targets_in_tx(
12439 storage: &FrankenStorage,
12440 tx: &FrankenTransaction<'_>,
12441 targets: &[DailyStatsTarget<'_>; 4],
12442 now: i64,
12443 delta: StatsDelta,
12444) -> Result<bool> {
12445 let cache_keys = targets.map(|target| {
12446 EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id)
12447 });
12448 if !storage.daily_stats_keys_already_ensured(&cache_keys) {
12449 return Ok(false);
12450 }
12451
12452 let primary = targets[0];
12453 let rows_changed = tx.execute_compat(
12454 "UPDATE daily_stats
12455 SET session_count = session_count + ?4,
12456 message_count = message_count + ?5,
12457 total_chars = total_chars + ?6,
12458 last_updated = ?7
12459 WHERE day_id = ?1
12460 AND ((agent_slug = ?2 AND source_id = ?3)
12461 OR (agent_slug = 'all' AND source_id = ?3)
12462 OR (agent_slug = ?2 AND source_id = 'all')
12463 OR (agent_slug = 'all' AND source_id = 'all'))",
12464 fparams![
12465 primary.day_id,
12466 primary.agent_slug,
12467 primary.source_id,
12468 delta.session_count_delta,
12469 delta.message_count_delta,
12470 delta.total_chars_delta,
12471 now
12472 ],
12473 )?;
12474 if rows_changed == targets.len() {
12475 return Ok(true);
12476 }
12477
12478 for (target, cache_key) in targets.iter().copied().zip(cache_keys) {
12479 let exists = tx
12480 .query_row_map(
12481 "SELECT 1 FROM daily_stats
12482 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3
12483 LIMIT 1",
12484 fparams![target.day_id, target.agent_slug, target.source_id],
12485 |row| row.get_typed::<i64>(0),
12486 )
12487 .optional()?
12488 .is_some();
12489 if exists {
12490 continue;
12491 }
12492
12493 tx.execute_compat(
12494 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12495 VALUES(?1,?2,?3,?4,?5,?6,?7)",
12496 fparams![
12497 target.day_id,
12498 target.agent_slug,
12499 target.source_id,
12500 delta.session_count_delta,
12501 delta.message_count_delta,
12502 delta.total_chars_delta,
12503 now
12504 ],
12505 )?;
12506 storage.mark_daily_stats_key_ensured(cache_key);
12507 }
12508
12509 Ok(true)
12510}
12511
12512fn franken_apply_daily_stats_delta_in_tx(
12513 storage: &FrankenStorage,
12514 tx: &FrankenTransaction<'_>,
12515 target: DailyStatsTarget<'_>,
12516 now: i64,
12517 delta: StatsDelta,
12518) -> Result<()> {
12519 let cache_key = EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id);
12520 if storage.daily_stats_key_already_ensured(&cache_key) {
12521 let rows_changed = tx.execute_compat(
12522 "UPDATE daily_stats
12523 SET session_count = session_count + ?4,
12524 message_count = message_count + ?5,
12525 total_chars = total_chars + ?6,
12526 last_updated = ?7
12527 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3",
12528 fparams![
12529 target.day_id,
12530 target.agent_slug,
12531 target.source_id,
12532 delta.session_count_delta,
12533 delta.message_count_delta,
12534 delta.total_chars_delta,
12535 now
12536 ],
12537 )?;
12538 if rows_changed > 0 {
12539 return Ok(());
12540 }
12541 }
12542
12543 tx.execute_compat(
12544 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12545 VALUES(?1,?2,?3,?4,?5,?6,?7)
12546 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12547 session_count = session_count + excluded.session_count,
12548 message_count = message_count + excluded.message_count,
12549 total_chars = total_chars + excluded.total_chars,
12550 last_updated = excluded.last_updated",
12551 fparams![
12552 target.day_id,
12553 target.agent_slug,
12554 target.source_id,
12555 delta.session_count_delta,
12556 delta.message_count_delta,
12557 delta.total_chars_delta,
12558 now
12559 ],
12560 )?;
12561 storage.mark_daily_stats_key_ensured(cache_key);
12562 Ok(())
12563}
12564
12565fn franken_update_daily_stats_batched_in_tx(
12571 tx: &FrankenTransaction<'_>,
12572 entries: &[(i64, String, String, StatsDelta)],
12573) -> Result<usize> {
12574 if entries.is_empty() {
12575 return Ok(0);
12576 }
12577
12578 let now = FrankenStorage::now_millis();
12579 let mut total_affected = 0;
12580
12581 for (day_id, agent, source, delta) in entries {
12586 total_affected += tx.execute_compat(
12587 "INSERT INTO daily_stats (day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12588 VALUES(?1,?2,?3,?4,?5,?6,?7)
12589 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12590 session_count = session_count + excluded.session_count,
12591 message_count = message_count + excluded.message_count,
12592 total_chars = total_chars + excluded.total_chars,
12593 last_updated = excluded.last_updated",
12594 fparams![
12595 *day_id,
12596 agent.as_str(),
12597 source.as_str(),
12598 delta.session_count_delta,
12599 delta.message_count_delta,
12600 delta.total_chars_delta,
12601 now
12602 ],
12603 )?;
12604 }
12605
12606 Ok(total_affected)
12607}
12608
12609fn franken_insert_token_usage_batched_in_tx(
12615 tx: &FrankenTransaction<'_>,
12616 entries: &[TokenUsageEntry],
12617) -> Result<usize> {
12618 if entries.is_empty() {
12619 return Ok(0);
12620 }
12621
12622 let mut total_inserted = 0;
12623
12624 for e in entries {
12625 let params_vec: Vec<ParamValue> = vec![
12626 ParamValue::from(e.message_id),
12627 ParamValue::from(e.conversation_id),
12628 ParamValue::from(e.agent_id),
12629 ParamValue::from(e.workspace_id),
12630 ParamValue::from(e.source_id.clone()),
12631 ParamValue::from(e.timestamp_ms),
12632 ParamValue::from(e.day_id),
12633 ParamValue::from(e.model_name.clone()),
12634 ParamValue::from(e.model_family.clone()),
12635 ParamValue::from(e.model_tier.clone()),
12636 ParamValue::from(e.service_tier.clone()),
12637 ParamValue::from(e.provider.clone()),
12638 ParamValue::from(e.input_tokens),
12639 ParamValue::from(e.output_tokens),
12640 ParamValue::from(e.cache_read_tokens),
12641 ParamValue::from(e.cache_creation_tokens),
12642 ParamValue::from(e.thinking_tokens),
12643 ParamValue::from(e.total_tokens),
12644 ParamValue::from(e.estimated_cost_usd),
12645 ParamValue::from(e.role.clone()),
12646 ParamValue::from(e.content_chars),
12647 ParamValue::from(e.has_tool_calls as i64),
12648 ParamValue::from(e.tool_call_count as i64),
12649 ParamValue::from(e.data_source.clone()),
12650 ];
12651
12652 let values = param_slice_to_values(¶ms_vec);
12653 total_inserted += tx.execute_with_params(
12654 "INSERT OR IGNORE INTO token_usage (
12655 message_id, conversation_id, agent_id, workspace_id, source_id,
12656 timestamp_ms, day_id,
12657 model_name, model_family, model_tier, service_tier, provider,
12658 input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
12659 thinking_tokens, total_tokens, estimated_cost_usd,
12660 role, content_chars, has_tool_calls, tool_call_count, data_source
12661 )
12662 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12663 &values,
12664 )?;
12665 }
12666
12667 Ok(total_inserted)
12668}
12669
12670fn franken_update_token_daily_stats_batched_in_tx(
12672 tx: &FrankenTransaction<'_>,
12673 entries: &[(i64, String, String, String, TokenStatsDelta)],
12674) -> Result<usize> {
12675 if entries.is_empty() {
12676 return Ok(0);
12677 }
12678
12679 let now = FrankenStorage::now_millis();
12680 let mut total_affected = 0;
12681
12682 for (day_id, agent, source, model, delta) in entries {
12683 total_affected += tx.execute_compat(
12684 "INSERT INTO token_daily_stats (
12685 day_id, agent_slug, source_id, model_family,
12686 api_call_count, user_message_count, assistant_message_count, tool_message_count,
12687 total_input_tokens, total_output_tokens, total_cache_read_tokens,
12688 total_cache_creation_tokens, total_thinking_tokens, grand_total_tokens,
12689 total_content_chars, total_tool_calls, estimated_cost_usd, session_count,
12690 last_updated
12691 )
12692 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19)
12693 ON CONFLICT(day_id, agent_slug, source_id, model_family) DO UPDATE SET
12694 api_call_count = api_call_count + excluded.api_call_count,
12695 user_message_count = user_message_count + excluded.user_message_count,
12696 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12697 tool_message_count = tool_message_count + excluded.tool_message_count,
12698 total_input_tokens = total_input_tokens + excluded.total_input_tokens,
12699 total_output_tokens = total_output_tokens + excluded.total_output_tokens,
12700 total_cache_read_tokens = total_cache_read_tokens + excluded.total_cache_read_tokens,
12701 total_cache_creation_tokens = total_cache_creation_tokens + excluded.total_cache_creation_tokens,
12702 total_thinking_tokens = total_thinking_tokens + excluded.total_thinking_tokens,
12703 grand_total_tokens = grand_total_tokens + excluded.grand_total_tokens,
12704 total_content_chars = total_content_chars + excluded.total_content_chars,
12705 total_tool_calls = total_tool_calls + excluded.total_tool_calls,
12706 estimated_cost_usd = estimated_cost_usd + excluded.estimated_cost_usd,
12707 session_count = session_count + excluded.session_count,
12708 last_updated = excluded.last_updated",
12709 fparams![
12710 *day_id,
12711 agent.as_str(),
12712 source.as_str(),
12713 model.as_str(),
12714 delta.api_call_count,
12715 delta.user_message_count,
12716 delta.assistant_message_count,
12717 delta.tool_message_count,
12718 delta.total_input_tokens,
12719 delta.total_output_tokens,
12720 delta.total_cache_read_tokens,
12721 delta.total_cache_creation_tokens,
12722 delta.total_thinking_tokens,
12723 delta.grand_total_tokens,
12724 delta.total_content_chars,
12725 delta.total_tool_calls,
12726 delta.estimated_cost_usd,
12727 delta.session_count,
12728 now
12729 ],
12730 )?;
12731 }
12732
12733 Ok(total_affected)
12734}
12735
12736fn franken_insert_message_metrics_batched_in_tx(
12742 tx: &FrankenTransaction<'_>,
12743 entries: &[MessageMetricsEntry],
12744) -> Result<usize> {
12745 if entries.is_empty() {
12746 return Ok(0);
12747 }
12748
12749 let mut total_inserted = 0;
12750
12751 for e in entries {
12752 let params_vec: Vec<ParamValue> = vec![
12753 ParamValue::from(e.message_id),
12754 ParamValue::from(e.created_at_ms),
12755 ParamValue::from(e.hour_id),
12756 ParamValue::from(e.day_id),
12757 ParamValue::from(e.agent_slug.clone()),
12758 ParamValue::from(e.workspace_id),
12759 ParamValue::from(e.source_id.clone()),
12760 ParamValue::from(e.role.clone()),
12761 ParamValue::from(e.content_chars),
12762 ParamValue::from(e.content_tokens_est),
12763 ParamValue::from(e.model_name.clone()),
12764 ParamValue::from(e.model_family.clone()),
12765 ParamValue::from(e.model_tier.clone()),
12766 ParamValue::from(e.provider.clone()),
12767 ParamValue::from(e.api_input_tokens),
12768 ParamValue::from(e.api_output_tokens),
12769 ParamValue::from(e.api_cache_read_tokens),
12770 ParamValue::from(e.api_cache_creation_tokens),
12771 ParamValue::from(e.api_thinking_tokens),
12772 ParamValue::from(e.api_service_tier.clone()),
12773 ParamValue::from(e.api_data_source.clone()),
12774 ParamValue::from(e.tool_call_count),
12775 ParamValue::from(e.has_tool_calls as i64),
12776 ParamValue::from(e.has_plan as i64),
12777 ];
12778
12779 let values = param_slice_to_values(¶ms_vec);
12780 total_inserted += tx.execute_with_params(
12781 "INSERT OR IGNORE INTO message_metrics (
12782 message_id, created_at_ms, hour_id, day_id,
12783 agent_slug, workspace_id, source_id, role,
12784 content_chars, content_tokens_est,
12785 model_name, model_family, model_tier, provider,
12786 api_input_tokens, api_output_tokens, api_cache_read_tokens,
12787 api_cache_creation_tokens, api_thinking_tokens,
12788 api_service_tier, api_data_source,
12789 tool_call_count, has_tool_calls, has_plan
12790 )
12791 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
12792 &values,
12793 )?;
12794 }
12795
12796 Ok(total_inserted)
12797}
12798
12799fn franken_flush_rollup_table(
12801 tx: &FrankenTransaction<'_>,
12802 table: &str,
12803 bucket_col: &str,
12804 deltas: &HashMap<(i64, String, i64, String), UsageRollupDelta>,
12805 now: i64,
12806) -> Result<usize> {
12807 if deltas.is_empty() {
12808 return Ok(0);
12809 }
12810
12811 let mut total_affected = 0;
12812
12813 for ((bucket_id, agent, workspace_id, source), d) in deltas {
12814 let sql = format!(
12815 "INSERT INTO {table} (
12816 {bucket_col}, agent_slug, workspace_id, source_id,
12817 message_count, user_message_count, assistant_message_count,
12818 tool_call_count, plan_message_count, plan_content_tokens_est_total,
12819 plan_api_tokens_total, api_coverage_message_count,
12820 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
12821 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
12822 api_cache_read_tokens_total, api_cache_creation_tokens_total,
12823 api_thinking_tokens_total, last_updated
12824 )
12825 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
12826 ON CONFLICT({bucket_col}, agent_slug, workspace_id, source_id) DO UPDATE SET
12827 message_count = message_count + excluded.message_count,
12828 user_message_count = user_message_count + excluded.user_message_count,
12829 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12830 tool_call_count = tool_call_count + excluded.tool_call_count,
12831 plan_message_count = plan_message_count + excluded.plan_message_count,
12832 plan_content_tokens_est_total = plan_content_tokens_est_total + excluded.plan_content_tokens_est_total,
12833 plan_api_tokens_total = plan_api_tokens_total + excluded.plan_api_tokens_total,
12834 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
12835 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
12836 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
12837 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
12838 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
12839 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
12840 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
12841 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
12842 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
12843 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
12844 last_updated = excluded.last_updated"
12845 );
12846
12847 total_affected += tx.execute_compat(
12848 &sql,
12849 fparams![
12850 *bucket_id,
12851 agent.as_str(),
12852 *workspace_id,
12853 source.as_str(),
12854 d.message_count,
12855 d.user_message_count,
12856 d.assistant_message_count,
12857 d.tool_call_count,
12858 d.plan_message_count,
12859 d.plan_content_tokens_est_total,
12860 d.plan_api_tokens_total,
12861 d.api_coverage_message_count,
12862 d.content_tokens_est_total,
12863 d.content_tokens_est_user,
12864 d.content_tokens_est_assistant,
12865 d.api_tokens_total,
12866 d.api_input_tokens_total,
12867 d.api_output_tokens_total,
12868 d.api_cache_read_tokens_total,
12869 d.api_cache_creation_tokens_total,
12870 d.api_thinking_tokens_total,
12871 now
12872 ],
12873 )?;
12874 }
12875
12876 Ok(total_affected)
12877}
12878
12879fn franken_flush_model_daily_rollup_table(
12881 tx: &FrankenTransaction<'_>,
12882 deltas: &HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
12883 now: i64,
12884) -> Result<usize> {
12885 if deltas.is_empty() {
12886 return Ok(0);
12887 }
12888
12889 let mut total_affected = 0;
12890
12891 for ((day_id, agent, workspace_id, source, model_family, model_tier), d) in deltas {
12892 total_affected += tx.execute_compat(
12893 "INSERT INTO usage_models_daily (
12894 day_id, agent_slug, workspace_id, source_id, model_family, model_tier,
12895 message_count, user_message_count, assistant_message_count,
12896 tool_call_count, plan_message_count, api_coverage_message_count,
12897 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
12898 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
12899 api_cache_read_tokens_total, api_cache_creation_tokens_total,
12900 api_thinking_tokens_total, last_updated
12901 )
12902 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
12903 ON CONFLICT(day_id, agent_slug, workspace_id, source_id, model_family, model_tier) DO UPDATE SET
12904 message_count = message_count + excluded.message_count,
12905 user_message_count = user_message_count + excluded.user_message_count,
12906 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
12907 tool_call_count = tool_call_count + excluded.tool_call_count,
12908 plan_message_count = plan_message_count + excluded.plan_message_count,
12909 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
12910 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
12911 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
12912 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
12913 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
12914 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
12915 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
12916 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
12917 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
12918 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
12919 last_updated = excluded.last_updated",
12920 fparams![
12921 *day_id,
12922 agent.as_str(),
12923 *workspace_id,
12924 source.as_str(),
12925 model_family.as_str(),
12926 model_tier.as_str(),
12927 d.message_count,
12928 d.user_message_count,
12929 d.assistant_message_count,
12930 d.tool_call_count,
12931 d.plan_message_count,
12932 d.api_coverage_message_count,
12933 d.content_tokens_est_total,
12934 d.content_tokens_est_user,
12935 d.content_tokens_est_assistant,
12936 d.api_tokens_total,
12937 d.api_input_tokens_total,
12938 d.api_output_tokens_total,
12939 d.api_cache_read_tokens_total,
12940 d.api_cache_creation_tokens_total,
12941 d.api_thinking_tokens_total,
12942 now
12943 ],
12944 )?;
12945 }
12946
12947 Ok(total_affected)
12948}
12949
12950fn franken_flush_analytics_rollups_in_tx(
12952 tx: &FrankenTransaction<'_>,
12953 agg: &AnalyticsRollupAggregator,
12954) -> Result<(usize, usize, usize)> {
12955 let now = FrankenStorage::now_millis();
12956
12957 let hourly_affected =
12958 franken_flush_rollup_table(tx, "usage_hourly", "hour_id", &agg.hourly, now)?;
12959 let daily_affected = franken_flush_rollup_table(tx, "usage_daily", "day_id", &agg.daily, now)?;
12960 let models_daily_affected = franken_flush_model_daily_rollup_table(tx, &agg.models_daily, now)?;
12961
12962 Ok((hourly_affected, daily_affected, models_daily_affected))
12963}
12964
12965fn franken_update_conversation_token_summaries_in_tx(
12967 tx: &FrankenTransaction<'_>,
12968 conversation_id: i64,
12969) -> Result<()> {
12970 tx.execute_compat(
12971 "UPDATE conversations SET
12972 total_input_tokens = (SELECT SUM(input_tokens) FROM token_usage WHERE conversation_id = ?1),
12973 total_output_tokens = (SELECT SUM(output_tokens) FROM token_usage WHERE conversation_id = ?1),
12974 total_cache_read_tokens = (SELECT SUM(cache_read_tokens) FROM token_usage WHERE conversation_id = ?1),
12975 total_cache_creation_tokens = (SELECT SUM(cache_creation_tokens) FROM token_usage WHERE conversation_id = ?1),
12976 grand_total_tokens = (SELECT SUM(total_tokens) FROM token_usage WHERE conversation_id = ?1),
12977 estimated_cost_usd = (SELECT SUM(estimated_cost_usd) FROM token_usage WHERE conversation_id = ?1),
12978 primary_model = (SELECT model_name FROM token_usage WHERE conversation_id = ?1
12979 AND model_name IS NOT NULL
12980 GROUP BY model_name ORDER BY COUNT(*) DESC LIMIT 1),
12981 api_call_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12982 AND data_source = 'api'),
12983 tool_call_count = (SELECT SUM(tool_call_count) FROM token_usage WHERE conversation_id = ?1),
12984 user_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12985 AND role = 'user'),
12986 assistant_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
12987 AND role IN ('assistant', 'agent'))
12988 WHERE id = ?1",
12989 fparams![conversation_id],
12990 )?;
12991 Ok(())
12992}
12993
12994impl FrankenStorage {
12995 pub fn rebuild_token_daily_stats(&self) -> Result<usize> {
12997 const CONVERSATION_BATCH_SIZE: usize = 1_000;
12998 const TOKEN_USAGE_BATCH_SIZE: usize = 10_000;
12999
13000 let total_usage_rows: i64 =
13001 self.conn
13002 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
13003 row.get_typed(0)
13004 })?;
13005 tracing::info!(
13006 target: "cass::analytics",
13007 total_usage_rows,
13008 "token_daily_stats_rebuild_start"
13009 );
13010
13011 let mut tx = self.conn.transaction()?;
13012 tx.execute("DELETE FROM token_daily_stats")?;
13013
13014 let mut last_conversation_id = 0_i64;
13015 let mut rows_created = 0_usize;
13016
13017 loop {
13018 let conversation_rows = tx.query_map_collect(
13019 "SELECT c.id, c.started_at, c.source_id,
13020 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown')
13021 FROM conversations c
13022 WHERE c.id > ?1
13023 ORDER BY c.id
13024 LIMIT ?2",
13025 fparams![last_conversation_id, CONVERSATION_BATCH_SIZE as i64],
13026 |row| {
13027 Ok((
13028 row.get_typed::<i64>(0)?,
13029 row.get_typed::<Option<i64>>(1)?,
13030 row.get_typed::<String>(2)?,
13031 row.get_typed::<String>(3)?,
13032 ))
13033 },
13034 )?;
13035 if conversation_rows.is_empty() {
13036 break;
13037 }
13038
13039 let mut aggregate = TokenStatsAggregator::new();
13040
13041 for (conversation_id, started_at, source_id, agent_slug) in conversation_rows {
13042 last_conversation_id = conversation_id;
13043 let conversation_day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13044 let mut last_token_usage_id = 0_i64;
13045 let mut session_model_family = String::from("unknown");
13046
13047 loop {
13048 let usage_rows = tx.query_map_collect(
13049 "SELECT id, day_id, role,
13050 COALESCE(model_family, 'unknown'),
13051 input_tokens, output_tokens, cache_read_tokens,
13052 cache_creation_tokens, thinking_tokens,
13053 has_tool_calls, tool_call_count,
13054 content_chars, estimated_cost_usd
13055 FROM token_usage
13056 WHERE conversation_id = ?1
13057 AND id > ?2
13058 ORDER BY id
13059 LIMIT ?3",
13060 fparams![
13061 conversation_id,
13062 last_token_usage_id,
13063 TOKEN_USAGE_BATCH_SIZE as i64
13064 ],
13065 |row| {
13066 Ok((
13067 row.get_typed::<i64>(0)?,
13068 row.get_typed::<i64>(1)?,
13069 row.get_typed::<String>(2)?,
13070 row.get_typed::<String>(3)?,
13071 row.get_typed::<Option<i64>>(4)?,
13072 row.get_typed::<Option<i64>>(5)?,
13073 row.get_typed::<Option<i64>>(6)?,
13074 row.get_typed::<Option<i64>>(7)?,
13075 row.get_typed::<Option<i64>>(8)?,
13076 row.get_typed::<i64>(9)?,
13077 row.get_typed::<i64>(10)?,
13078 row.get_typed::<i64>(11)?,
13079 row.get_typed::<Option<f64>>(12)?,
13080 ))
13081 },
13082 )?;
13083 if usage_rows.is_empty() {
13084 break;
13085 }
13086
13087 for (
13088 token_usage_id,
13089 day_id,
13090 role,
13091 model_family,
13092 input_tokens,
13093 output_tokens,
13094 cache_read_tokens,
13095 cache_creation_tokens,
13096 thinking_tokens,
13097 has_tool_calls,
13098 tool_call_count,
13099 content_chars,
13100 estimated_cost_usd,
13101 ) in usage_rows
13102 {
13103 last_token_usage_id = token_usage_id;
13104 if model_family != "unknown" {
13105 session_model_family = model_family.clone();
13106 }
13107 let usage = crate::connectors::ExtractedTokenUsage {
13108 model_name: None,
13109 provider: None,
13110 input_tokens,
13111 output_tokens,
13112 cache_read_tokens,
13113 cache_creation_tokens,
13114 thinking_tokens,
13115 service_tier: None,
13116 has_tool_calls: has_tool_calls != 0,
13117 tool_call_count: u32::try_from(tool_call_count.max(0)).unwrap_or(0),
13118 data_source: franken_agent_detection::TokenDataSource::Api,
13119 };
13120 aggregate.record(
13121 &agent_slug,
13122 &source_id,
13123 day_id,
13124 &model_family,
13125 &role,
13126 &usage,
13127 content_chars,
13128 estimated_cost_usd.unwrap_or(0.0),
13129 );
13130 }
13131 }
13132
13133 aggregate.record_session(
13134 &agent_slug,
13135 &source_id,
13136 conversation_day_id,
13137 &session_model_family,
13138 );
13139 }
13140
13141 let entries = aggregate.expand();
13142 rows_created = rows_created.saturating_add(entries.len());
13143 franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
13144 }
13145
13146 tx.commit()?;
13147
13148 tracing::info!(
13149 target: "cass::analytics",
13150 rows_created,
13151 "token_daily_stats_rebuild_complete"
13152 );
13153
13154 Ok(rows_created)
13155 }
13156
13157 pub fn rebuild_analytics(&self) -> Result<AnalyticsRebuildResult> {
13160 let start = Instant::now();
13161
13162 let total_messages: i64 =
13163 self.conn
13164 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13165 row.get_typed(0)
13166 })?;
13167 tracing::info!(
13168 target: "cass::analytics",
13169 total_messages,
13170 "analytics_rebuild_start"
13171 );
13172
13173 let mut tx = self.conn.transaction()?;
13174
13175 tx.execute("DELETE FROM message_metrics")?;
13176 tx.execute("DELETE FROM usage_hourly")?;
13177 tx.execute("DELETE FROM usage_daily")?;
13178 tx.execute("DELETE FROM usage_models_daily")?;
13179
13180 const CHUNK_SIZE: i64 = 10_000;
13181 let mut offset: i64 = 0;
13182 let mut total_inserted: usize = 0;
13183 let mut usage_hourly_rows: usize = 0;
13184 let mut usage_daily_rows: usize = 0;
13185 let mut usage_models_daily_rows: usize = 0;
13186
13187 loop {
13188 #[allow(clippy::type_complexity)]
13189 let rows: Vec<(
13190 i64,
13191 String,
13192 String,
13193 Option<serde_json::Value>,
13194 Option<i64>,
13195 Option<i64>,
13196 String,
13197 Option<i64>,
13198 String,
13199 )> = tx.query_map_collect(
13200 "SELECT m.id, m.idx, m.role, m.content, m.extra_json, m.extra_bin,
13206 m.created_at,
13207 c.id AS conv_id, c.started_at AS conv_started_at,
13208 c.source_id, c.workspace_id,
13209 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown') AS agent_slug
13210 FROM messages m
13211 JOIN conversations c ON m.conversation_id = c.id
13212 ORDER BY m.id
13213 LIMIT ?1 OFFSET ?2",
13214 fparams![CHUNK_SIZE, offset],
13215 |row| {
13216 let msg_id: i64 = row.get_typed(0)?;
13217 let role: String = row.get_typed(2)?;
13218 let content: String = row.get_typed(3)?;
13219 let extra_json = row
13220 .get_typed::<Option<String>>(4)?
13221 .and_then(|s| serde_json::from_str(&s).ok())
13222 .or_else(|| {
13223 row.get_typed::<Option<Vec<u8>>>(5)
13224 .ok()
13225 .flatten()
13226 .and_then(|b| rmp_serde::from_slice(&b).ok())
13227 });
13228 let msg_ts: Option<i64> = row.get_typed(6)?;
13229 let conv_started_at: Option<i64> = row.get_typed(8)?;
13230 let source_id: String = row.get_typed(9)?;
13231 let workspace_id: Option<i64> = row.get_typed(10)?;
13232 let agent_slug: String = row.get_typed(11)?;
13233 let effective_ts = msg_ts.or(conv_started_at).unwrap_or(0);
13234
13235 Ok((
13236 msg_id,
13237 role,
13238 content,
13239 extra_json,
13240 Some(effective_ts),
13241 workspace_id,
13242 source_id,
13243 conv_started_at,
13244 agent_slug,
13245 ))
13246 },
13247 )?;
13248
13249 if rows.is_empty() {
13250 break;
13251 }
13252
13253 let chunk_len = rows.len();
13254 let mut entries = Vec::with_capacity(chunk_len);
13255 let mut rollup_agg = AnalyticsRollupAggregator::new();
13256
13257 for (
13258 msg_id,
13259 role,
13260 content,
13261 extra_json,
13262 effective_ts,
13263 workspace_id,
13264 source_id,
13265 _conv_started_at,
13266 agent_slug,
13267 ) in &rows
13268 {
13269 let ts = effective_ts.unwrap_or(0);
13270 let day_id = Self::day_id_from_millis(ts);
13271 let hour_id = Self::hour_id_from_millis(ts);
13272 let content_chars = content.len() as i64;
13273 let content_tokens_est = content_chars / 4;
13274 let extra = extra_json
13275 .as_ref()
13276 .cloned()
13277 .unwrap_or(serde_json::Value::Null);
13278 let usage =
13279 crate::connectors::extract_tokens_for_agent(agent_slug, &extra, content, role);
13280 let model_info = usage
13281 .model_name
13282 .as_deref()
13283 .map(crate::connectors::normalize_model);
13284 let model_family = model_info
13285 .as_ref()
13286 .map(|i| i.family.clone())
13287 .unwrap_or_else(|| "unknown".into());
13288 let model_tier = model_info
13289 .as_ref()
13290 .map(|i| i.tier.clone())
13291 .unwrap_or_else(|| "unknown".into());
13292 let provider = usage
13293 .provider
13294 .clone()
13295 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
13296 .unwrap_or_else(|| "unknown".into());
13297
13298 let entry = MessageMetricsEntry {
13299 message_id: *msg_id,
13300 created_at_ms: ts,
13301 hour_id,
13302 day_id,
13303 agent_slug: agent_slug.clone(),
13304 workspace_id: workspace_id.unwrap_or(0),
13305 source_id: source_id.clone(),
13306 role: role.clone(),
13307 content_chars,
13308 content_tokens_est,
13309 model_name: usage.model_name.clone(),
13310 model_family,
13311 model_tier,
13312 provider,
13313 api_input_tokens: usage.input_tokens,
13314 api_output_tokens: usage.output_tokens,
13315 api_cache_read_tokens: usage.cache_read_tokens,
13316 api_cache_creation_tokens: usage.cache_creation_tokens,
13317 api_thinking_tokens: usage.thinking_tokens,
13318 api_service_tier: usage.service_tier,
13319 api_data_source: usage.data_source.as_str().to_string(),
13320 tool_call_count: usage.tool_call_count as i64,
13321 has_tool_calls: usage.has_tool_calls,
13322 has_plan: has_plan_for_role(role, content),
13323 };
13324 rollup_agg.record(&entry);
13325 entries.push(entry);
13326 }
13327
13328 total_inserted += franken_insert_message_metrics_batched_in_tx(&tx, &entries)?;
13329 let (hourly, daily, models_daily) =
13330 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
13331 usage_hourly_rows += hourly;
13332 usage_daily_rows += daily;
13333 usage_models_daily_rows += models_daily;
13334 offset += chunk_len as i64;
13335
13336 tracing::debug!(
13337 target: "cass::analytics",
13338 offset,
13339 chunk = chunk_len,
13340 inserted = entries.len(),
13341 total = total_inserted,
13342 "analytics_rebuild_chunk"
13343 );
13344
13345 if (chunk_len as i64) < CHUNK_SIZE {
13346 break;
13347 }
13348 }
13349
13350 tx.commit()?;
13351
13352 let elapsed = start.elapsed();
13353 let elapsed_ms = elapsed.as_millis() as u64;
13354 let msgs_per_sec = if elapsed_ms > 0 {
13355 (total_inserted as f64) / (elapsed_ms as f64 / 1000.0)
13356 } else {
13357 0.0
13358 };
13359
13360 tracing::info!(
13361 target: "cass::analytics",
13362 message_metrics_rows = total_inserted,
13363 usage_hourly_rows,
13364 usage_daily_rows,
13365 usage_models_daily_rows,
13366 elapsed_ms,
13367 messages_per_sec = format!("{:.0}", msgs_per_sec),
13368 "analytics_rebuild_complete"
13369 );
13370
13371 Ok(AnalyticsRebuildResult {
13372 message_metrics_rows: total_inserted,
13373 usage_hourly_rows,
13374 usage_daily_rows,
13375 usage_models_daily_rows,
13376 elapsed_ms,
13377 messages_per_sec: msgs_per_sec,
13378 })
13379 }
13380
13381 pub fn rebuild_daily_stats(&self) -> Result<DailyStatsRebuildResult> {
13383 const DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE: usize = 1_000;
13384 const DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE: usize = 10_000;
13385
13386 let mut conversation_batch_size = rebuild_batch_size_env(
13387 "CASS_DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE",
13388 DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE,
13389 );
13390 let mut message_batch_size = rebuild_batch_size_env(
13391 "CASS_DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE",
13392 DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE,
13393 );
13394
13395 let total_messages: i64 =
13396 self.conn
13397 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13398 row.get_typed(0)
13399 })?;
13400 let message_metrics_rows: i64 =
13401 self.conn
13402 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
13403 row.get_typed(0)
13404 })?;
13405 let use_message_metrics = total_messages > 0 && total_messages == message_metrics_rows;
13406
13407 tracing::info!(
13408 target: "cass::perf::daily_stats",
13409 total_messages,
13410 message_metrics_rows,
13411 use_message_metrics,
13412 "daily_stats rebuild selected message source"
13413 );
13414
13415 let mut tx = self.conn.transaction()?;
13416 tx.execute("DELETE FROM daily_stats")?;
13417
13418 let mut last_conversation_id = 0_i64;
13419 let mut conversation_batch_count = 0_usize;
13420 let mut conversations_processed = 0_usize;
13421 let mut messages_processed = 0_usize;
13422 let mut message_batch_count = 0_usize;
13423 let mut raw_entries_flushed = 0_usize;
13424 let mut expanded_entries_flushed = 0_usize;
13425 let message_scan_sql = if use_message_metrics {
13426 "SELECT m.idx, mm.content_chars
13427 FROM messages m
13428 JOIN message_metrics mm ON mm.message_id = m.id
13429 WHERE m.conversation_id = ?1
13430 AND m.idx > ?2
13431 ORDER BY m.conversation_id, m.idx
13432 LIMIT ?3"
13433 } else {
13434 "SELECT m.idx, COALESCE(LENGTH(CAST(m.content AS BLOB)), 0)
13435 FROM messages m
13436 WHERE m.conversation_id = ?1
13437 AND m.idx > ?2
13438 ORDER BY m.conversation_id, m.idx
13439 LIMIT ?3"
13440 };
13441
13442 loop {
13443 let conversation_rows = match self.conn.query_with_params(
13449 "SELECT c.id, c.started_at,
13450 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
13451 c.source_id
13452 FROM conversations c
13453 WHERE c.id > ?1
13454 ORDER BY c.id
13455 LIMIT ?2",
13456 ¶ms_from_iter([
13457 ParamValue::from(last_conversation_id),
13458 ParamValue::from(conversation_batch_size as i64),
13459 ]),
13460 ) {
13461 Ok(rows) => rows,
13462 Err(err) if is_out_of_memory_error(&err) && conversation_batch_size > 1 => {
13463 let previous_batch_size = conversation_batch_size;
13464 conversation_batch_size = (conversation_batch_size / 2).max(1);
13465 tracing::warn!(
13466 previous_batch_size,
13467 conversation_batch_size,
13468 last_conversation_id,
13469 "daily_stats conversation scan ran out of memory; retrying with smaller batch"
13470 );
13471 continue;
13472 }
13473 Err(err) => return Err(err.into()),
13474 };
13475 if conversation_rows.is_empty() {
13476 break;
13477 }
13478
13479 let mut aggregate = StatsAggregator::new();
13480 let mut conversation_batch_meta: Vec<(i64, i64, String, String)> =
13481 Vec::with_capacity(conversation_rows.len());
13482 for row in &conversation_rows {
13483 let conversation_id: i64 = row.get_typed(0)?;
13484 let started_at: Option<i64> = row.get_typed(1)?;
13485 let agent_slug: String = row.get_typed(2)?;
13486 let source_id: String = row.get_typed(3)?;
13487 last_conversation_id = conversation_id;
13488 let day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13489 aggregate.record_delta(&agent_slug, &source_id, day_id, 1, 0, 0);
13490 conversation_batch_meta.push((conversation_id, day_id, agent_slug, source_id));
13491 conversations_processed += 1;
13492 }
13493
13494 conversation_batch_count += 1;
13495 raw_entries_flushed += aggregate.raw_entry_count();
13496 let entries = aggregate.expand();
13497 expanded_entries_flushed += entries.len();
13498 if !entries.is_empty() {
13499 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13500 }
13501 if conversation_batch_count.is_multiple_of(25) {
13502 tracing::info!(
13503 target: "cass::perf::daily_stats",
13504 conversations_processed,
13505 batches = conversation_batch_count,
13506 batch_size = conversation_batch_size,
13507 last_conversation_id,
13508 "daily_stats rebuild conversation scan progress"
13509 );
13510 }
13511 if conversation_batch_meta.is_empty() {
13512 continue;
13513 }
13514
13515 for (conversation_id, day_id, agent_slug, source_id) in conversation_batch_meta {
13516 let mut cursor_message_idx = -1_i64;
13517 loop {
13518 let message_rows = match self.conn.query_with_params(
13519 message_scan_sql,
13520 ¶ms_from_iter([
13521 ParamValue::from(conversation_id),
13522 ParamValue::from(cursor_message_idx),
13523 ParamValue::from(message_batch_size as i64),
13524 ]),
13525 ) {
13526 Ok(rows) => rows,
13527 Err(err) if is_out_of_memory_error(&err) && message_batch_size > 1 => {
13528 let previous_batch_size = message_batch_size;
13529 message_batch_size = (message_batch_size / 2).max(1);
13530 tracing::warn!(
13531 previous_batch_size,
13532 message_batch_size,
13533 conversation_id,
13534 cursor_message_idx,
13535 "daily_stats message scan ran out of memory; retrying with smaller batch"
13536 );
13537 continue;
13538 }
13539 Err(err) => return Err(err.into()),
13540 };
13541 if message_rows.is_empty() {
13542 break;
13543 }
13544
13545 let mut aggregate = StatsAggregator::new();
13546 for row in &message_rows {
13547 let message_idx: i64 = row.get_typed(0)?;
13548 let content_len: i64 = row.get_typed(1)?;
13549 cursor_message_idx = message_idx;
13550 aggregate.record_delta(&agent_slug, &source_id, day_id, 0, 1, content_len);
13551 messages_processed += 1;
13552 }
13553
13554 message_batch_count += 1;
13555 raw_entries_flushed += aggregate.raw_entry_count();
13556 let entries = aggregate.expand();
13557 expanded_entries_flushed += entries.len();
13558 if !entries.is_empty() {
13559 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13560 }
13561 if message_batch_count.is_multiple_of(50) {
13562 tracing::info!(
13563 target: "cass::perf::daily_stats",
13564 messages_processed,
13565 batches = message_batch_count,
13566 batch_size = message_batch_size,
13567 source = if use_message_metrics {
13568 "message_metrics"
13569 } else {
13570 "messages"
13571 },
13572 conversation_id,
13573 cursor_message_idx,
13574 "daily_stats rebuild message scan progress"
13575 );
13576 }
13577 }
13578 }
13579 }
13580
13581 let rows_created: i64 =
13582 tx.query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
13583 row.get_typed(0)
13584 })?;
13585 let total_sessions: i64 = tx.query_row_map(
13586 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
13587 fparams![],
13588 |row| row.get_typed(0),
13589 )?;
13590
13591 tx.commit()?;
13592
13593 tracing::info!(
13594 target: "cass::perf::daily_stats",
13595 rows_created,
13596 total_sessions,
13597 conversations_processed,
13598 conversation_batches = conversation_batch_count,
13599 conversation_batch_size,
13600 message_batches = message_batch_count,
13601 message_batch_size,
13602 messages_processed,
13603 use_message_metrics,
13604 raw_entries_flushed,
13605 expanded_entries_flushed,
13606 "Daily stats rebuilt from conversations"
13607 );
13608
13609 Ok(DailyStatsRebuildResult {
13610 rows_created,
13611 total_sessions,
13612 })
13613 }
13614}
13615
/// In-memory memo of agent and workspace row ids resolved during indexing.
///
/// Lookups go through [`IndexingCache::get_or_insert_agent`] and
/// [`IndexingCache::get_or_insert_workspace`], which also maintain the
/// hit/miss counters reported by [`IndexingCache::stats`].
#[derive(Debug, Default)]
pub struct IndexingCache {
    /// Agent row ids keyed by agent slug.
    agent_ids: HashMap<String, i64>,
    /// Workspace row ids keyed by workspace path.
    workspace_ids: HashMap<PathBuf, i64>,
    /// Number of lookups answered from the maps above.
    hits: u64,
    /// Number of lookups that had to be forwarded to storage.
    misses: u64,
}
13649
/// Storage operations that [`IndexingCache`] falls back to on a cache miss.
///
/// Abstracting these behind a trait lets the cache work with any backend
/// (it accepts `?Sized` implementors, so trait objects are allowed).
pub trait IndexingCacheStorage {
    /// Resolves `agent` to its row id.
    ///
    /// NOTE(review): the name suggests the row is created when missing —
    /// confirm against the implementor.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64>;
    /// Resolves the workspace at `path` (with an optional display name) to
    /// its row id.
    ///
    /// NOTE(review): presumably inserts the workspace when absent — confirm
    /// against the implementor.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64>;
}
13654
/// Lets [`IndexingCache`] use a `FrankenStorage` as its backing store by
/// forwarding to the storage's own `ensure_*` methods.
impl IndexingCacheStorage for FrankenStorage {
    /// Forwards to `FrankenStorage::ensure_agent`.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64> {
        self.ensure_agent(agent)
    }

    /// Forwards to `FrankenStorage::ensure_workspace`.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
        self.ensure_workspace(path, display_name)
    }
}
13664
13665impl IndexingCache {
13668 pub fn new() -> Self {
13670 Self {
13671 agent_ids: HashMap::new(),
13672 workspace_ids: HashMap::new(),
13673 hits: 0,
13674 misses: 0,
13675 }
13676 }
13677
13678 pub fn is_enabled() -> bool {
13681 dotenvy::var("CASS_SQLITE_CACHE")
13682 .map(|v| v != "0" && v.to_lowercase() != "false")
13683 .unwrap_or(true)
13684 }
13685
13686 pub fn get_or_insert_agent<S>(&mut self, storage: &S, agent: &Agent) -> Result<i64>
13691 where
13692 S: IndexingCacheStorage + ?Sized,
13693 {
13694 if let Some(&cached) = self.agent_ids.get(&agent.slug) {
13695 self.hits += 1;
13696 return Ok(cached);
13697 }
13698
13699 self.misses += 1;
13700 let id = storage.ensure_indexing_agent(agent)?;
13701 self.agent_ids.insert(agent.slug.clone(), id);
13702 Ok(id)
13703 }
13704
13705 pub fn get_or_insert_workspace(
13710 &mut self,
13711 storage: &(impl IndexingCacheStorage + ?Sized),
13712 path: &Path,
13713 display_name: Option<&str>,
13714 ) -> Result<i64> {
13715 if let Some(&cached) = self.workspace_ids.get(path) {
13716 self.hits += 1;
13717 return Ok(cached);
13718 }
13719
13720 self.misses += 1;
13721 let id = storage.ensure_indexing_workspace(path, display_name)?;
13722 self.workspace_ids.insert(path.to_path_buf(), id);
13723 Ok(id)
13724 }
13725
13726 pub fn stats(&self) -> (u64, u64, f64) {
13728 let total = self.hits + self.misses;
13729 let hit_rate = if total > 0 {
13730 self.hits as f64 / total as f64
13731 } else {
13732 0.0
13733 };
13734 (self.hits, self.misses, hit_rate)
13735 }
13736
13737 pub fn clear(&mut self) {
13739 self.agent_ids.clear();
13740 self.workspace_ids.clear();
13741 self.hits = 0;
13742 self.misses = 0;
13743 }
13744
13745 pub fn agent_count(&self) -> usize {
13747 self.agent_ids.len()
13748 }
13749
13750 pub fn workspace_count(&self) -> usize {
13752 self.workspace_ids.len()
13753 }
13754}
13755
/// Signed adjustments to one daily-stats row.
#[derive(Clone, Copy, Debug, Default)]
pub struct StatsDelta {
    /// Change in the number of sessions.
    pub session_count_delta: i64,
    /// Change in the number of messages.
    pub message_count_delta: i64,
    /// Change in the total character count.
    pub total_chars_delta: i64,
}

/// Accumulates [`StatsDelta`]s per `(day_id, agent_slug, source_id)` so they
/// can be flushed in one batch.
#[derive(Debug, Default)]
pub struct StatsAggregator {
    /// Raw (un-expanded) deltas keyed by `(day_id, agent_slug, source_id)`.
    deltas: HashMap<(i64, String, String), StatsDelta>,
}

impl StatsAggregator {
    /// Creates an aggregator with no recorded deltas.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one session together with its message and character totals.
    ///
    /// Equivalent to [`StatsAggregator::record_delta`] with a session delta
    /// of `1`.
    pub fn record(
        &mut self,
        agent_slug: &str,
        source_id: &str,
        day_id: i64,
        message_count: i64,
        total_chars: i64,
    ) {
        self.record_delta(agent_slug, source_id, day_id, 1, message_count, total_chars);
    }

    /// Adds the given deltas to the bucket for `(day_id, agent_slug, source_id)`.
    ///
    /// A call whose three deltas are all zero is ignored and does not create
    /// a bucket.
    pub fn record_delta(
        &mut self,
        agent_slug: &str,
        source_id: &str,
        day_id: i64,
        session_count_delta: i64,
        message_count_delta: i64,
        total_chars_delta: i64,
    ) {
        let is_noop = [session_count_delta, message_count_delta, total_chars_delta]
            .iter()
            .all(|&change| change == 0);
        if is_noop {
            return;
        }

        let bucket = self
            .deltas
            .entry((day_id, agent_slug.to_owned(), source_id.to_owned()))
            .or_default();
        bucket.session_count_delta += session_count_delta;
        bucket.message_count_delta += message_count_delta;
        bucket.total_chars_delta += total_chars_delta;
    }

    /// Expands the raw deltas into rollup rows.
    ///
    /// Each raw bucket contributes to four keys: the exact
    /// `(agent, source)` pair plus the `"all"` wildcard substituted for the
    /// agent, the source, and both. When a raw key already uses `"all"`, the
    /// coinciding combinations are counted once. The result is sorted by
    /// `(day_id, agent, source)`.
    pub fn expand(&self) -> Vec<(i64, String, String, StatsDelta)> {
        let mut rollup: HashMap<(i64, String, String), StatsDelta> = HashMap::new();

        for ((day_id, agent, source), delta) in &self.deltas {
            let candidates = [
                (agent.as_str(), source.as_str()),
                ("all", source.as_str()),
                (agent.as_str(), "all"),
                ("all", "all"),
            ];

            for (position, &(agent_key, source_key)) in candidates.iter().enumerate() {
                // Skip a combination that an earlier slot already produced,
                // otherwise the same delta would be added twice.
                let already_applied = candidates[..position]
                    .iter()
                    .any(|&earlier| earlier == (agent_key, source_key));
                if already_applied {
                    continue;
                }

                let bucket = rollup
                    .entry((*day_id, agent_key.to_owned(), source_key.to_owned()))
                    .or_default();
                bucket.session_count_delta += delta.session_count_delta;
                bucket.message_count_delta += delta.message_count_delta;
                bucket.total_chars_delta += delta.total_chars_delta;
            }
        }

        let mut rows: Vec<(i64, String, String, StatsDelta)> = rollup
            .into_iter()
            .map(|((day, agent, source), delta)| (day, agent, source, delta))
            .collect();
        rows.sort_by(|lhs, rhs| (lhs.0, &lhs.1, &lhs.2).cmp(&(rhs.0, &rhs.1, &rhs.2)));
        rows
    }

    /// Returns `true` when no non-zero delta has been recorded.
    pub fn is_empty(&self) -> bool {
        self.deltas.is_empty()
    }

    /// Number of raw buckets, before [`StatsAggregator::expand`] is applied.
    pub fn raw_entry_count(&self) -> usize {
        self.deltas.len()
    }
}
13895
/// Additive token-usage counters for one rollup bucket.
///
/// Every field is a running sum; `Default` yields the all-zero delta.
/// Populated by [`TokenStatsAggregator::record`] and
/// [`TokenStatsAggregator::record_session`].
#[derive(Clone, Debug, Default)]
pub struct TokenStatsDelta {
    /// Number of `record` calls folded into this bucket.
    pub api_call_count: i64,
    /// Records whose role was `"user"`.
    pub user_message_count: i64,
    /// Records whose role was `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    /// Records whose role was `"tool"`.
    pub tool_message_count: i64,
    /// Sum of reported input tokens (missing values count as 0).
    pub total_input_tokens: i64,
    /// Sum of reported output tokens (missing values count as 0).
    pub total_output_tokens: i64,
    /// Sum of reported cache-read tokens (missing values count as 0).
    pub total_cache_read_tokens: i64,
    /// Sum of reported cache-creation tokens (missing values count as 0).
    pub total_cache_creation_tokens: i64,
    /// Sum of reported thinking tokens (missing values count as 0).
    pub total_thinking_tokens: i64,
    /// Sum of each usage's `total_tokens()` (missing values count as 0).
    pub grand_total_tokens: i64,
    /// Sum of message content lengths passed to `record`.
    pub total_content_chars: i64,
    /// Sum of tool-call counts.
    pub total_tool_calls: i64,
    /// Sum of estimated costs in USD.
    pub estimated_cost_usd: f64,
    /// Number of `record_session` calls folded into this bucket.
    pub session_count: i64,
}
13921
13922#[derive(Debug, Default)]
13928pub struct TokenStatsAggregator {
13929 deltas: HashMap<(i64, String, String, String), TokenStatsDelta>,
13931}
13932
13933impl TokenStatsAggregator {
13934 pub fn new() -> Self {
13935 Self {
13936 deltas: HashMap::new(),
13937 }
13938 }
13939
13940 #[allow(clippy::too_many_arguments)]
13942 pub fn record(
13943 &mut self,
13944 agent_slug: &str,
13945 source_id: &str,
13946 day_id: i64,
13947 model_family: &str,
13948 role: &str,
13949 usage: &crate::connectors::ExtractedTokenUsage,
13950 content_chars: i64,
13951 estimated_cost_usd: f64,
13952 ) {
13953 let key = (
13954 day_id,
13955 agent_slug.to_owned(),
13956 source_id.to_owned(),
13957 model_family.to_owned(),
13958 );
13959 let delta = self.deltas.entry(key).or_default();
13960
13961 delta.api_call_count += 1;
13962 match role {
13963 "user" => delta.user_message_count += 1,
13964 "assistant" | "agent" => delta.assistant_message_count += 1,
13965 "tool" => delta.tool_message_count += 1,
13966 _ => {}
13967 }
13968
13969 delta.total_input_tokens += usage.input_tokens.unwrap_or(0);
13970 delta.total_output_tokens += usage.output_tokens.unwrap_or(0);
13971 delta.total_cache_read_tokens += usage.cache_read_tokens.unwrap_or(0);
13972 delta.total_cache_creation_tokens += usage.cache_creation_tokens.unwrap_or(0);
13973 delta.total_thinking_tokens += usage.thinking_tokens.unwrap_or(0);
13974 delta.grand_total_tokens += usage.total_tokens().unwrap_or(0);
13975 delta.total_content_chars += content_chars;
13976 delta.total_tool_calls += usage.tool_call_count as i64;
13977 delta.estimated_cost_usd += estimated_cost_usd;
13978 }
13979
13980 pub fn record_session(
13982 &mut self,
13983 agent_slug: &str,
13984 source_id: &str,
13985 day_id: i64,
13986 model_family: &str,
13987 ) {
13988 let key = (
13989 day_id,
13990 agent_slug.to_owned(),
13991 source_id.to_owned(),
13992 model_family.to_owned(),
13993 );
13994 self.deltas.entry(key).or_default().session_count += 1;
13995 }
13996
13997 pub fn expand(&self) -> Vec<(i64, String, String, String, TokenStatsDelta)> {
14004 let mut expanded: HashMap<(i64, String, String, String), TokenStatsDelta> = HashMap::new();
14005
14006 for ((day_id, agent, source, model), delta) in &self.deltas {
14007 let permutations = [
14008 (agent.as_str(), source.as_str(), model.as_str()),
14009 ("all", source.as_str(), model.as_str()),
14010 (agent.as_str(), "all", model.as_str()),
14011 (agent.as_str(), source.as_str(), "all"),
14012 ("all", "all", "all"),
14013 ];
14014
14015 for idx in 0..permutations.len() {
14016 let (a, s, m) = permutations[idx];
14017 if permutations[..idx].contains(&(a, s, m)) {
14019 continue;
14020 }
14021 let key = (*day_id, a.to_owned(), s.to_owned(), m.to_owned());
14022 let entry = expanded.entry(key).or_default();
14023 entry.api_call_count += delta.api_call_count;
14024 entry.user_message_count += delta.user_message_count;
14025 entry.assistant_message_count += delta.assistant_message_count;
14026 entry.tool_message_count += delta.tool_message_count;
14027 entry.total_input_tokens += delta.total_input_tokens;
14028 entry.total_output_tokens += delta.total_output_tokens;
14029 entry.total_cache_read_tokens += delta.total_cache_read_tokens;
14030 entry.total_cache_creation_tokens += delta.total_cache_creation_tokens;
14031 entry.total_thinking_tokens += delta.total_thinking_tokens;
14032 entry.grand_total_tokens += delta.grand_total_tokens;
14033 entry.total_content_chars += delta.total_content_chars;
14034 entry.total_tool_calls += delta.total_tool_calls;
14035 entry.estimated_cost_usd += delta.estimated_cost_usd;
14036 entry.session_count += delta.session_count;
14037 }
14038 }
14039
14040 let mut out: Vec<(i64, String, String, String, TokenStatsDelta)> = expanded
14041 .into_iter()
14042 .map(|((d, a, s, m), delta)| (d, a, s, m, delta))
14043 .collect();
14044 out.sort_by(|(d1, a1, s1, m1, _), (d2, a2, s2, m2, _)| {
14045 d1.cmp(d2)
14046 .then_with(|| a1.cmp(a2))
14047 .then_with(|| s1.cmp(s2))
14048 .then_with(|| m1.cmp(m2))
14049 });
14050 out
14051 }
14052
14053 pub fn is_empty(&self) -> bool {
14054 self.deltas.is_empty()
14055 }
14056
14057 pub fn raw_entry_count(&self) -> usize {
14058 self.deltas.len()
14059 }
14060}
14061
/// Additive counters for one usage-rollup bucket.
///
/// Every field is a running sum that [`AnalyticsRollupAggregator::record`]
/// increments; `Default` yields the all-zero delta.
#[derive(Clone, Debug, Default)]
pub struct UsageRollupDelta {
    /// Messages folded into this bucket.
    pub message_count: i64,
    /// Messages whose role is exactly `"user"`.
    pub user_message_count: i64,
    /// Messages whose role is exactly `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    /// Sum of per-message tool-call counts.
    pub tool_call_count: i64,
    /// Messages flagged `has_plan`.
    pub plan_message_count: i64,
    /// Estimated content tokens of `has_plan` messages.
    pub plan_content_tokens_est_total: i64,
    /// API token total of `has_plan` messages that have API data.
    pub plan_api_tokens_total: i64,
    /// Messages whose `api_data_source` is `"api"`.
    pub api_coverage_message_count: i64,
    /// Estimated content tokens across all messages.
    pub content_tokens_est_total: i64,
    /// Estimated content tokens of user messages.
    pub content_tokens_est_user: i64,
    /// Estimated content tokens of assistant/agent messages.
    pub content_tokens_est_assistant: i64,
    /// Sum of all five API token kinds, for messages with API data only.
    pub api_tokens_total: i64,
    /// API input tokens, for messages with API data only.
    pub api_input_tokens_total: i64,
    /// API output tokens, for messages with API data only.
    pub api_output_tokens_total: i64,
    /// API cache-read tokens, for messages with API data only.
    pub api_cache_read_tokens_total: i64,
    /// API cache-creation tokens, for messages with API data only.
    pub api_cache_creation_tokens_total: i64,
    /// API thinking tokens, for messages with API data only.
    pub api_thinking_tokens_total: i64,
}

/// Per-message metrics row consumed by [`AnalyticsRollupAggregator::record`].
#[derive(Debug, Clone)]
pub struct MessageMetricsEntry {
    pub message_id: i64,
    pub created_at_ms: i64,
    /// Bucket id used for the hourly rollup.
    pub hour_id: i64,
    /// Bucket id used for the daily and per-model daily rollups.
    pub day_id: i64,
    pub agent_slug: String,
    pub workspace_id: i64,
    pub source_id: String,
    /// Compared verbatim against `"user"`, `"assistant"` and `"agent"`.
    pub role: String,
    pub content_chars: i64,
    /// Estimated token count of the message content.
    pub content_tokens_est: i64,
    pub model_name: Option<String>,
    pub model_family: String,
    pub model_tier: String,
    pub provider: String,
    pub api_input_tokens: Option<i64>,
    pub api_output_tokens: Option<i64>,
    pub api_cache_read_tokens: Option<i64>,
    pub api_cache_creation_tokens: Option<i64>,
    pub api_thinking_tokens: Option<i64>,
    pub api_service_tier: Option<String>,
    /// API token fields are only aggregated when this equals `"api"`.
    pub api_data_source: String,
    pub tool_call_count: i64,
    pub has_tool_calls: bool,
    pub has_plan: bool,
}

/// Accumulates [`UsageRollupDelta`]s for the hourly, daily and per-model
/// daily usage rollups.
#[derive(Debug, Default)]
pub struct AnalyticsRollupAggregator {
    /// Keyed by `(hour_id, agent_slug, workspace_id, source_id)`.
    hourly: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    /// Keyed by `(day_id, agent_slug, workspace_id, source_id)`.
    daily: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    /// Keyed by `(day_id, agent_slug, workspace_id, source_id, model_family, model_tier)`.
    models_daily: HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
}

impl AnalyticsRollupAggregator {
    /// Creates an aggregator with all three rollups empty.
    pub fn new() -> Self {
        Self::default()
    }

    /// Folds one message into the hourly, daily and per-model daily buckets.
    ///
    /// The same accumulation is applied to each of the three buckets:
    /// role counters use exact string matches, and API token figures are only
    /// added when `entry.api_data_source == "api"` (missing figures count as
    /// zero).
    pub fn record(&mut self, entry: &MessageMetricsEntry) {
        let content_est = entry.content_tokens_est;
        let api_input = entry.api_input_tokens.unwrap_or(0);
        let api_output = entry.api_output_tokens.unwrap_or(0);
        let api_cache_read = entry.api_cache_read_tokens.unwrap_or(0);
        let api_cache_creation = entry.api_cache_creation_tokens.unwrap_or(0);
        let api_thinking = entry.api_thinking_tokens.unwrap_or(0);
        let api_total =
            api_input + api_output + api_cache_read + api_cache_creation + api_thinking;
        let is_api = entry.api_data_source == "api";
        let is_user = entry.role == "user";
        let is_assistant = entry.role == "assistant" || entry.role == "agent";

        // Single definition of the per-bucket accumulation so the three
        // rollups cannot drift apart.
        let apply = |d: &mut UsageRollupDelta| {
            d.message_count += 1;
            if is_user {
                d.user_message_count += 1;
                d.content_tokens_est_user += content_est;
            }
            if is_assistant {
                d.assistant_message_count += 1;
                d.content_tokens_est_assistant += content_est;
            }
            d.tool_call_count += entry.tool_call_count;
            if entry.has_plan {
                d.plan_message_count += 1;
                d.plan_content_tokens_est_total += content_est;
                if is_api {
                    d.plan_api_tokens_total += api_total;
                }
            }
            if is_api {
                d.api_coverage_message_count += 1;
                d.api_tokens_total += api_total;
                d.api_input_tokens_total += api_input;
                d.api_output_tokens_total += api_output;
                d.api_cache_read_tokens_total += api_cache_read;
                d.api_cache_creation_tokens_total += api_cache_creation;
                d.api_thinking_tokens_total += api_thinking;
            }
            d.content_tokens_est_total += content_est;
        };

        // Hourly and daily rollups share a key shape; only the bucket id differs.
        let bucket_key = |bucket_id: i64| {
            (
                bucket_id,
                entry.agent_slug.clone(),
                entry.workspace_id,
                entry.source_id.clone(),
            )
        };
        apply(self.hourly.entry(bucket_key(entry.hour_id)).or_default());
        apply(self.daily.entry(bucket_key(entry.day_id)).or_default());

        let model_key = (
            entry.day_id,
            entry.agent_slug.clone(),
            entry.workspace_id,
            entry.source_id.clone(),
            entry.model_family.clone(),
            entry.model_tier.clone(),
        );
        apply(self.models_daily.entry(model_key).or_default());
    }

    /// Returns `true` when none of the three rollups holds a bucket.
    pub fn is_empty(&self) -> bool {
        self.hourly.is_empty() && self.daily.is_empty() && self.models_daily.is_empty()
    }

    /// Number of hourly buckets.
    pub fn hourly_entry_count(&self) -> usize {
        self.hourly.len()
    }

    /// Number of daily buckets.
    pub fn daily_entry_count(&self) -> usize {
        self.daily.len()
    }

    /// Number of per-model daily buckets.
    pub fn models_daily_entry_count(&self) -> usize {
        self.models_daily.len()
    }
}
14242
14243fn has_plan_for_role(role: &str, content: &str) -> bool {
14247 let role = role.trim();
14248 (role.eq_ignore_ascii_case("assistant") || role.eq_ignore_ascii_case("agent"))
14249 && has_plan_heuristic(content)
14250}
14251
14252fn has_plan_heuristic(content: &str) -> bool {
14259 if content.len() < 24 {
14260 return false;
14261 }
14262
14263 let lower = content.to_lowercase();
14264
14265 let looks_like_tool_blob = lower.contains("```")
14267 || lower.contains("\"tool\"")
14268 || lower.contains("stdout:")
14269 || lower.contains("stderr:")
14270 || lower.contains("exit code:");
14271
14272 let mut lines: Vec<&str> = Vec::with_capacity(60);
14273 let mut in_fenced_code = false;
14274 for raw in lower.lines() {
14275 let line = raw.trim();
14276 if line.starts_with("```") {
14277 in_fenced_code = !in_fenced_code;
14278 continue;
14279 }
14280 if in_fenced_code || line.is_empty() {
14281 continue;
14282 }
14283 lines.push(line);
14284 if lines.len() >= 60 {
14285 break;
14286 }
14287 }
14288
14289 let header_pos = lines.iter().position(|line| {
14290 line.starts_with("## plan")
14291 || line.starts_with("# plan")
14292 || line.starts_with("plan:")
14293 || line.starts_with("implementation plan")
14294 || line.starts_with("next steps:")
14295 || line.starts_with("action plan:")
14296 });
14297 let preview_top = lines.iter().take(8).copied().collect::<Vec<_>>().join("\n");
14298 let header_near_top = header_pos.is_some_and(|idx| idx <= 6) || preview_top.contains("plan:");
14299
14300 if !header_near_top {
14301 return false;
14302 }
14303 if looks_like_tool_blob && header_pos.is_none() {
14304 return false;
14305 }
14306
14307 let numbered_steps = lines
14308 .iter()
14309 .filter(|line| is_numbered_step_line(line))
14310 .count();
14311 let bullet_steps = lines
14312 .iter()
14313 .filter(|line| {
14314 line.starts_with("- ")
14315 || line.starts_with("* ")
14316 || line.starts_with("+ ")
14317 || line.starts_with("- [ ] ")
14318 || line.starts_with("- [x] ")
14319 })
14320 .count();
14321
14322 numbered_steps >= 2 || (numbered_steps >= 1 && bullet_steps >= 1) || bullet_steps >= 3
14323}
14324
/// Returns `true` for lines shaped like a numbered list item.
///
/// After leading whitespace the line must start with one to three ASCII
/// digits followed by `". "` or `") "`, e.g. `1. foo` or `12) bar`.
fn is_numbered_step_line(line: &str) -> bool {
    let candidate = line.trim_start();
    let after_digits = candidate.trim_start_matches(|c: char| c.is_ascii_digit());
    // ASCII digits are one byte each, so the byte difference is the digit count.
    let digit_count = candidate.len() - after_digits.len();
    if !(1..=3).contains(&digit_count) {
        return false;
    }
    [". ", ") "]
        .iter()
        .any(|separator| after_digits.starts_with(*separator))
}
14334
/// Token-usage record for a single message.
///
/// NOTE(review): no consumer of this struct is visible in this part of the
/// file; the field notes below are limited to what the types themselves show.
#[derive(Debug, Clone)]
pub struct TokenUsageEntry {
    pub message_id: i64,
    pub conversation_id: i64,
    pub agent_id: i64,
    /// `None` when the message has no associated workspace.
    pub workspace_id: Option<i64>,
    pub source_id: String,
    /// Presumably milliseconds since the Unix epoch — TODO confirm.
    pub timestamp_ms: i64,
    pub day_id: i64,
    pub model_name: Option<String>,
    pub model_family: Option<String>,
    pub model_tier: Option<String>,
    pub service_tier: Option<String>,
    pub provider: Option<String>,
    // Token figures are optional: `None` means the figure was not available.
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_creation_tokens: Option<i64>,
    pub thinking_tokens: Option<i64>,
    pub total_tokens: Option<i64>,
    /// `None` when no cost estimate is available.
    pub estimated_cost_usd: Option<f64>,
    pub role: String,
    pub content_chars: i64,
    pub has_tool_calls: bool,
    pub tool_call_count: u32,
    pub data_source: String,
}
14363
/// One row of the `model_pricing` table as loaded by [`PricingTable`].
#[derive(Debug, Clone)]
pub struct PricingEntry {
    /// SQL `LIKE`-style pattern matched against model names.
    pub model_pattern: String,
    pub provider: String,
    /// Price per one million non-cached input tokens.
    pub input_cost_per_mtok: f64,
    /// Price per one million output tokens.
    pub output_cost_per_mtok: f64,
    /// Price per one million cache-read tokens; `None` means cache reads are
    /// not charged by `PricingTable::compute_cost`.
    pub cache_read_cost_per_mtok: Option<f64>,
    /// Price per one million cache-creation tokens; `None` means cache
    /// creation is not charged by `PricingTable::compute_cost`.
    pub cache_creation_cost_per_mtok: Option<f64>,
    /// First day (as days since 2020-01-01) on which this price applies.
    pub effective_day_id: i64,
}
14380
14381#[derive(Debug, Clone, Default)]
14383pub struct PricingDiagnostics {
14384 pub priced_count: u64,
14385 pub unpriced_count: u64,
14386 pub unknown_models: HashMap<String, u64>,
14388}
14389
14390impl PricingDiagnostics {
14391 fn record_priced(&mut self) {
14392 self.priced_count += 1;
14393 }
14394
14395 fn record_unpriced(&mut self, model_name: Option<&str>) {
14396 self.unpriced_count += 1;
14397 let key = model_name.unwrap_or("(none)").to_string();
14398 *self.unknown_models.entry(key).or_insert(0) += 1;
14399 }
14400
14401 pub fn log_summary(&self) {
14403 let total = self.priced_count + self.unpriced_count;
14404 if total == 0 {
14405 return;
14406 }
14407 let pct = (self.priced_count as f64 / total as f64) * 100.0;
14408 tracing::info!(
14409 target: "cass::analytics::pricing",
14410 priced = self.priced_count,
14411 unpriced = self.unpriced_count,
14412 total = total,
14413 coverage_pct = format!("{pct:.1}%"),
14414 "pricing coverage"
14415 );
14416 if !self.unknown_models.is_empty() {
14417 let mut sorted: Vec<_> = self.unknown_models.iter().collect();
14418 sorted.sort_by(|a, b| b.1.cmp(a.1));
14419 for (model, count) in sorted.iter().take(5) {
14420 tracing::debug!(
14421 target: "cass::analytics::pricing",
14422 model = model.as_str(),
14423 count = count,
14424 "unknown model (no pricing)"
14425 );
14426 }
14427 }
14428 }
14429}
14430
14431#[derive(Debug, Clone)]
14433pub struct PricingTable {
14434 entries: Vec<PricingEntry>,
14435}
14436
14437impl PricingTable {
14438 pub fn load(conn: &FrankenConnection) -> Result<Self> {
14440 Self::franken_load(conn)
14441 }
14442
14443 pub fn franken_load(conn: &FrankenConnection) -> Result<Self> {
14445 let rows = conn.query(
14446 "SELECT model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
14447 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
14448 FROM model_pricing
14449 ORDER BY effective_date DESC",
14450 )?;
14451 let mut entries = Vec::with_capacity(rows.len());
14452 for row in &rows {
14453 let effective_date: String = row.get_typed(6)?;
14454 let effective_day_id = date_str_to_day_id(&effective_date)?;
14455 entries.push(PricingEntry {
14456 model_pattern: row.get_typed(0)?,
14457 provider: row.get_typed(1)?,
14458 input_cost_per_mtok: row.get_typed(2)?,
14459 output_cost_per_mtok: row.get_typed(3)?,
14460 cache_read_cost_per_mtok: row.get_typed(4)?,
14461 cache_creation_cost_per_mtok: row.get_typed(5)?,
14462 effective_day_id,
14463 });
14464 }
14465 Ok(Self { entries })
14466 }
14467
14468 pub fn lookup(&self, model_name: &str, message_day_id: i64) -> Option<&PricingEntry> {
14476 let mut best: Option<&PricingEntry> = None;
14477
14478 for entry in &self.entries {
14479 if entry.effective_day_id > message_day_id {
14480 continue;
14481 }
14482 if !sql_like_match(model_name, &entry.model_pattern) {
14483 continue;
14484 }
14485
14486 match best {
14487 None => best = Some(entry),
14488 Some(current) => {
14489 if entry.effective_day_id > current.effective_day_id
14490 || (entry.effective_day_id == current.effective_day_id
14491 && entry.model_pattern.len() > current.model_pattern.len())
14492 {
14493 best = Some(entry);
14494 }
14495 }
14496 }
14497 }
14498
14499 best
14500 }
14501
14502 pub fn compute_cost(
14506 &self,
14507 model_name: Option<&str>,
14508 message_day_id: i64,
14509 input_tokens: Option<i64>,
14510 output_tokens: Option<i64>,
14511 cache_read_tokens: Option<i64>,
14512 cache_creation_tokens: Option<i64>,
14513 ) -> Option<f64> {
14514 let model = model_name?;
14515 let pricing = self.lookup(model, message_day_id)?;
14516
14517 if input_tokens.is_none() && output_tokens.is_none() {
14518 return None;
14519 }
14520
14521 let mut cost = 0.0;
14522 let cache_read = cache_read_tokens.unwrap_or(0);
14523 let cache_creation = cache_creation_tokens.unwrap_or(0);
14524 let non_cache_input = input_tokens
14527 .unwrap_or(0)
14528 .saturating_sub(cache_read)
14529 .saturating_sub(cache_creation)
14530 .max(0);
14531 cost += non_cache_input as f64 * pricing.input_cost_per_mtok / 1_000_000.0;
14532 cost += output_tokens.unwrap_or(0) as f64 * pricing.output_cost_per_mtok / 1_000_000.0;
14533
14534 if let Some(cache_price) = pricing.cache_read_cost_per_mtok {
14535 cost += cache_read as f64 * cache_price / 1_000_000.0;
14536 }
14537 if let Some(cache_price) = pricing.cache_creation_cost_per_mtok {
14538 cost += cache_creation as f64 * cache_price / 1_000_000.0;
14539 }
14540
14541 Some(cost)
14542 }
14543
14544 pub fn is_empty(&self) -> bool {
14546 self.entries.is_empty()
14547 }
14548}
14549
14550fn date_str_to_day_id(s: &str) -> Result<i64> {
14553 use chrono::NaiveDate;
14554 const EPOCH_2020: NaiveDate = match NaiveDate::from_ymd_opt(2020, 1, 1) {
14555 Some(d) => d,
14556 None => unreachable!(),
14557 };
14558 NaiveDate::parse_from_str(s, "%Y-%m-%d")
14559 .map(|d| (d - EPOCH_2020).num_days())
14560 .with_context(|| format!("invalid effective_date '{s}'"))
14561}
14562
14563fn sql_like_match(value: &str, pattern: &str) -> bool {
14565 sql_like_match_bytes(
14566 value.to_ascii_lowercase().as_bytes(),
14567 pattern.to_ascii_lowercase().as_bytes(),
14568 )
14569}
14570
/// Returns the byte length of a UTF-8 sequence judging by its first byte `b`.
///
/// Bytes below `0x80` are single-byte characters; lead bytes below `0xE0`
/// and `0xF0` start two- and three-byte sequences; everything else is
/// treated as a four-byte lead. Continuation bytes (`0x80..=0xBF`) are not
/// rejected and fall into the two-byte range.
fn utf8_char_len(b: u8) -> usize {
    match b {
        0x00..=0x7F => 1,
        0x80..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
14583
14584fn sql_like_match_bytes(val: &[u8], pat: &[u8]) -> bool {
14585 if pat.is_empty() {
14586 return val.is_empty();
14587 }
14588 match pat[0] {
14589 b'%' => {
14590 let mut p = 1;
14591 while p < pat.len() && pat[p] == b'%' {
14592 p += 1;
14593 }
14594 let rest = &pat[p..];
14595 let mut i = 0;
14597 while i <= val.len() {
14598 if sql_like_match_bytes(&val[i..], rest) {
14599 return true;
14600 }
14601 if i < val.len() {
14602 i += utf8_char_len(val[i]);
14603 } else {
14604 break;
14605 }
14606 }
14607 false
14608 }
14609 b'_' => {
14610 if val.is_empty() {
14612 return false;
14613 }
14614 let char_len = utf8_char_len(val[0]);
14615 val.len() >= char_len && sql_like_match_bytes(&val[char_len..], &pat[1..])
14616 }
14617 c => !val.is_empty() && val[0] == c && sql_like_match_bytes(&val[1..], &pat[1..]),
14618 }
14619}
14620
14621fn rebuild_batch_size_env(var: &str, default: usize) -> usize {
14622 dotenvy::var(var)
14623 .ok()
14624 .and_then(|raw| raw.parse::<usize>().ok())
14625 .filter(|value| *value > 0)
14626 .unwrap_or(default)
14627}
14628
/// Returns `true` when `err` reports itself as an out-of-memory failure.
///
/// Generic front for [`OutOfMemoryProbe::is_out_of_memory`], so call sites
/// can pass any error type that implements the probe.
fn is_out_of_memory_error<E: OutOfMemoryProbe + ?Sized>(err: &E) -> bool {
    err.is_out_of_memory()
}
14641
/// Error types that can tell whether they represent an out-of-memory
/// condition.
trait OutOfMemoryProbe {
    /// Returns `true` when this error denotes an out-of-memory failure.
    fn is_out_of_memory(&self) -> bool;
}
14645
14646impl OutOfMemoryProbe for anyhow::Error {
14647 fn is_out_of_memory(&self) -> bool {
14648 self.chain().any(|cause| {
14649 if cause
14650 .downcast_ref::<frankensqlite::FrankenError>()
14651 .is_some_and(|err| matches!(err, frankensqlite::FrankenError::OutOfMemory))
14652 {
14653 return true;
14654 }
14655 is_exact_out_of_memory_message(&cause.to_string())
14656 })
14657 }
14658}
14659
impl OutOfMemoryProbe for frankensqlite::FrankenError {
    /// Only the `OutOfMemory` variant counts; no message inspection is done
    /// for other variants.
    fn is_out_of_memory(&self) -> bool {
        matches!(self, frankensqlite::FrankenError::OutOfMemory)
    }
}
14665
/// Returns `true` when `message`, after trimming surrounding whitespace, is
/// exactly `"out of memory"` or `"not enough memory"` (ASCII
/// case-insensitive).
///
/// Messages that merely contain one of those phrases do not match.
fn is_exact_out_of_memory_message(message: &str) -> bool {
    let normalized = message.trim();
    ["out of memory", "not enough memory"]
        .iter()
        .any(|known| normalized.eq_ignore_ascii_case(known))
}
14672
/// Session, message and character totals for a single day.
///
/// NOTE(review): no producer of this struct is visible in this part of the
/// file; field meanings are taken from their names — confirm against the
/// query that fills it.
#[derive(Debug, Clone)]
pub struct DailyCount {
    /// Day bucket identifier.
    pub day_id: i64,
    pub sessions: i64,
    pub messages: i64,
    pub chars: i64,
}
14685
/// Summary returned after rebuilding the analytics tables.
///
/// NOTE(review): the rebuild routine is not visible in this part of the
/// file; the `*_rows` fields presumably count rows written to the
/// like-named tables — confirm against the producer.
#[derive(Debug, Clone)]
pub struct AnalyticsRebuildResult {
    pub message_metrics_rows: usize,
    pub usage_hourly_rows: usize,
    pub usage_daily_rows: usize,
    pub usage_models_daily_rows: usize,
    /// Duration of the rebuild in milliseconds.
    pub elapsed_ms: u64,
    /// Throughput figure; presumably derived from the two values above —
    /// TODO confirm.
    pub messages_per_sec: f64,
}
14696
/// Summary returned after rebuilding the `daily_stats` table.
#[derive(Debug, Clone)]
pub struct DailyStatsRebuildResult {
    /// Total number of rows in `daily_stats` once the rebuild has finished.
    pub rows_created: i64,
    /// Sum of `session_count` over the rows where both `agent_slug` and
    /// `source_id` are `'all'`.
    pub total_sessions: i64,
}
14703
/// Counts reported by an agent-archive purge.
///
/// NOTE(review): the purge routine is not visible in this part of the file;
/// confirm exactly what each counter covers against the producer.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AgentArchivePurgeResult {
    /// Number of conversations removed.
    pub conversations_deleted: usize,
    /// Number of messages removed.
    pub messages_deleted: usize,
}
14710
/// Health snapshot of the materialized `daily_stats` data.
///
/// NOTE(review): the code that fills this struct is not visible in this part
/// of the file; the notes below are assumptions drawn from the field names
/// and must be confirmed against the producer.
#[derive(Debug, Clone)]
pub struct DailyStatsHealth {
    /// Presumably `true` when the stats table contains any rows.
    pub populated: bool,
    pub row_count: i64,
    /// Presumably the oldest row update time in milliseconds; `None` when
    /// unavailable.
    pub oldest_update_ms: Option<i64>,
    pub conversation_count: i64,
    pub materialized_total: i64,
    /// Presumably the gap between `conversation_count` and
    /// `materialized_total` — TODO confirm sign and definition.
    pub drift: i64,
}
14721
/// Batch size for FTS5 operations.
// NOTE(review): not referenced within this part of the file; presumably the
// number of rows per multi-row FTS insert — confirm at the use site.
const FTS5_BATCH_SIZE: usize = 100;
14730
/// One message row as read during an FTS rebuild.
///
/// NOTE(review): the rebuild query is not visible in this part of the file;
/// how `rowid` and `message_id` differ must be confirmed there.
#[derive(Debug, Clone)]
struct FtsRebuildMessageRow {
    rowid: i64,
    message_id: i64,
    conversation_id: i64,
    content: String,
    /// `None` when the message has no creation timestamp.
    created_at: Option<i64>,
}
14739
/// Conversation-level columns needed alongside message rows.
///
/// NOTE(review): presumably joined with `FtsRebuildMessageRow` during an FTS
/// rebuild — the consumer is not visible here.
#[derive(Debug, Clone)]
struct FtsConversationProjection {
    title: String,
    /// Agent row id, if the conversation references one.
    agent_id: Option<i64>,
    /// Workspace row id, if the conversation references one.
    workspace_id: Option<i64>,
    source_path: String,
}
14747
/// One document staged for insertion into the full-text search index.
///
/// Built from a message and its conversation by [`FtsEntry::from_message`].
#[derive(Debug, Clone)]
pub struct FtsEntry {
    /// Message content.
    pub content: String,
    /// Conversation title; empty when the conversation has none.
    pub title: String,
    /// Agent slug of the owning conversation.
    pub agent: String,
    /// Workspace path rendered as a string; empty when there is no workspace.
    pub workspace: String,
    /// Conversation source path rendered as a string.
    pub source_path: String,
    /// Message timestamp, falling back to the conversation start time.
    pub created_at: Option<i64>,
    /// Id of the message this entry indexes.
    pub message_id: i64,
}
14759
14760impl FtsEntry {
14761 pub fn from_message(message_id: i64, msg: &Message, conv: &Conversation) -> Self {
14763 FtsEntry {
14764 content: msg.content.clone(),
14765 title: conv.title.clone().unwrap_or_default(),
14766 agent: conv.agent_slug.clone(),
14767 workspace: conv
14768 .workspace
14769 .as_ref()
14770 .map(|p| p.to_string_lossy().into_owned())
14771 .unwrap_or_default(),
14772 source_path: path_to_string(&conv.source_path),
14773 created_at: msg.created_at.or(conv.started_at),
14774 message_id,
14775 }
14776 }
14777}
14778
// Limits for a pending batch of `FtsEntry` values: at most 512 documents or
// 1 MiB worth of characters.
// NOTE(review): the code that compares against these limits is outside the
// visible part of the file — presumably it triggers `flush_pending_fts_entries`.
const FTS_ENTRY_BATCH_MAX_DOCS: usize = 512;
const FTS_ENTRY_BATCH_MAX_CHARS: usize = 1024 * 1024;
14781
/// Fallback returned by `fts_rebuild_batch_size` when
/// `CASS_FTS_REBUILD_BATCH_SIZE` is unset, unparsable, or zero.
const FTS_REBUILD_BATCH_SIZE_DEFAULT: usize = 5_000;
14787
14788fn fts_rebuild_batch_size() -> usize {
14791 dotenvy::var("CASS_FTS_REBUILD_BATCH_SIZE")
14792 .ok()
14793 .and_then(|v| v.parse::<usize>().ok())
14794 .filter(|&n| n > 0)
14795 .unwrap_or(FTS_REBUILD_BATCH_SIZE_DEFAULT)
14796}
14797
14798fn flush_pending_fts_entries(
14799 storage: &FrankenStorage,
14800 tx: &FrankenTransaction<'_>,
14801 entries: &mut Vec<FtsEntry>,
14802 pending_chars: &mut usize,
14803 inserted_total: &mut usize,
14804) -> Result<()> {
14805 if entries.is_empty() {
14806 return Ok(());
14807 }
14808
14809 if storage.fts_messages_present_cached(tx) {
14810 *inserted_total += franken_batch_insert_fts(tx, entries)?;
14811 }
14812 entries.clear();
14813 *pending_chars = 0;
14814 Ok(())
14815}
14816
/// Renders a path as an owned `String`, replacing any non-UTF-8 sequences
/// lossily.
fn path_to_string<P: AsRef<Path>>(p: P) -> String {
    let path: &Path = p.as_ref();
    String::from(path.to_string_lossy())
}
14820
14821fn role_str(role: &MessageRole) -> String {
14822 role_as_str(role).to_owned()
14823}
14824
14825fn role_as_str(role: &MessageRole) -> &str {
14826 match role {
14827 MessageRole::User => "user",
14828 MessageRole::Agent => "agent",
14829 MessageRole::Tool => "tool",
14830 MessageRole::System => "system",
14831 MessageRole::Other(v) => v.as_str(),
14832 }
14833}
14834
14835fn agent_kind_str(kind: AgentKind) -> String {
14836 match kind {
14837 AgentKind::Cli => "cli".into(),
14838 AgentKind::VsCode => "vscode".into(),
14839 AgentKind::Hybrid => "hybrid".into(),
14840 }
14841}
14842
14843#[cfg(test)]
14848mod tests {
14849 use super::*;
14850 use serial_test::serial;
14851 use tempfile::TempDir;
14852
/// Restores one environment variable to its earlier state when dropped.
struct EnvGuard {
    /// Name of the environment variable being guarded.
    key: &'static str,
    /// Value to restore on drop; `None` means the variable is removed instead.
    previous: Option<String>,
}
14857
14858 impl Drop for EnvGuard {
14859 fn drop(&mut self) {
14860 if let Some(value) = &self.previous {
14861 unsafe {
14863 std::env::set_var(self.key, value);
14864 }
14865 } else {
14866 unsafe {
14868 std::env::remove_var(self.key);
14869 }
14870 }
14871 }
14872 }
14873
14874 fn set_env_var(key: &'static str, value: impl AsRef<str>) -> EnvGuard {
14875 let previous = dotenvy::var(key).ok();
14876 unsafe {
14878 std::env::set_var(key, value.as_ref());
14879 }
14880 EnvGuard { key, previous }
14881 }
14882
#[test]
fn doctor_mutation_open_guard_only_targets_canonical_archive_db() {
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    let expected_lock = root.join("doctor/locks/doctor-repair.lock");

    // Only the canonical archive file name maps to a doctor lock path.
    let canonical_lock = doctor_mutation_lock_path_for_db_open(&root.join("agent_search.db"));
    let scratch_lock = doctor_mutation_lock_path_for_db_open(&root.join("scratch.db"));

    assert_eq!(canonical_lock, Some(expected_lock));
    assert_eq!(scratch_lock, None);
}
14895
#[test]
fn doctor_lock_metadata_pid_detection_is_exact() {
    let own_pid = std::process::id();
    let own_metadata = format!("schema_version=1\npid={own_pid}\nmode=safe_auto_run\n");
    let neighbour_metadata = format!("pid={}\n", own_pid.saturating_add(1));

    // Matching pid line is accepted.
    assert!(doctor_lock_metadata_pid_is_current_process(
        &own_metadata
    ));
    // A non-numeric pid never matches.
    assert!(!doctor_lock_metadata_pid_is_current_process(
        "schema_version=1\npid=not-a-pid\n"
    ));
    // A different numeric pid never matches.
    assert!(!doctor_lock_metadata_pid_is_current_process(
        &neighbour_metadata
    ));
}
14911
#[test]
fn doctor_storage_open_refuses_active_doctor_mutation_lock_from_other_process() {
    use std::io::Write as _;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    // Create the database on disk and release the handle before probing.
    {
        let seeded = FrankenStorage::open(&db_path).unwrap();
        seeded.close().unwrap();
    }

    // Hold the doctor lock exclusively, with metadata naming a foreign pid.
    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut lock_options = fs::OpenOptions::new();
    lock_options
        .create(true)
        .truncate(false)
        .read(true)
        .write(true);
    let mut lock_file = lock_options.open(&lock_path).unwrap();
    fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
    lock_file.set_len(0).unwrap();
    lock_file.write_all(b"schema_version=1\npid=1\n").unwrap();
    lock_file.sync_all().unwrap();

    let probe_timeout = Duration::from_millis(25);
    let err = open_franken_raw_readonly_connection_with_timeout(&db_path, probe_timeout)
        .expect_err("active doctor mutation lock must block canonical DB opens");
    let message = err.to_string();
    let names_lock = message.contains("doctor mutation lock");
    let names_active = message.contains("active");
    assert!(
        names_lock && names_active,
        "error should identify the active doctor mutation lock: {message}"
    );

    fs2::FileExt::unlock(&lock_file).unwrap();
}
14947
#[test]
fn doctor_storage_open_allows_current_doctor_process_probe() {
    use std::io::Write as _;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    // Create the database on disk and release the handle before probing.
    {
        let seeded = FrankenStorage::open(&db_path).unwrap();
        seeded.close().unwrap();
    }

    // Hold the doctor lock exclusively, with metadata naming this very process.
    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut lock_options = fs::OpenOptions::new();
    lock_options
        .create(true)
        .truncate(false)
        .read(true)
        .write(true);
    let mut lock_file = lock_options.open(&lock_path).unwrap();
    fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
    lock_file.set_len(0).unwrap();
    let own_metadata = format!("schema_version=1\npid={}\n", std::process::id());
    lock_file.write_all(own_metadata.as_bytes()).unwrap();
    lock_file.sync_all().unwrap();

    let probe_timeout = Duration::from_millis(25);
    let conn = open_franken_raw_readonly_connection_with_timeout(&db_path, probe_timeout).expect(
        "doctor process must be able to run post-repair read probes under its own lock",
    );
    drop(conn);

    fs2::FileExt::unlock(&lock_file).unwrap();
}
14981
#[test]
fn autocommit_retain_disable_tries_compat_then_canonical_pragma() {
    const COMPAT_PRAGMA: &str = "PRAGMA fsqlite.autocommit_retain = OFF;";

    let mut tried = Vec::new();

    // Reject only the compat-namespace pragma so the canonical one is chosen.
    let selected = disable_autocommit_retain(|pragma| {
        tried.push(pragma);
        let is_compat = pragma == COMPAT_PRAGMA;
        if is_compat {
            Err("compat namespace unavailable")
        } else {
            Ok(())
        }
    })
    .expect("canonical pragma should disable autocommit retain");

    assert_eq!(selected, "PRAGMA autocommit_retain = OFF;");
    assert_eq!(tried, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
}
14999
#[test]
fn autocommit_retain_disable_fails_closed_when_no_pragma_works() {
    let mut tried = Vec::new();

    // Every pragma is rejected, so the helper must report failure.
    let err = disable_autocommit_retain(|pragma| {
        tried.push(pragma);
        Err("unsupported pragma")
    })
    .expect_err("unsupported autocommit retain controls should fail closed");

    assert_eq!(tried, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);

    let message = err.to_string();
    assert!(
        message.contains("refusing to keep a long-lived MVCC connection"),
        "error should force callers away from unbounded snapshot retention: {message}"
    );

    let mentions_compat = message.contains("PRAGMA fsqlite.autocommit_retain = OFF;");
    let mentions_canonical = message.contains("PRAGMA autocommit_retain = OFF;");
    assert!(
        mentions_compat && mentions_canonical,
        "error should preserve attempted PRAGMAs for diagnostics: {message}"
    );
}
15022
15023 fn rusqlite_test_fixture_conn(db_path: &Path) -> rusqlite::Connection {
15032 rusqlite::Connection::open(db_path).expect("open rusqlite test fixture connection")
15033 }
15034
15035 fn seed_historical_db_direct(
15036 db_path: &Path,
15037 conversations: &[crate::model::types::Conversation],
15038 ) {
15039 if let Some(parent) = db_path.parent() {
15040 fs::create_dir_all(parent).unwrap();
15041 }
15042
15043 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
15044 conn.execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA).unwrap();
15045 conn.execute_compat(
15046 "INSERT INTO agents(id, slug, name, version, kind, created_at, updated_at)
15047 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
15048 fparams![1_i64, "codex", "Codex", "0.2.3", "cli", 0_i64, 0_i64],
15049 )
15050 .unwrap();
15051
15052 let mut next_message_id = 1_i64;
15053 for (conv_index, conv) in conversations.iter().enumerate() {
15054 let conversation_id = i64::try_from(conv_index + 1).unwrap();
15055 let workspace_id = conv.workspace.as_ref().map(|workspace| {
15056 let workspace_id = conversation_id;
15057 let workspace_path = workspace.to_string_lossy().into_owned();
15058 conn.execute_compat(
15059 "INSERT INTO workspaces(id, path, display_name) VALUES(?1, ?2, ?3)",
15060 fparams![
15061 workspace_id,
15062 workspace_path.as_str(),
15063 workspace_path.as_str()
15064 ],
15065 )
15066 .unwrap();
15067 workspace_id
15068 });
15069 let source_path = conv.source_path.to_string_lossy().into_owned();
15070 let metadata_json = conv.metadata_json.to_string();
15071 conn.execute_compat(
15072 "INSERT INTO conversations (
15073 id, agent_id, workspace_id, source_id, external_id, title, source_path,
15074 started_at, ended_at, approx_tokens, metadata_json, origin_host
15075 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
15076 fparams![
15077 conversation_id,
15078 1_i64,
15079 workspace_id,
15080 conv.source_id.as_str(),
15081 conv.external_id.as_deref(),
15082 conv.title.as_deref(),
15083 source_path.as_str(),
15084 conv.started_at,
15085 conv.ended_at,
15086 conv.approx_tokens,
15087 metadata_json.as_str(),
15088 conv.origin_host.as_deref()
15089 ],
15090 )
15091 .unwrap();
15092
15093 for msg in &conv.messages {
15094 let extra_json = msg.extra_json.to_string();
15095 let role = role_str(&msg.role);
15096 conn.execute_compat(
15097 "INSERT INTO messages(
15098 id, conversation_id, idx, role, author, created_at, content, extra_json
15099 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
15100 fparams![
15101 next_message_id,
15102 conversation_id,
15103 msg.idx,
15104 role.as_str(),
15105 msg.author.as_deref(),
15106 msg.created_at,
15107 msg.content.as_str(),
15108 extra_json.as_str()
15109 ],
15110 )
15111 .unwrap();
15112 next_message_id += 1;
15113 }
15114 }
15115 }
15116
#[test]
fn is_user_data_file_detects_bookmarks() {
    // Both an absolute and a bare file name must be recognised.
    for candidate in ["/data/bookmarks.db", "bookmarks.db"] {
        assert!(is_user_data_file(Path::new(candidate)));
    }
}
15126
#[test]
fn is_user_data_file_detects_tui_state() {
    let tui_state = Path::new("/data/tui_state.json");
    assert!(is_user_data_file(tui_state));
}
15131
#[test]
fn is_user_data_file_detects_sources_toml() {
    let sources_config = Path::new("/config/sources.toml");
    assert!(is_user_data_file(sources_config));
}
15136
#[test]
fn is_user_data_file_detects_env() {
    let dotenv_file = Path::new(".env");
    assert!(is_user_data_file(dotenv_file));
}
15141
#[test]
fn is_user_data_file_rejects_other_files() {
    // None of these names are user data files.
    for candidate in ["index.db", "conversations.db", "random.txt"] {
        assert!(!is_user_data_file(Path::new(candidate)));
    }
}
15148
#[test]
fn create_backup_returns_none_for_nonexistent() {
    let dir = TempDir::new().unwrap();
    let missing_db = dir.path().join("nonexistent.db");

    // A missing database is not an error; there is simply nothing to back up.
    let outcome = create_backup(&missing_db).unwrap();
    assert!(outcome.is_none());
}
15160
#[test]
fn create_backup_creates_named_file() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    fs::write(&db_path, b"test data").unwrap();

    let created = create_backup(&db_path).unwrap();
    assert!(created.is_some());

    let backup = created.unwrap();
    assert!(backup.exists());

    // The backup file name must advertise itself as a backup.
    let backup_name = backup.file_name().unwrap().to_str().unwrap();
    assert!(backup_name.contains("backup"));
}
15180
#[test]
fn create_backup_paths_are_unique() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    fs::write(&db_path, b"test data").unwrap();

    // Two back-to-back backups must land at distinct paths.
    let take_backup = || create_backup(&db_path).unwrap().unwrap();
    let first = take_backup();
    let second = take_backup();

    assert_ne!(first, second);
    assert!(first.exists());
    assert!(second.exists());
}
15194
#[test]
fn lexical_rebuild_messages_query_uses_conversation_idx_access_path() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: None,
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Two-message conversation: one user turn followed by one agent turn.
    let user_turn = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: Some("user".into()),
        created_at: Some(1_700_000_000_010),
        content: "first".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let agent_turn = Message {
        id: None,
        idx: 1,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_020),
        content: "second".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("conv-1".into()),
        title: Some("Lexical rebuild".into()),
        source_path: PathBuf::from("/tmp/conv-1.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![user_turn, agent_turn],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    let conversation_id = storage
        .conn
        .query_row_map(
            "SELECT id FROM conversations WHERE external_id = ?1",
            fparams!["conv-1"],
            |row| row.get_typed::<i64>(0),
        )
        .unwrap();

    // Column 1 of each EXPLAIN row is read as the opcode name.
    let opcodes: Vec<String> = storage
        .conn
        .query_map_collect(
            "EXPLAIN \
             SELECT id, idx, role, author, created_at, content \
             FROM messages \
             WHERE conversation_id = ?1 ORDER BY idx",
            fparams![conversation_id],
            |row| row.get_typed(1),
        )
        .unwrap();

    let plan_uses = |wanted: &str| opcodes.iter().any(|opcode| opcode.as_str() == wanted);

    assert!(
        plan_uses("SeekGE"),
        "expected lexical rebuild message fetch to seek into the conversation_id/idx access path, got {opcodes:?}"
    );
    assert!(
        !plan_uses("SorterOpen"),
        "expected lexical rebuild message fetch to avoid sorter temp b-trees, got {opcodes:?}"
    );
}
15281
#[test]
fn schema_check_rebuild_classification_ignores_transient_errors() {
    use frankensqlite::FrankenError;

    // Contention, open failures, and I/O hiccups must not trigger a rebuild.
    let transient = [
        FrankenError::Busy,
        FrankenError::DatabaseLocked {
            path: PathBuf::from("/tmp/test.db"),
        },
        FrankenError::CannotOpen {
            path: PathBuf::from("/tmp/test.db"),
        },
        FrankenError::Io(std::io::Error::other("disk hiccup")),
    ];

    for err in &transient {
        assert!(!schema_check_error_requires_rebuild(err));
    }
}
15301
#[test]
fn schema_check_rebuild_classification_keeps_corruption_errors() {
    use frankensqlite::FrankenError;

    // Every corruption-shaped failure must still demand a rebuild.
    let corrupt = [
        FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string(),
        },
        FrankenError::WalCorrupt {
            detail: "bad wal".to_string(),
        },
        FrankenError::NotADatabase {
            path: PathBuf::from("/tmp/test.db"),
        },
        FrankenError::ShortRead {
            expected: 4096,
            actual: 64,
        },
    ];

    for err in &corrupt {
        assert!(schema_check_error_requires_rebuild(err));
    }
}
15326
#[test]
fn create_backup_refuses_raw_copy_after_retryable_vacuum_errors() {
    use frankensqlite::FrankenError;

    let db_file = || PathBuf::from("/tmp/test.db");

    // Contention-style failures: the VACUUM must be retried, never bypassed.
    let must_retry = vec![
        FrankenError::Busy,
        FrankenError::BusyRecovery,
        FrankenError::BusySnapshot {
            conflicting_pages: "1,2".to_string(),
        },
        FrankenError::DatabaseLocked { path: db_file() },
        FrankenError::LockFailed {
            detail: "fcntl lock still held".to_string(),
        },
        FrankenError::WriteConflict { page: 7, holder: 9 },
        FrankenError::SerializationFailure { page: 11 },
        FrankenError::Internal("database is locked".to_string()),
    ];
    for err in &must_retry {
        assert!(
            backup_vacuum_error_requires_consistent_retry(err),
            "retryable VACUUM failure must not fall back to raw bundle copy: {err}"
        );
    }

    // Failures that are not classified as retryable.
    let not_retryable = [
        FrankenError::NotADatabase { path: db_file() },
        FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string(),
        },
    ];
    for err in &not_retryable {
        assert!(!backup_vacuum_error_requires_consistent_retry(err));
    }
}
15364
#[test]
fn create_backup_uses_hidden_vacuum_stage_path() {
    let backup_path = PathBuf::from("/tmp/test.db.backup.123.456.0");
    let stage_path = vacuum_stage_backup_path(&backup_path);
    let stage_name = stage_path
        .file_name()
        .and_then(std::ffi::OsStr::to_str)
        .unwrap_or("");

    // Hidden (dot-prefixed) and explicitly marked as in progress.
    assert!(stage_name.starts_with('.'));
    assert!(stage_name.ends_with(".vacuum-in-progress"));

    let discoverable = is_backup_root_name(stage_name, "test.db.backup.");
    assert!(
        !discoverable,
        "incomplete VACUUM output must not be discoverable as a backup root"
    );
}
15381
#[test]
fn create_backup_preserves_content() {
    const PAYLOAD: &[u8] = b"test database content 12345";

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    fs::write(&db_path, PAYLOAD).unwrap();

    let backup_path = create_backup(&db_path).unwrap().unwrap();

    // The backup must be a byte-for-byte copy of the source file.
    let copied = fs::read(&backup_path).unwrap();
    assert_eq!(copied, PAYLOAD);
}
15393
#[test]
fn create_backup_copies_sidecars_when_present() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    fs::write(&db_path, b"db").unwrap();
    for (suffix, payload) in sidecars {
        fs::write(database_sidecar_path(&db_path, suffix), payload).unwrap();
    }

    let backup_path = create_backup(&db_path).unwrap().unwrap();

    // Each sidecar must travel with the backup, content intact.
    for (suffix, payload) in sidecars {
        assert_eq!(
            fs::read(database_sidecar_path(&backup_path, suffix)).unwrap(),
            payload
        );
    }
}
15413
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_root_during_raw_fallback() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let outside_db = dir.path().join("outside.db");
    let db_path = dir.path().join("test.db");
    fs::write(&outside_db, b"not sqlite").unwrap();
    symlink(&outside_db, &db_path).unwrap();

    let err = create_backup(&db_path).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    // The symlink target must be left untouched.
    assert_eq!(fs::read(&outside_db).unwrap(), b"not sqlite");

    let mut backup_roots = Vec::new();
    for entry in fs::read_dir(dir.path()).unwrap().flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        if name.starts_with("test.db.backup.") {
            backup_roots.push(name);
        }
    }
    assert!(
        backup_roots.is_empty(),
        "symlinked backup source must not publish backup roots: {backup_roots:?}"
    );
}
15443
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_sidecar_without_partial_backup() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let outside_wal = dir.path().join("outside.wal");
    let wal_path = database_sidecar_path(&db_path, "-wal");
    fs::write(&db_path, b"not sqlite").unwrap();
    fs::write(&outside_wal, b"outside wal").unwrap();
    symlink(&outside_wal, &wal_path).unwrap();

    let err = create_backup(&db_path).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    // The symlink target must be left untouched.
    assert_eq!(fs::read(&outside_wal).unwrap(), b"outside wal");

    let mut backup_roots = Vec::new();
    for entry in fs::read_dir(dir.path()).unwrap().flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        if name.starts_with("test.db.backup.") {
            backup_roots.push(name);
        }
    }
    assert!(
        backup_roots.is_empty(),
        "sidecar preflight failure must not leave a partial backup root: {backup_roots:?}"
    );
}
15475
#[test]
fn cleanup_old_backups_keeps_recent() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    // Five fake backups named test.db.backup.1000 ..= test.db.backup.1004.
    for i in 0..5 {
        let target = dir.path().join(format!("test.db.backup.{}", 1000 + i));
        fs::write(target, format!("backup {i}")).unwrap();
    }

    cleanup_old_backups(&db_path, 3).unwrap();

    // Count what is left over after the cleanup.
    let remaining = fs::read_dir(dir.path())
        .unwrap()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_name().to_str().unwrap_or("").contains("backup"))
        .count();

    assert_eq!(remaining, 3);
}
15502
#[test]
fn cleanup_old_backups_ignores_wal_and_shm_sidecars() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    for i in 0..3 {
        let backup_name = format!("test.db.backup.{}", 1000 + i);
        let backup_path = dir.path().join(&backup_name);
        std::fs::write(&backup_path, format!("backup {i}")).unwrap();
        // Build sidecar paths with the same helper the other bundle tests use,
        // rather than formatting `Path::display()` output, which is lossy for
        // non-UTF-8 temp directories.
        std::fs::write(database_sidecar_path(&backup_path, "-wal"), b"wal").unwrap();
        std::fs::write(database_sidecar_path(&backup_path, "-shm"), b"shm").unwrap();
        // NOTE(review): presumably spaces out file timestamps so the backups
        // have a stable age ordering — confirm against `cleanup_old_backups`.
        std::thread::sleep(std::time::Duration::from_millis(20));
    }

    cleanup_old_backups(&db_path, 2).unwrap();

    // Bucket whatever is left into backup roots and their two sidecar kinds.
    let mut roots = Vec::new();
    let mut wals = Vec::new();
    let mut shms = Vec::new();
    for entry in std::fs::read_dir(dir.path())
        .unwrap()
        .filter_map(|e| e.ok())
    {
        let name = entry.file_name().to_string_lossy().into_owned();
        if name.ends_with("-wal") {
            wals.push(name);
        } else if name.ends_with("-shm") {
            shms.push(name);
        } else if name.contains("backup") {
            roots.push(name);
        }
    }

    assert_eq!(roots.len(), 2, "should keep two backup roots");
    assert_eq!(
        wals.len(),
        2,
        "should keep WAL sidecars only for retained backups"
    );
    assert_eq!(
        shms.len(),
        2,
        "should keep SHM sidecars only for retained backups"
    );
}
15548
#[test]
fn move_database_bundle_moves_database_and_sidecars() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let backup_path = dir.path().join("test.db.corrupt");

    fs::write(&db_path, b"db").unwrap();
    for (suffix, payload) in sidecars {
        fs::write(database_sidecar_path(&db_path, suffix), payload).unwrap();
    }

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();
    let expected = DatabaseBundleMoveResult {
        database: true,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, expected);
    assert!(moved.moved_any());

    // Nothing is left behind at the source location.
    assert!(!db_path.exists());
    for (suffix, _) in sidecars {
        assert!(!database_sidecar_path(&db_path, suffix).exists());
    }

    // Everything arrived at the destination with its content intact.
    assert_eq!(fs::read(&backup_path).unwrap(), b"db");
    for (suffix, payload) in sidecars {
        assert_eq!(
            fs::read(database_sidecar_path(&backup_path, suffix)).unwrap(),
            payload
        );
    }
}
15584
#[test]
fn move_database_bundle_preserves_orphan_sidecars_without_main_db() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let backup_path = dir.path().join("test.db.corrupt");

    // Only the sidecars exist; the main database file is deliberately absent.
    for (suffix, payload) in sidecars {
        fs::write(database_sidecar_path(&db_path, suffix), payload).unwrap();
    }

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();
    let expected = DatabaseBundleMoveResult {
        database: false,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, expected);
    assert!(moved.moved_any());

    assert!(!db_path.exists());
    for (suffix, _) in sidecars {
        assert!(!database_sidecar_path(&db_path, suffix).exists());
    }
    for (suffix, payload) in sidecars {
        assert_eq!(
            fs::read(database_sidecar_path(&backup_path, suffix)).unwrap(),
            payload
        );
    }
}
15616
#[test]
#[cfg(unix)]
fn move_database_bundle_moves_dangling_symlink_database_root() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let backup_path = dir.path().join("test.db.corrupt");
    let missing_target = dir.path().join("missing-target.db");

    // The database root is a symlink whose target does not exist.
    symlink(&missing_target, &db_path).unwrap();

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();

    let expected = DatabaseBundleMoveResult {
        database: true,
        wal: false,
        shm: false,
    };
    assert_eq!(moved, expected);

    // The link itself moved; it was neither followed nor materialized.
    assert!(fs::symlink_metadata(&db_path).is_err());
    let moved_type = fs::symlink_metadata(&backup_path).unwrap().file_type();
    assert!(moved_type.is_symlink());
    assert!(!missing_target.exists());
}
15648
#[test]
#[cfg(unix)]
fn move_database_bundle_moves_dangling_symlink_sidecars_without_main_db() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let backup_path = dir.path().join("test.db.corrupt");
    let missing_wal_target = dir.path().join("missing-wal");
    let missing_shm_target = dir.path().join("missing-shm");
    let wal_path = database_sidecar_path(&db_path, "-wal");
    let shm_path = database_sidecar_path(&db_path, "-shm");

    // Both sidecars are symlinks to targets that do not exist.
    symlink(&missing_wal_target, &wal_path).unwrap();
    symlink(&missing_shm_target, &shm_path).unwrap();

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();

    let expected = DatabaseBundleMoveResult {
        database: false,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, expected);

    // The links are gone from the source location...
    assert!(fs::symlink_metadata(&wal_path).is_err());
    assert!(fs::symlink_metadata(&shm_path).is_err());

    // ...and show up, still as links, next to the destination path.
    let moved_wal_type = fs::symlink_metadata(database_sidecar_path(&backup_path, "-wal"))
        .unwrap()
        .file_type();
    assert!(moved_wal_type.is_symlink());
    let moved_shm_type = fs::symlink_metadata(database_sidecar_path(&backup_path, "-shm"))
        .unwrap()
        .file_type();
    assert!(moved_shm_type.is_symlink());

    assert!(!missing_wal_target.exists());
    assert!(!missing_shm_target.exists());
}
15692
#[test]
fn copy_database_bundle_copies_database_and_sidecars() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let copied_path = dir.path().join("copy.db");

    fs::write(&db_path, b"db").unwrap();
    for (suffix, payload) in sidecars {
        fs::write(database_sidecar_path(&db_path, suffix), payload).unwrap();
    }

    copy_database_bundle(&db_path, &copied_path).unwrap();

    assert_eq!(fs::read(&copied_path).unwrap(), b"db");
    for (suffix, payload) in sidecars {
        assert_eq!(
            fs::read(database_sidecar_path(&copied_path, suffix)).unwrap(),
            payload
        );
    }
    // A copy must leave the source database in place.
    assert_eq!(fs::read(&db_path).unwrap(), b"db");
}
15716
#[test]
fn copy_database_bundle_creates_destination_parent() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    // The destination lives two directory levels deep; neither exists yet.
    let copied_path = dir.path().join("nested/copies/copy.db");

    fs::write(&db_path, b"db").unwrap();
    fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();

    copy_database_bundle(&db_path, &copied_path).unwrap();

    let destination_parent = copied_path.parent().unwrap();
    assert!(destination_parent.is_dir());
    assert_eq!(fs::read(&copied_path).unwrap(), b"db");

    let copied_wal = database_sidecar_path(&copied_path, "-wal");
    assert_eq!(fs::read(copied_wal).unwrap(), b"wal");
}
15735
#[test]
#[cfg(unix)]
fn copy_database_bundle_rejects_symlink_source_root() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let outside_db = dir.path().join("outside.db");
    let db_path = dir.path().join("test.db");
    let copied_path = dir.path().join("copy.db");

    // The source database root is a symlink to another file.
    fs::write(&outside_db, b"outside").unwrap();
    symlink(&outside_db, &db_path).unwrap();

    let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    // Nothing was copied and the link target is untouched.
    assert!(!copied_path.exists());
    assert_eq!(fs::read(&outside_db).unwrap(), b"outside");
}
15758
#[test]
#[cfg(unix)]
fn copy_database_bundle_rejects_symlink_sidecar() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let copied_path = dir.path().join("copy.db");
    let outside_wal = dir.path().join("outside.wal");
    let wal_path = database_sidecar_path(&db_path, "-wal");

    // Regular database file, but its WAL sidecar is a symlink.
    fs::write(&db_path, b"db").unwrap();
    fs::write(&outside_wal, b"outside wal").unwrap();
    symlink(&outside_wal, &wal_path).unwrap();

    let err = copy_database_bundle(&db_path, &copied_path).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    assert_eq!(fs::read(&outside_wal).unwrap(), b"outside wal");
    // No part of the bundle may have been published at the destination.
    assert!(!copied_path.exists());
    let copied_wal = database_sidecar_path(&copied_path, "-wal");
    assert!(!copied_wal.exists());
}
15784
/// `move_database_bundle` must create the destination directory, report all
/// three bundle members as moved, and place each member's bytes at the new
/// location.
#[test]
fn move_database_bundle_creates_destination_parent_and_moves_sidecars() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let backup_path = dir.path().join("nested/backups/test.db.corrupt");
    let sidecars = [("-wal", b"wal".as_slice()), ("-shm", b"shm".as_slice())];

    std::fs::write(&db_path, b"db").unwrap();
    for (suffix, payload) in sidecars {
        std::fs::write(database_sidecar_path(&db_path, suffix), payload).unwrap();
    }

    let moved = move_database_bundle(&db_path, &backup_path).unwrap();
    let expected = DatabaseBundleMoveResult {
        database: true,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, expected);

    let backup_parent = backup_path.parent().unwrap();
    assert!(backup_parent.is_dir());
    assert_eq!(std::fs::read(&backup_path).unwrap(), b"db");
    for (suffix, payload) in sidecars {
        let moved_sidecar = std::fs::read(database_sidecar_path(&backup_path, suffix)).unwrap();
        assert_eq!(moved_sidecar, payload);
    }
}
15815
/// `remove_database_files` must succeed and delete `-wal`/`-shm` sidecars
/// even when the main database file was never created.
#[test]
fn remove_database_files_removes_orphan_sidecars_without_main_db() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let wal_path = database_sidecar_path(&db_path, "-wal");
    let shm_path = database_sidecar_path(&db_path, "-shm");

    // Only the sidecars exist; `test.db` itself is deliberately absent.
    std::fs::write(&wal_path, b"wal").unwrap();
    std::fs::write(&shm_path, b"shm").unwrap();

    remove_database_files(&db_path).unwrap();

    assert!(!db_path.exists());
    assert!(!wal_path.exists());
    assert!(!shm_path.exists());
}
15830
/// With three backup files and one directory that merely carries a backup
/// name, `cleanup_old_backups(&db_path, 2)` must leave two files and must not
/// touch the directory.
#[test]
fn cleanup_old_backups_ignores_backup_named_directories() {
    let dir = TempDir::new().unwrap();
    let root = dir.path();
    let db_path = root.join("test.db");

    for (offset, stamp) in (1000..1003).enumerate() {
        let backup_file = root.join(format!("test.db.backup.{stamp}"));
        std::fs::write(backup_file, format!("backup {offset}")).unwrap();
    }
    std::fs::create_dir(root.join("test.db.backup.directory")).unwrap();

    cleanup_old_backups(&db_path, 2).unwrap();

    // Split the surviving backup-named entries into directories and files.
    let (backup_dirs, backup_files): (Vec<_>, Vec<_>) = std::fs::read_dir(root)
        .unwrap()
        .filter_map(Result::ok)
        .filter(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .starts_with("test.db.backup.")
        })
        .partition(|entry| entry.path().is_dir());

    assert_eq!(
        backup_files.len(),
        2,
        "only real backup files count toward retention"
    );
    assert_eq!(
        backup_dirs.len(),
        1,
        "backup-named directories should be ignored"
    );
}
15872
/// `SqliteStorage::open` on a path that does not exist yet must create the
/// database file, and the resulting handle must close cleanly.
#[test]
fn open_creates_new_database() {
    let tmp = TempDir::new().unwrap();
    let fresh_db = tmp.path().join("new.db");
    assert!(!fresh_db.exists());

    let handle = SqliteStorage::open(&fresh_db).unwrap();
    assert!(fresh_db.exists());

    handle.close().unwrap();
}
15887
/// Opening a database read-only must fail when no file exists at the path.
#[test]
fn open_readonly_fails_for_nonexistent() {
    let tmp = TempDir::new().unwrap();
    let missing_db = tmp.path().join("nonexistent.db");

    let outcome = SqliteStorage::open_readonly(&missing_db);

    assert!(outcome.is_err());
}
15895
/// A database created by `SqliteStorage::open` can afterwards be opened
/// read-only, and its schema version is readable through that handle.
#[test]
fn open_readonly_succeeds_for_existing() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("existing.db");

    // Create the database, dropping the writable handle before reopening.
    drop(SqliteStorage::open(&db_path).unwrap());

    let readonly = SqliteStorage::open_readonly(&db_path).unwrap();
    assert!(readonly.schema_version().is_ok());
}
15909
/// Opening an already-current database a second time must still report
/// `CURRENT_SCHEMA_VERSION`.
#[test]
fn reopen_existing_current_schema_is_idempotent() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("existing.db");

    // First open creates the database; the handle is dropped with the scope.
    let initial_version = {
        let storage = SqliteStorage::open(&db_path).unwrap();
        storage.schema_version().unwrap()
    };
    assert_eq!(initial_version, CURRENT_SCHEMA_VERSION);

    let reopened = SqliteStorage::open(&db_path).unwrap();
    let reopened_version = reopened.schema_version().unwrap();
    assert_eq!(
        reopened_version, CURRENT_SCHEMA_VERSION,
        "reopening current schema DB should be idempotent"
    );
}
15929
/// `open_or_rebuild` on a database that is already at the current schema must
/// succeed and report `CURRENT_SCHEMA_VERSION`.
#[test]
fn open_or_rebuild_current_schema_does_not_trigger_rebuild() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("existing.db");

    // Create a current-schema database and release the handle.
    let created_version = {
        let storage = SqliteStorage::open(&db_path).unwrap();
        storage.schema_version().unwrap()
    };
    assert_eq!(created_version, CURRENT_SCHEMA_VERSION);

    let reopened = SqliteStorage::open_or_rebuild(&db_path)
        .expect("current schema DB should open without rebuild");
    let reopened_version = reopened.schema_version().unwrap();
    assert_eq!(reopened_version, CURRENT_SCHEMA_VERSION);
}
15946
/// Pointing `open_or_rebuild` at a directory must surface a plain database or
/// I/O error, never a rebuild request or a generic `Other` error, and the
/// directory must still exist afterwards.
#[test]
fn open_or_rebuild_does_not_treat_non_database_paths_as_corruption() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("db_dir");
    std::fs::create_dir(&db_path).unwrap();

    match SqliteStorage::open_or_rebuild(&db_path) {
        Ok(_) => panic!("directory path must not open as a database"),
        Err(MigrationError::RebuildRequired { reason, .. }) => {
            panic!("should not rebuild non-database path: {reason}")
        }
        Err(MigrationError::Other(msg)) => {
            panic!("should preserve underlying open error, got Other: {msg}")
        }
        // The only acceptable outcomes: the underlying open error is preserved.
        Err(MigrationError::Database(_)) | Err(MigrationError::Io(_)) => {}
    }

    assert!(
        db_path.is_dir(),
        "non-database directory must be left in place"
    );
}
15971
/// A freshly created database must report exactly `CURRENT_SCHEMA_VERSION`.
///
/// The previous `>= 5` check would keep passing even if a fresh open stopped
/// short of the newest migration; comparing against the constant matches what
/// the neighbouring open/reopen tests already assert.
#[test]
fn schema_version_returns_current() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let version = storage.schema_version().unwrap();

    assert_eq!(
        version, CURRENT_SCHEMA_VERSION,
        "freshly opened database should report CURRENT_SCHEMA_VERSION"
    );
}
15984
/// A freshly opened database must contain the analytics tables
/// (`message_metrics`, `usage_hourly`, `usage_daily`, `usage_models_daily`)
/// with the expected columns and indexes, the conversation tail columns, no
/// `fts_messages` object, and must pass `PRAGMA integrity_check`.
#[test]
fn migration_v13_creates_analytics_tables() {
    /// Runs `PRAGMA <pragma>(<table>)` and collects column 1 of every row,
    /// which is the name column for both `table_info` and `index_list`.
    fn pragma_names(conn: &FrankenConnection, pragma: &str, table: &str) -> Vec<String> {
        conn.query_map_collect(
            &format!("PRAGMA {pragma}({table})"),
            fparams![],
            |row: &FrankenRow| row.get_typed(1),
        )
        .unwrap()
    }

    /// Panics with `<table> missing column: <name>` for the first expected
    /// column that `table` does not have.
    fn assert_columns(conn: &FrankenConnection, table: &str, expected: &[&str]) {
        let present = pragma_names(conn, "table_info", table);
        for &column in expected {
            assert!(
                present.iter().any(|name| name == column),
                "{table} missing column: {column}"
            );
        }
    }

    /// For each `(fragment, failure)` pair, requires some index on `table`
    /// whose name contains `fragment`; panics with `failure` otherwise.
    fn assert_indexes(conn: &FrankenConnection, table: &str, expected: &[(&str, &str)]) {
        let present = pragma_names(conn, "index_list", table);
        for &(fragment, failure) in expected {
            assert!(
                present.iter().any(|name| name.contains(fragment)),
                "{failure}"
            );
        }
    }

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let version = storage.schema_version().unwrap();
    assert_eq!(
        version, CURRENT_SCHEMA_VERSION,
        "Schema version must match CURRENT_SCHEMA_VERSION after migration"
    );

    let conn = storage.raw();

    // Column checks, one table at a time.
    assert_columns(
        conn,
        "message_metrics",
        &[
            "message_id",
            "hour_id",
            "day_id",
            "content_tokens_est",
            "model_name",
            "model_family",
            "model_tier",
            "provider",
            "api_input_tokens",
            "has_plan",
            "agent_slug",
            "role",
            "api_data_source",
        ],
    );
    assert_columns(
        conn,
        "usage_hourly",
        &[
            "hour_id",
            "plan_message_count",
            "plan_content_tokens_est_total",
            "plan_api_tokens_total",
            "api_coverage_message_count",
            "content_tokens_est_user",
            "api_thinking_tokens_total",
        ],
    );
    assert_columns(
        conn,
        "usage_daily",
        &[
            "day_id",
            "plan_content_tokens_est_total",
            "plan_api_tokens_total",
            "api_thinking_tokens_total",
            "content_tokens_est_assistant",
            "message_count",
        ],
    );
    assert_columns(
        conn,
        "usage_models_daily",
        &[
            "day_id",
            "model_family",
            "model_tier",
            "message_count",
            "api_tokens_total",
            "api_coverage_message_count",
        ],
    );

    // Index checks, matched by name fragment.
    assert_indexes(
        conn,
        "message_metrics",
        &[
            ("idx_mm_hour", "message_metrics must have hour index"),
            ("idx_mm_agent_day", "message_metrics must have agent+day index"),
            (
                "idx_mm_model_family_day",
                "message_metrics must have model_family+day index",
            ),
        ],
    );
    assert_indexes(
        conn,
        "usage_hourly",
        &[("idx_uh_agent", "usage_hourly must have agent index")],
    );
    assert_indexes(
        conn,
        "usage_daily",
        &[("idx_ud_agent", "usage_daily must have agent index")],
    );
    assert_indexes(
        conn,
        "usage_models_daily",
        &[(
            "idx_umd_model_day",
            "usage_models_daily must have model+day index",
        )],
    );

    let conversation_cols = pragma_names(conn, "table_info", "conversations");
    let has_conversation_col = |wanted: &str| conversation_cols.iter().any(|name| name == wanted);
    assert!(
        has_conversation_col("last_message_idx")
            && has_conversation_col("last_message_created_at"),
        "fresh schema must include V15 tail columns without ALTER TABLE on conversations"
    );

    let fts_schema_rows: i64 = conn
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row: &FrankenRow| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        fts_schema_rows, 0,
        "fresh schema should not create and immediately drop derived fts_messages"
    );

    let integrity: Vec<String> = conn
        .query_map_collect("PRAGMA integrity_check;", fparams![], |row: &FrankenRow| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(
        integrity,
        vec!["ok".to_string()],
        "fresh schema must pass SQLite integrity_check"
    );
}
16162
/// The hour and day bucket ids derived from one timestamp must agree
/// (`hour_id / 24 == day_id`), and converting the hour id back to
/// milliseconds must land at or before the timestamp, less than one hour away.
#[test]
fn hour_id_round_trip() {
    const MS_PER_HOUR: i64 = 3_600_000;
    let ts_ms = 1_770_508_800_000_i64;

    let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
    let day_id = SqliteStorage::day_id_from_millis(ts_ms);
    assert_eq!(hour_id / 24, day_id, "hour_id/24 should equal day_id");

    let hour_start_ms = SqliteStorage::millis_from_hour_id(hour_id);
    let within_same_hour = hour_start_ms <= ts_ms && ts_ms - hour_start_ms < MS_PER_HOUR;
    assert!(
        within_same_hour,
        "Round-trip should land within the same hour"
    );
}
16180
/// For a timestamp one millisecond before the Unix epoch, the day and hour
/// ids must equal the floored (Euclidean) division of the seconds offset from
/// `1_577_836_800` by the bucket length.
#[test]
fn day_and_hour_ids_floor_negative_millis() {
    const SECS_PER_DAY: i64 = 86_400;
    const SECS_PER_HOUR: i64 = 3_600;

    let ts_ms = -1_i64;
    // -1 ms floored to whole seconds is -1 s, not 0 s.
    let expected_secs = -1_i64;
    let epoch_2020_secs = 1_577_836_800_i64;
    let secs_since_2020 = expected_secs - epoch_2020_secs;

    let day_id = SqliteStorage::day_id_from_millis(ts_ms);
    assert_eq!(day_id, secs_since_2020.div_euclid(SECS_PER_DAY));

    let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
    assert_eq!(hour_id, secs_since_2020.div_euclid(SECS_PER_HOUR));
}
16198
/// A database hand-built at schema version 10 must be upgraded to
/// `CURRENT_SCHEMA_VERSION` by `SqliteStorage::open`, ending up with all four
/// analytics tables.
#[test]
fn migration_v13_from_v10() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    // Build the v10 database by hand; the connection is dropped with the scope.
    {
        let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
        conn.execute_batch("PRAGMA journal_mode=WAL;").unwrap();
        conn.execute_batch("CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);")
            .unwrap();
        conn.execute("INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', '10')")
            .unwrap();

        // Apply the migration scripts in order; note that no V3 script is in this list.
        let scripts = [
            MIGRATION_V1,
            MIGRATION_V2,
            MIGRATION_V4,
            MIGRATION_V5,
            MIGRATION_V6,
            MIGRATION_V7,
            MIGRATION_V8,
            MIGRATION_V9,
            MIGRATION_V10,
        ];
        for script in scripts {
            conn.execute_batch(script).unwrap();
        }

        // Re-assert the recorded version after the scripts have run.
        conn.execute("UPDATE meta SET value = '10' WHERE key = 'schema_version'")
            .unwrap();
    }
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    let storage = SqliteStorage::open(&db_path).unwrap();
    let migrated_version = storage.schema_version().unwrap();
    assert_eq!(
        migrated_version, CURRENT_SCHEMA_VERSION,
        "Should have migrated from v10 to the current schema"
    );

    let analytics_tables: i64 = storage
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('message_metrics', 'usage_hourly', 'usage_daily', 'usage_models_daily')",
            fparams![],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap();
    assert_eq!(analytics_tables, 4, "All 4 analytics tables should exist");
}
16251
/// Inserts one three-message conversation (user, agent with API usage data,
/// user) through `insert_conversations_batched` and checks that
/// `message_metrics` and the hourly, daily and per-model rollups are filled
/// with mutually consistent values.
#[test]
fn analytics_ingest_populates_metrics_and_rollups() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One selected `message_metrics` row: hour_id, day_id, role,
    // content_tokens_est, has_plan, api_data_source, model_family, model_tier,
    // provider.
    type MetricsRow = (i64, i64, String, i64, i64, String, String, String, String);

    const USER_PROMPT: &str = "Hello, can you help me with a plan?";

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let ts_ms = 1_770_551_400_000_i64;
    let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
    let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);

    // Usage payload attached to the agent message only.
    let usage_json = serde_json::json!({
        "message": {
            "model": "claude-opus-4-6",
            "usage": {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_read_input_tokens": 200,
                "cache_creation_input_tokens": 30,
                "service_tier": "standard"
            }
        }
    });

    let opening = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(ts_ms),
        content: USER_PROMPT.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let plan_reply = Message {
        id: None,
        idx: 1,
        role: MessageRole::Agent,
        author: None,
        created_at: Some(ts_ms + 30_000),
        content: "## Plan\n\n1. First step\n2. Second step\n3. Third step".into(),
        extra_json: usage_json,
        snippets: Vec::new(),
    };
    let follow_up = Message {
        id: None,
        idx: 2,
        role: MessageRole::User,
        author: None,
        created_at: Some(ts_ms + 60_000),
        content: "Great, let's proceed!".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: None,
        external_id: Some("test-conv-1".into()),
        title: Some("Test conversation".into()),
        source_path: PathBuf::from("/tmp/test.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![opening, plan_reply, follow_up],
        source_id: "local".into(),
        origin_host: None,
    };

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), 3);

    let conn = storage.raw();

    // Runs a query whose single result row has one integer column.
    let scalar_i64 = |sql: &str, params: &[ParamValue]| -> i64 {
        conn.query_row_map(sql, params, |row: &FrankenRow| row.get_typed::<i64>(0))
            .unwrap()
    };

    // --- message_metrics -------------------------------------------------
    let mm_count = scalar_i64("SELECT COUNT(*) FROM message_metrics", fparams![]);
    assert_eq!(mm_count, 3, "Should have 3 message_metrics rows");

    let rows: Vec<MetricsRow> = conn
        .query_map_collect(
            "SELECT hour_id, day_id, role, content_tokens_est, has_plan, api_data_source, model_family, model_tier, provider FROM message_metrics ORDER BY message_id",
            fparams![],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                    row.get_typed(6)?,
                    row.get_typed(7)?,
                    row.get_typed(8)?,
                ))
            },
        )
        .unwrap();

    assert_eq!(rows.len(), 3);
    let (first_user, assistant, second_user) = (&rows[0], &rows[1], &rows[2]);

    assert_eq!(first_user.0, expected_hour);
    assert_eq!(first_user.1, expected_day);
    assert_eq!(first_user.2, "user");
    assert_eq!(
        assistant.4, 1,
        "Assistant message with plan should have has_plan=1"
    );
    assert_eq!(
        assistant.5, "api",
        "Claude Code assistant message should have api data source"
    );
    assert_eq!(first_user.5, "estimated");
    assert_eq!(second_user.5, "estimated");
    assert_eq!(assistant.6, "claude");
    assert_eq!(assistant.7, "opus");
    assert_eq!(assistant.8, "anthropic");
    assert_eq!(first_user.6, "unknown");
    // The expected estimate is the prompt's byte length divided by four.
    let user_chars = USER_PROMPT.len() as i64;
    assert_eq!(first_user.3, user_chars / 4);

    // --- usage_hourly ----------------------------------------------------
    let hourly: (i64, i64, i64, i64, i64, i64, i64) = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
 plan_content_tokens_est_total, plan_api_tokens_total, api_coverage_message_count
 FROM usage_hourly WHERE hour_id = ?",
            fparams![expected_hour],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                    row.get_typed(6)?,
                ))
            },
        )
        .unwrap();
    let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api, uh_api_cov) = hourly;
    assert_eq!(uh_msg, 3, "Hourly rollup should have 3 messages");
    assert_eq!(uh_user, 2, "Hourly rollup should have 2 user messages");
    assert_eq!(uh_asst, 1, "Hourly rollup should have 1 assistant message");
    assert_eq!(uh_plan, 1, "Hourly rollup should have 1 plan message");
    assert!(
        uh_plan_content > 0,
        "Hourly rollup should include plan content tokens"
    );
    assert!(
        uh_plan_api > 0,
        "Hourly rollup should include plan API tokens"
    );
    assert_eq!(
        uh_api_cov, 1,
        "Hourly rollup should have 1 API-covered message"
    );

    // --- usage_daily -----------------------------------------------------
    let (ud_msg, ud_api_cov): (i64, i64) = conn
        .query_row_map(
            "SELECT message_count, api_coverage_message_count FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(ud_msg, 3, "Daily rollup should match hourly");
    assert_eq!(
        ud_api_cov, 1,
        "Daily api_coverage should be 1 (only assistant msg has real API data)"
    );

    let api_only_input = scalar_i64(
        "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE day_id = ? AND api_data_source = 'api'",
        fparams![expected_day],
    );
    assert_eq!(
        api_only_input, 100,
        "Only API-sourced input tokens should be 100"
    );

    // --- daily rollup totals must equal sums over message_metrics --------
    let mm_total_content_est = scalar_i64(
        "SELECT SUM(content_tokens_est) FROM message_metrics WHERE day_id = ?",
        fparams![expected_day],
    );
    let mm_plan_content_est = scalar_i64(
        "SELECT COALESCE(SUM(content_tokens_est), 0) FROM message_metrics WHERE day_id = ? AND has_plan = 1",
        fparams![expected_day],
    );
    let mm_plan_api_total = scalar_i64(
        "SELECT COALESCE(SUM(COALESCE(api_input_tokens, 0) + COALESCE(api_output_tokens, 0) + COALESCE(api_cache_read_tokens, 0) + COALESCE(api_cache_creation_tokens, 0) + COALESCE(api_thinking_tokens, 0)), 0)
 FROM message_metrics WHERE day_id = ? AND has_plan = 1 AND api_data_source = 'api'",
        fparams![expected_day],
    );
    let ud_content_est = scalar_i64(
        "SELECT content_tokens_est_total FROM usage_daily WHERE day_id = ?",
        fparams![expected_day],
    );
    let (ud_plan_content_est, ud_plan_api_total): (i64, i64) = conn
        .query_row_map(
            "SELECT plan_content_tokens_est_total, plan_api_tokens_total FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(
        mm_total_content_est, ud_content_est,
        "Daily rollup content_tokens_est_total must equal SUM of message_metrics"
    );
    assert_eq!(
        mm_plan_content_est, ud_plan_content_est,
        "Daily rollup plan_content_tokens_est_total must equal planned message_metrics content sum"
    );
    assert_eq!(
        mm_plan_api_total, ud_plan_api_total,
        "Daily rollup plan_api_tokens_total must equal planned message_metrics API token sum"
    );

    // --- usage_models_daily ----------------------------------------------
    let claude_bucket: (i64, i64, i64, i64, i64) = conn
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, api_tokens_total, api_coverage_message_count
 FROM usage_models_daily
 WHERE day_id = ? AND model_family = 'claude' AND model_tier = 'opus'",
            fparams![expected_day],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                ))
            },
        )
        .unwrap();
    let (claude_msg, claude_user, claude_asst, claude_api_total, claude_api_cov) = claude_bucket;
    assert_eq!(claude_msg, 1);
    assert_eq!(claude_user, 0);
    assert_eq!(claude_asst, 1);
    // 100 input + 50 output + 200 cache-read + 30 cache-creation from the payload above.
    assert_eq!(claude_api_total, 380);
    assert_eq!(claude_api_cov, 1);

    let unknown_msg = scalar_i64(
        "SELECT message_count FROM usage_models_daily
 WHERE day_id = ? AND model_family = 'unknown' AND model_tier = 'unknown'",
        fparams![expected_day],
    );
    assert_eq!(
        unknown_msg, 2,
        "user messages should land in unknown model bucket"
    );
}
16564
/// `has_plan_heuristic` must accept headed or labelled step lists and reject
/// ordinary prose as well as numbered output embedded in a fenced JSON block.
#[test]
fn has_plan_heuristic_detects_plans() {
    let plans = [
        "## Plan\n\n1. First step\n2. Second step",
        "# Plan\nHere is what we will do:\n1. Step one\n2. Step two",
        "Plan:\n- Gather baseline\n- Implement changes\n- Validate with tests",
        "Next steps:\n1. Update schema\n2. Rebuild rollups",
    ];
    let non_plans = [
        "Hello world",
        "Short",
        "This is a regular message without plans",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. install\\n2. run\"}\n```",
    ];

    for sample in plans {
        assert!(has_plan_heuristic(sample));
    }
    for sample in non_plans {
        assert!(!has_plan_heuristic(sample));
    }
}
16588
/// The same plan text counts as a plan for the `assistant`, `agent` and
/// `Assistant` roles, but not for `user` or `tool`.
#[test]
fn has_plan_for_role_only_counts_assistant_messages() {
    let plan_text = "## Plan\n1. First\n2. Second";

    for counted_role in ["assistant", "agent", "Assistant"] {
        assert!(has_plan_for_role(counted_role, plan_text));
    }
    for ignored_role in ["user", "tool"] {
        assert!(!has_plan_for_role(ignored_role, plan_text));
    }
}
16598
/// Records two plan messages that both carry API token fields, one tagged
/// `estimated` and one tagged `api`, and checks that plan counts and content
/// estimates include both while every API token total and the API coverage
/// count reflect only the `api`-tagged entry.
#[test]
fn api_rollups_require_api_data_source() {
    // Builds an assistant plan entry in hour 1 / day 1 for agent "codex";
    // only the id, sizes, token counts and data source vary between entries.
    let plan_entry = |message_id,
                      content_chars,
                      content_tokens_est,
                      input_tokens,
                      output_tokens,
                      data_source: &'static str| MessageMetricsEntry {
        message_id,
        created_at_ms: 0,
        hour_id: 1,
        day_id: 1,
        agent_slug: "codex".into(),
        workspace_id: 0,
        source_id: "local".into(),
        role: "assistant".into(),
        content_chars,
        content_tokens_est,
        model_name: None,
        model_family: "unknown".into(),
        model_tier: "unknown".into(),
        provider: "unknown".into(),
        api_input_tokens: Some(input_tokens),
        api_output_tokens: Some(output_tokens),
        api_cache_read_tokens: Some(0),
        api_cache_creation_tokens: Some(0),
        api_thinking_tokens: Some(0),
        api_service_tier: None,
        api_data_source: data_source.into(),
        tool_call_count: 0,
        has_tool_calls: false,
        has_plan: true,
    };

    let mut agg = AnalyticsRollupAggregator::new();

    let estimated_plan = plan_entry(1, 120, 30, 100, 50, "estimated");
    agg.record(&estimated_plan);

    let api_plan = plan_entry(2, 80, 20, 40, 10, "api");
    agg.record(&api_plan);

    let key = (1_i64, "codex".to_string(), 0_i64, "local".to_string());
    let model_key = (
        key.0,
        key.1.clone(),
        key.2,
        key.3.clone(),
        "unknown".to_string(),
        "unknown".to_string(),
    );
    let hourly = agg.hourly.get(&key).expect("hourly rollup key must exist");
    let daily = agg.daily.get(&key).expect("daily rollup key must exist");
    let models_daily = agg
        .models_daily
        .get(&model_key)
        .expect("model rollup key must exist");

    // Both entries are plans: 2 messages, 30 + 20 estimated content tokens.
    assert_eq!(hourly.plan_message_count, 2);
    assert_eq!(hourly.plan_content_tokens_est_total, 50);

    // Plan API totals: only the "api" entry's 40 + 10 tokens.
    assert_eq!(hourly.plan_api_tokens_total, 50);
    assert_eq!(daily.plan_api_tokens_total, 50);
    assert_eq!(models_daily.plan_api_tokens_total, 50);

    // Overall API totals and coverage likewise exclude the "estimated" entry.
    assert_eq!(hourly.api_tokens_total, 50);
    assert_eq!(hourly.api_input_tokens_total, 40);
    assert_eq!(hourly.api_output_tokens_total, 10);
    assert_eq!(hourly.api_coverage_message_count, 1);
    assert_eq!(daily.api_tokens_total, 50);
    assert_eq!(models_daily.api_tokens_total, 50);
}
16690
/// Scores `has_plan_heuristic` against a small hand-picked corpus and
/// requires recall of at least 0.80 on the positives and a false-positive
/// rate of at most 0.20 on the negatives.
#[test]
fn has_plan_heuristic_curated_corpus_thresholds() {
    // Number of samples the heuristic classifies as a plan.
    let flagged = |samples: &[&str]| -> usize {
        samples
            .iter()
            .filter(|sample| has_plan_heuristic(sample))
            .count()
    };

    let positives = [
        "## Plan\n1. Inspect current schema\n2. Add migration\n3. Verify rebuild",
        "Plan:\n1) Reproduce\n2) Patch\n3) Add tests",
        "Implementation plan:\n- Parse inputs\n- Update rollups\n- Run checks",
        "Next steps:\n1. Reserve file\n2. Implement\n3. Report status",
        "# Plan\n1. Gather requirements\n2. Ship changes",
        "Action plan:\n- Identify root cause\n- Fix it\n- Validate",
    ];
    let negatives = [
        "The plan is to move fast and fix things later.",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. ls\\n2. cat\"}\n```",
        "stdout:\n1. Build started\n2. Build finished\nexit code: 0",
        "I can help with that request. Let me know if you want details.",
        "Here is a list:\n- apples\n- oranges",
        "Status update: completed tasks and blockers below.",
    ];

    let true_positives = flagged(&positives);
    let false_positives = flagged(&negatives);

    let recall = true_positives as f64 / positives.len() as f64;
    let false_positive_rate = false_positives as f64 / negatives.len() as f64;

    assert!(
        recall >= 0.80,
        "plan heuristic recall too low: got {recall:.2}"
    );
    assert!(
        false_positive_rate <= 0.20,
        "plan heuristic false-positive rate too high: got {false_positive_rate:.2}"
    );
}
16734
16735 #[test]
16736 fn rebuild_analytics_repopulates_from_messages() {
16737 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
16738 use std::path::PathBuf;
16739
16740 let dir = TempDir::new().unwrap();
16741 let db_path = dir.path().join("test.db");
16742 let storage = SqliteStorage::open(&db_path).unwrap();
16743
16744 let agent = Agent {
16746 id: None,
16747 slug: "claude_code".into(),
16748 name: "Claude Code".into(),
16749 version: Some("1.0".into()),
16750 kind: AgentKind::Cli,
16751 };
16752 let agent_id = storage.ensure_agent(&agent).unwrap();
16753
16754 let ts_ms = 1_770_551_400_000_i64;
16756 let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
16757 let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);
16758
16759 let usage_json = serde_json::json!({
16760 "message": {
16761 "model": "claude-opus-4-6",
16762 "usage": {
16763 "input_tokens": 100,
16764 "output_tokens": 50,
16765 "cache_read_input_tokens": 200,
16766 "cache_creation_input_tokens": 30,
16767 "service_tier": "standard"
16768 }
16769 }
16770 });
16771
16772 let conv = Conversation {
16773 id: None,
16774 agent_slug: "claude_code".into(),
16775 workspace: None,
16776 external_id: Some("test-rebuild-1".into()),
16777 title: Some("Test conversation".into()),
16778 source_path: PathBuf::from("/tmp/test.jsonl"),
16779 started_at: Some(ts_ms),
16780 ended_at: Some(ts_ms + 60_000),
16781 approx_tokens: None,
16782 metadata_json: serde_json::Value::Null,
16783 messages: vec![
16784 Message {
16785 id: None,
16786 idx: 0,
16787 role: MessageRole::User,
16788 author: None,
16789 created_at: Some(ts_ms),
16790 content: "Hello, can you help me with a plan?".into(),
16791 extra_json: serde_json::Value::Null,
16792 snippets: vec![],
16793 },
16794 Message {
16795 id: None,
16796 idx: 1,
16797 role: MessageRole::Agent,
16798 author: None,
16799 created_at: Some(ts_ms + 30_000),
16800 content: "## Plan\n\n1. First step\n2. Second step\n3. Third step".into(),
16801 extra_json: usage_json,
16802 snippets: vec![],
16803 },
16804 Message {
16805 id: None,
16806 idx: 2,
16807 role: MessageRole::User,
16808 author: None,
16809 created_at: Some(ts_ms + 60_000),
16810 content: "Great, let's proceed!".into(),
16811 extra_json: serde_json::Value::Null,
16812 snippets: vec![],
16813 },
16814 ],
16815 source_id: "local".into(),
16816 origin_host: None,
16817 };
16818
16819 storage
16820 .insert_conversations_batched(&[(agent_id, None, &conv)])
16821 .unwrap();
16822
16823 let conn = storage.raw();
16825 let orig_mm: i64 = conn
16826 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16827 row.get_typed(0)
16828 })
16829 .unwrap();
16830 let orig_hourly: i64 = conn
16831 .query_row_map("SELECT COUNT(*) FROM usage_hourly", &[], |row| {
16832 row.get_typed(0)
16833 })
16834 .unwrap();
16835 let orig_daily: i64 = conn
16836 .query_row_map("SELECT COUNT(*) FROM usage_daily", &[], |row| {
16837 row.get_typed(0)
16838 })
16839 .unwrap();
16840 let orig_models_daily: i64 = conn
16841 .query_row_map("SELECT COUNT(*) FROM usage_models_daily", &[], |row| {
16842 row.get_typed(0)
16843 })
16844 .unwrap();
16845 let orig_api_input: i64 = conn
16846 .query_row_map(
16847 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
16848 &[],
16849 |row: &FrankenRow| row.get_typed(0),
16850 )
16851 .unwrap();
16852
16853 assert_eq!(orig_mm, 3);
16854 assert!(orig_hourly > 0);
16855 assert!(orig_daily > 0);
16856 assert!(orig_models_daily > 0);
16857
16858 conn.execute("DELETE FROM message_metrics").unwrap();
16860 conn.execute("DELETE FROM usage_hourly").unwrap();
16861 conn.execute("DELETE FROM usage_daily").unwrap();
16862 conn.execute("DELETE FROM usage_models_daily").unwrap();
16863
16864 let zero: i64 = conn
16866 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16867 row.get_typed(0)
16868 })
16869 .unwrap();
16870 assert_eq!(zero, 0);
16871
16872 let result = storage.rebuild_analytics().unwrap();
16874
16875 assert_eq!(result.message_metrics_rows, 3);
16876 assert!(result.usage_hourly_rows > 0);
16877 assert!(result.usage_daily_rows > 0);
16878 assert!(result.usage_models_daily_rows > 0);
16879 assert!(
16880 result.elapsed_ms < 10_000,
16881 "Rebuild should be fast for 3 msgs"
16882 );
16883
16884 let conn = storage.raw();
16886 let rebuilt_mm: i64 = conn
16887 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16888 row.get_typed(0)
16889 })
16890 .unwrap();
16891 assert_eq!(
16892 rebuilt_mm, orig_mm,
16893 "Rebuilt message_metrics count should match"
16894 );
16895
16896 let rebuilt_hourly: i64 = conn
16897 .query_row_map("SELECT COUNT(*) FROM usage_hourly", &[], |row| {
16898 row.get_typed(0)
16899 })
16900 .unwrap();
16901 assert_eq!(
16902 rebuilt_hourly, orig_hourly,
16903 "Rebuilt hourly rows should match"
16904 );
16905
16906 let rebuilt_daily: i64 = conn
16907 .query_row_map("SELECT COUNT(*) FROM usage_daily", &[], |row| {
16908 row.get_typed(0)
16909 })
16910 .unwrap();
16911 assert_eq!(rebuilt_daily, orig_daily, "Rebuilt daily rows should match");
16912
16913 let rebuilt_models_daily: i64 = conn
16914 .query_row_map("SELECT COUNT(*) FROM usage_models_daily", &[], |row| {
16915 row.get_typed(0)
16916 })
16917 .unwrap();
16918 assert_eq!(
16919 rebuilt_models_daily, orig_models_daily,
16920 "Rebuilt model rollup rows should match"
16921 );
16922
16923 let rebuilt_api_input: i64 = conn
16925 .query_row_map(
16926 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
16927 &[],
16928 |row: &FrankenRow| row.get_typed(0),
16929 )
16930 .unwrap();
16931 assert_eq!(
16932 rebuilt_api_input, orig_api_input,
16933 "Rebuilt API input tokens should match original"
16934 );
16935
16936 let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api): (
16938 i64,
16939 i64,
16940 i64,
16941 i64,
16942 i64,
16943 i64,
16944 ) = conn
16945 .query_row_map(
16946 "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
16947 plan_content_tokens_est_total, plan_api_tokens_total
16948 FROM usage_hourly WHERE hour_id = ?",
16949 fparams![expected_hour],
16950 |row: &FrankenRow| {
16951 Ok((
16952 row.get_typed(0)?,
16953 row.get_typed(1)?,
16954 row.get_typed(2)?,
16955 row.get_typed(3)?,
16956 row.get_typed(4)?,
16957 row.get_typed(5)?,
16958 ))
16959 },
16960 )
16961 .unwrap();
16962 assert_eq!(uh_msg, 3);
16963 assert_eq!(uh_user, 2);
16964 assert_eq!(uh_asst, 1);
16965 assert_eq!(uh_plan, 1);
16966 assert!(uh_plan_content > 0);
16967 assert!(uh_plan_api > 0);
16968
16969 let ud_msg: i64 = conn
16970 .query_row_map(
16971 "SELECT message_count FROM usage_daily WHERE day_id = ?",
16972 fparams![expected_day],
16973 |row| row.get_typed(0),
16974 )
16975 .unwrap();
16976 assert_eq!(ud_msg, 3);
16977 }
16978
16979 #[test]
16980 fn insert_conversations_batched_flushes_large_fts_batches() {
16981 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
16982 use std::path::PathBuf;
16983
16984 let dir = TempDir::new().unwrap();
16985 let db_path = dir.path().join("test.db");
16986 let storage = SqliteStorage::open(&db_path).unwrap();
16987 storage
16992 .ensure_search_fallback_fts_consistency()
16993 .expect("ensure FTS consistency before insert");
16994
16995 let agent = Agent {
16996 id: None,
16997 slug: "codex".into(),
16998 name: "Codex".into(),
16999 version: Some("0.2.3".into()),
17000 kind: AgentKind::Cli,
17001 };
17002 let agent_id = storage.ensure_agent(&agent).unwrap();
17003
17004 let content = "y".repeat((FTS_ENTRY_BATCH_MAX_CHARS / 2) + 1);
17005 let messages: Vec<_> = (0_i64..2)
17006 .map(|i| Message {
17007 id: None,
17008 idx: i,
17009 role: MessageRole::Agent,
17010 author: None,
17011 created_at: Some(1_700_000_000_000 + i),
17012 content: format!("{i}-{content}"),
17013 extra_json: serde_json::Value::Null,
17014 snippets: Vec::new(),
17015 })
17016 .collect();
17017 let conv = Conversation {
17018 id: None,
17019 agent_slug: "codex".into(),
17020 workspace: Some(PathBuf::from("/tmp/workspace")),
17021 external_id: Some("fts-large-batch".into()),
17022 title: Some("FTS Large Batch".into()),
17023 source_path: PathBuf::from("/tmp/rollout.jsonl"),
17024 started_at: Some(1_700_000_000_000),
17025 ended_at: Some(1_700_000_000_999),
17026 approx_tokens: None,
17027 metadata_json: serde_json::Value::Null,
17028 messages,
17029 source_id: "local".into(),
17030 origin_host: None,
17031 };
17032
17033 let outcomes = storage
17034 .insert_conversations_batched(&[(agent_id, None, &conv)])
17035 .unwrap();
17036 assert_eq!(outcomes.len(), 1);
17037 assert_eq!(outcomes[0].inserted_indices.len(), conv.messages.len());
17038
17039 let message_count: i64 = storage
17040 .conn
17041 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
17042 row.get_typed(0)
17043 })
17044 .unwrap();
17045 let fts_count: i64 = storage
17046 .conn
17047 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
17048 row.get_typed(0)
17049 })
17050 .unwrap();
17051
17052 assert_eq!(message_count, conv.messages.len() as i64);
17053 assert_eq!(fts_count, conv.messages.len() as i64);
17054 }
17055
17056 fn make_profiled_storage_remote_conversation(
17057 external_id: i64,
17058 msg_count: usize,
17059 ) -> Conversation {
17060 Conversation {
17061 id: None,
17062 agent_slug: "codex".into(),
17063 workspace: Some(PathBuf::from("/ws/profiled-storage-remote")),
17064 external_id: Some(format!("profiled-storage-remote-{external_id}")),
17065 title: Some(format!(
17066 "Profiled storage remote conversation {external_id}"
17067 )),
17068 source_path: PathBuf::from(format!("/log/profiled-storage-remote-{external_id}.jsonl")),
17069 started_at: Some(10_000 + external_id * 100),
17070 ended_at: Some(10_000 + external_id * 100 + msg_count as i64),
17071 approx_tokens: Some(msg_count as i64 * 32),
17072 metadata_json: serde_json::json!({ "bench": true }),
17073 messages: (0..msg_count)
17074 .map(|idx| Message {
17075 id: None,
17076 idx: idx as i64,
17077 role: if idx % 2 == 0 {
17078 MessageRole::User
17079 } else {
17080 MessageRole::Agent
17081 },
17082 author: Some("tester".into()),
17083 created_at: Some(20_000 + external_id * 100 + idx as i64),
17084 content: format!(
17085 "profiled storage remote content ext={external_id} idx={idx} {}",
17086 "x".repeat(64)
17087 ),
17088 extra_json: serde_json::json!({ "idx": idx }),
17089 snippets: Vec::new(),
17090 })
17091 .collect(),
17092 source_id: "profiled-storage-remote-source".into(),
17093 origin_host: Some("builder-profile".into()),
17094 }
17095 }
17096
17097 fn make_profiled_append_remote_merge_conversation(
17098 external_id: i64,
17099 msg_count: usize,
17100 ) -> Conversation {
17101 let base_ts = 100_000 + external_id * 1_000;
17102 Conversation {
17103 id: None,
17104 agent_slug: "codex".into(),
17105 workspace: Some(PathBuf::from("/ws/profiled-append-remote")),
17106 external_id: Some(format!("profiled-append-remote-{external_id}")),
17107 title: Some(format!("Profiled append remote conversation {external_id}")),
17108 source_path: PathBuf::from(format!("/log/profiled-append-remote-{external_id}.jsonl")),
17109 started_at: Some(base_ts),
17110 ended_at: Some(base_ts + msg_count as i64),
17111 approx_tokens: Some(msg_count as i64 * 50),
17112 metadata_json: serde_json::json!({ "bench": true }),
17113 messages: (0..msg_count)
17114 .map(|idx| Message {
17115 id: None,
17116 idx: idx as i64,
17117 role: if idx % 2 == 0 {
17118 MessageRole::User
17119 } else {
17120 MessageRole::Agent
17121 },
17122 author: Some(format!("model-{}", external_id % 5)),
17123 created_at: Some(base_ts + idx as i64),
17124 content: format!(
17125 "Profiled append remote conversation {} message {}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
17126 external_id, idx
17127 ),
17128 extra_json: serde_json::json!({ "bench": true }),
17129 snippets: Vec::new(),
17130 })
17131 .collect(),
17132 source_id: "profiled-append-remote-source".into(),
17133 origin_host: Some("builder-profile".into()),
17134 }
17135 }
17136
17137 #[test]
17138 fn insert_conversation_tree_batched_new_message_ids_match_snippet_rows() {
17139 let dir = TempDir::new().unwrap();
17140 let db_path = dir.path().join("batched-message-ids.db");
17141 let storage = SqliteStorage::open(&db_path).unwrap();
17142 let agent_id = storage
17143 .ensure_agent(&Agent {
17144 id: None,
17145 slug: "codex".into(),
17146 name: "Codex".into(),
17147 version: None,
17148 kind: AgentKind::Cli,
17149 })
17150 .unwrap();
17151 let workspace_id = storage
17152 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
17153 .unwrap();
17154 let mut conv = make_profiled_storage_remote_conversation(42, 5);
17155 for (idx, msg) in conv.messages.iter_mut().enumerate() {
17156 msg.snippets.push(Snippet {
17157 id: None,
17158 file_path: Some(PathBuf::from(format!("src/file_{idx}.rs"))),
17159 start_line: Some((idx + 1) as i64),
17160 end_line: Some((idx + 2) as i64),
17161 language: Some("rust".into()),
17162 snippet_text: Some(format!("fn snippet_{idx}() {{}}")),
17163 });
17164 }
17165 let outcome = storage
17166 .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
17167 .unwrap();
17168
17169 let message_count: i64 = storage
17170 .conn
17171 .query_row_map(
17172 "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
17173 fparams![outcome.conversation_id],
17174 |row| row.get_typed(0),
17175 )
17176 .unwrap();
17177 let joined_snippet_count: i64 = storage
17178 .conn
17179 .query_row_map(
17180 "SELECT COUNT(*)
17181 FROM snippets s
17182 JOIN messages m ON s.message_id = m.id
17183 WHERE m.conversation_id = ?1",
17184 fparams![outcome.conversation_id],
17185 |row| row.get_typed(0),
17186 )
17187 .unwrap();
17188
17189 assert_eq!(message_count, conv.messages.len() as i64);
17190 assert_eq!(joined_snippet_count, conv.messages.len() as i64);
17191 }
17192
17193 #[test]
17194 fn insert_conversation_tree_batched_appended_message_ids_match_snippet_rows() {
17195 let dir = TempDir::new().unwrap();
17196 let db_path = dir.path().join("batched-append-message-ids.db");
17197 let storage = SqliteStorage::open(&db_path).unwrap();
17198 let agent_id = storage
17199 .ensure_agent(&Agent {
17200 id: None,
17201 slug: "codex".into(),
17202 name: "Codex".into(),
17203 version: None,
17204 kind: AgentKind::Cli,
17205 })
17206 .unwrap();
17207 let workspace_id = storage
17208 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
17209 .unwrap();
17210
17211 let mut initial = make_profiled_storage_remote_conversation(77, 2);
17212 for (idx, msg) in initial.messages.iter_mut().enumerate() {
17213 msg.snippets.push(Snippet {
17214 id: None,
17215 file_path: Some(PathBuf::from(format!("src/append_initial_{idx}.rs"))),
17216 start_line: Some((idx + 1) as i64),
17217 end_line: Some((idx + 2) as i64),
17218 language: Some("rust".into()),
17219 snippet_text: Some(format!("fn append_initial_{idx}() {{}}")),
17220 });
17221 }
17222 let first = storage
17223 .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
17224 .unwrap();
17225 assert_eq!(first.inserted_indices, vec![0, 1]);
17226
17227 let mut appended = make_profiled_storage_remote_conversation(77, 5);
17228 for (idx, msg) in appended.messages.iter_mut().enumerate() {
17229 msg.snippets.push(Snippet {
17230 id: None,
17231 file_path: Some(PathBuf::from(format!("src/append_full_{idx}.rs"))),
17232 start_line: Some((idx + 10) as i64),
17233 end_line: Some((idx + 11) as i64),
17234 language: Some("rust".into()),
17235 snippet_text: Some(format!("fn append_full_{idx}() {{}}")),
17236 });
17237 }
17238 let second = storage
17239 .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
17240 .unwrap();
17241 assert_eq!(second.conversation_id, first.conversation_id);
17242 assert_eq!(second.inserted_indices, vec![2, 3, 4]);
17243
17244 let message_count: i64 = storage
17245 .conn
17246 .query_row_map(
17247 "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
17248 fparams![first.conversation_id],
17249 |row| row.get_typed(0),
17250 )
17251 .unwrap();
17252 let joined_snippets: Vec<(i64, String)> = storage
17253 .conn
17254 .query_map_collect(
17255 "SELECT m.idx, s.file_path
17256 FROM snippets s
17257 JOIN messages m ON s.message_id = m.id
17258 WHERE m.conversation_id = ?1
17259 ORDER BY m.idx, s.id",
17260 fparams![first.conversation_id],
17261 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
17262 )
17263 .unwrap();
17264
17265 assert_eq!(message_count, 5);
17266 assert_eq!(
17267 joined_snippets,
17268 vec![
17269 (0, "src/append_initial_0.rs".to_string()),
17270 (1, "src/append_initial_1.rs".to_string()),
17271 (2, "src/append_full_2.rs".to_string()),
17272 (3, "src/append_full_3.rs".to_string()),
17273 (4, "src/append_full_4.rs".to_string()),
17274 ]
17275 );
17276 }
17277
17278 #[test]
17279 fn insert_conversation_tree_rehydrates_external_lookup_after_manual_clear() {
17280 let dir = TempDir::new().unwrap();
17281 let db_path = dir.path().join("external-lookup-rehydrate.db");
17282 let storage = SqliteStorage::open(&db_path).unwrap();
17283 let agent_id = storage
17284 .ensure_agent(&Agent {
17285 id: None,
17286 slug: "codex".into(),
17287 name: "Codex".into(),
17288 version: None,
17289 kind: AgentKind::Cli,
17290 })
17291 .unwrap();
17292 let workspace_id = storage
17293 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
17294 .unwrap();
17295
17296 let initial = make_profiled_storage_remote_conversation(88, 2);
17297 let first = storage
17298 .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
17299 .unwrap();
17300 let external_id = initial.external_id.as_deref().unwrap();
17301 let lookup_key =
17302 conversation_external_lookup_key(&initial.source_id, agent_id, external_id);
17303 let lookup_id: i64 = storage
17304 .conn
17305 .query_row_map(
17306 "SELECT conversation_id
17307 FROM conversation_external_tail_lookup
17308 WHERE lookup_key = ?1",
17309 fparams![lookup_key.as_str()],
17310 |row| row.get_typed(0),
17311 )
17312 .unwrap();
17313 assert_eq!(lookup_id, first.conversation_id);
17314
17315 storage
17316 .conn
17317 .execute_compat(
17318 "DELETE FROM conversation_external_tail_lookup WHERE lookup_key = ?1",
17319 fparams![lookup_key.as_str()],
17320 )
17321 .unwrap();
17322
17323 let appended = make_profiled_storage_remote_conversation(88, 4);
17324 let second = storage
17325 .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
17326 .unwrap();
17327 assert_eq!(second.conversation_id, first.conversation_id);
17328 assert_eq!(second.inserted_indices, vec![2, 3]);
17329
17330 let conversation_count: i64 = storage
17331 .conn
17332 .query_row_map(
17333 "SELECT COUNT(*)
17334 FROM conversations
17335 WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
17336 fparams![initial.source_id.as_str(), agent_id, external_id],
17337 |row| row.get_typed(0),
17338 )
17339 .unwrap();
17340 let restored_lookup: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
17341 .conn
17342 .query_row_map(
17343 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
17344 FROM conversation_external_tail_lookup
17345 WHERE lookup_key = ?1",
17346 fparams![lookup_key.as_str()],
17347 |row| {
17348 Ok((
17349 row.get_typed(0)?,
17350 row.get_typed(1)?,
17351 row.get_typed(2)?,
17352 row.get_typed(3)?,
17353 ))
17354 },
17355 )
17356 .unwrap();
17357 let tail_state: (Option<i64>, Option<i64>, Option<i64>) = storage
17358 .conn
17359 .query_row_map(
17360 "SELECT ended_at, last_message_idx, last_message_created_at
17361 FROM conversation_tail_state
17362 WHERE conversation_id = ?1",
17363 fparams![first.conversation_id],
17364 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
17365 )
17366 .unwrap();
17367 assert_eq!(conversation_count, 1);
17368 assert_eq!(
17369 restored_lookup,
17370 (
17371 first.conversation_id,
17372 tail_state.0,
17373 tail_state.1,
17374 tail_state.2
17375 )
17376 );
17377 assert_eq!(
17378 tail_state,
17379 (
17380 appended.messages[3].created_at,
17381 Some(3),
17382 appended.messages[3].created_at
17383 )
17384 );
17385 }
17386
17387 #[test]
17388 fn insert_conversation_tree_recreates_daily_stats_after_manual_clear() {
17389 let dir = TempDir::new().unwrap();
17390 let db_path = dir.path().join("test.db");
17391 let storage = SqliteStorage::open(&db_path).unwrap();
17392 let agent_id = storage
17393 .ensure_agent(&Agent {
17394 id: None,
17395 slug: "codex".into(),
17396 name: "Codex".into(),
17397 version: None,
17398 kind: AgentKind::Cli,
17399 })
17400 .unwrap();
17401 let workspace = PathBuf::from("/ws/profiled-storage-remote");
17402 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
17403
17404 storage
17405 .insert_conversation_tree(
17406 agent_id,
17407 Some(workspace_id),
17408 &make_profiled_storage_remote_conversation(0, 3),
17409 )
17410 .unwrap();
17411 storage.conn.execute("DELETE FROM daily_stats").unwrap();
17412
17413 storage
17414 .insert_conversation_tree(
17415 agent_id,
17416 Some(workspace_id),
17417 &make_profiled_storage_remote_conversation(1, 2),
17418 )
17419 .unwrap();
17420
17421 let row_count: i64 = storage
17422 .conn
17423 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
17424 row.get_typed(0)
17425 })
17426 .unwrap();
17427 let (session_count, message_count): (i64, i64) = storage
17428 .conn
17429 .query_row_map(
17430 "SELECT session_count, message_count
17431 FROM daily_stats
17432 WHERE agent_slug = 'all' AND source_id = 'all'",
17433 fparams![],
17434 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
17435 )
17436 .unwrap();
17437
17438 assert_eq!(row_count, 4);
17439 assert_eq!(session_count, 1);
17440 assert_eq!(message_count, 2);
17441 }
17442
17443 #[test]
17444 #[serial]
17445 fn insert_conversation_tree_stage_profile_tracks_steady_state_remote_reuse() {
17446 let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");
17447
17448 for &(msg_count, iterations) in &[(5usize, 80usize), (20, 50), (50, 24)] {
17449 let dir = TempDir::new().unwrap();
17450 let db_path = dir.path().join(format!("profile-{msg_count}.db"));
17451 let storage = SqliteStorage::open(&db_path).unwrap();
17452 let agent_id = storage
17453 .ensure_agent(&Agent {
17454 id: None,
17455 slug: "codex".into(),
17456 name: "Codex".into(),
17457 version: None,
17458 kind: AgentKind::Cli,
17459 })
17460 .unwrap();
17461 let workspace = PathBuf::from("/ws/profiled-storage-remote");
17462 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
17463
17464 storage
17465 .insert_conversation_tree(
17466 agent_id,
17467 Some(workspace_id),
17468 &make_profiled_storage_remote_conversation(0, msg_count),
17469 )
17470 .unwrap();
17471
17472 let mut profile = InsertConversationTreePerfProfile::default();
17473 for external_id in 1..=iterations {
17474 storage
17475 .insert_conversation_tree_with_profile(
17476 agent_id,
17477 Some(workspace_id),
17478 &make_profiled_storage_remote_conversation(external_id as i64, msg_count),
17479 &mut profile,
17480 )
17481 .unwrap();
17482 }
17483
17484 let accounted_duration = profile.source_duration
17485 + profile.tx_open_duration
17486 + profile.existing_lookup_duration
17487 + profile.conversation_row_duration
17488 + profile.message_insert_duration
17489 + profile.snippet_insert_duration
17490 + profile.fts_entry_duration
17491 + profile.fts_flush_duration
17492 + profile.analytics_duration
17493 + profile.commit_duration;
17494 assert_eq!(profile.invocations, iterations);
17495 assert_eq!(profile.messages, iterations * msg_count);
17496 assert_eq!(profile.inserted_messages, iterations * msg_count);
17497 assert!(
17498 profile.total_duration >= accounted_duration,
17499 "accounted stage durations cannot exceed total duration"
17500 );
17501
17502 profile.log_summary(&format!("remote_reuse_{msg_count}_msgs"));
17503 }
17504 }
17505
17506 #[test]
17507 #[serial]
17508 fn insert_conversation_tree_stage_profile_tracks_append_remote_source_merge() {
17509 let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");
17510
17511 for &(msg_count, iterations) in &[(5usize, 80usize), (20, 50), (50, 24)] {
17512 let dir = TempDir::new().unwrap();
17513 let db_path = dir.path().join(format!("append-profile-{msg_count}.db"));
17514 let storage = SqliteStorage::open(&db_path).unwrap();
17515 let agent_id = storage
17516 .ensure_agent(&Agent {
17517 id: None,
17518 slug: "codex".into(),
17519 name: "Codex".into(),
17520 version: None,
17521 kind: AgentKind::Cli,
17522 })
17523 .unwrap();
17524 let workspace = PathBuf::from("/ws/profiled-append-remote");
17525 let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();
17526
17527 for external_id in 0..iterations {
17528 storage
17529 .insert_conversation_tree(
17530 agent_id,
17531 Some(workspace_id),
17532 &make_profiled_append_remote_merge_conversation(
17533 external_id as i64,
17534 msg_count,
17535 ),
17536 )
17537 .unwrap();
17538 }
17539
17540 let mut profile = InsertConversationTreePerfProfile::default();
17541 for external_id in 0..iterations {
17542 storage
17543 .append_existing_conversation_with_profile(
17544 agent_id,
17545 Some(workspace_id),
17546 &make_profiled_append_remote_merge_conversation(
17547 external_id as i64,
17548 msg_count * 2,
17549 ),
17550 &mut profile,
17551 )
17552 .unwrap();
17553 }
17554
17555 let accounted_duration = profile.source_duration
17556 + profile.tx_open_duration
17557 + profile.existing_lookup_duration
17558 + profile.existing_idx_lookup_duration
17559 + profile.existing_replay_lookup_duration
17560 + profile.dedupe_filter_duration
17561 + profile.conversation_row_duration
17562 + profile.message_insert_duration
17563 + profile.snippet_insert_duration
17564 + profile.fts_entry_duration
17565 + profile.fts_flush_duration
17566 + profile.analytics_duration
17567 + profile.commit_duration;
17568 assert_eq!(profile.invocations, iterations);
17569 assert_eq!(profile.messages, iterations * msg_count * 2);
17570 assert_eq!(profile.inserted_messages, iterations * msg_count);
17571 assert!(
17572 profile.total_duration >= accounted_duration,
17573 "accounted append stage durations cannot exceed total duration"
17574 );
17575
17576 profile.log_summary(&format!("append_remote_merge_{msg_count}_msgs"));
17577 }
17578 }
17579
17580 #[test]
17581 fn rebuild_daily_stats_recomputes_materialized_totals_without_monolithic_group_by() {
17582 let dir = TempDir::new().unwrap();
17583 let db_path = dir.path().join("test.db");
17584 let storage = SqliteStorage::open(&db_path).unwrap();
17585 let started_at = 1_700_000_000_000_i64;
17586 let day_id = FrankenStorage::day_id_from_millis(started_at);
17587 let hour_id = FrankenStorage::hour_id_from_millis(started_at);
17588
17589 storage
17590 .conn
17591 .execute_compat(
17592 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
17593 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
17594 fparams![1_i64, "codex", "Codex", "cli"],
17595 )
17596 .unwrap();
17597 storage
17598 .conn
17599 .execute_compat(
17600 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
17601 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
17602 fparams![2_i64, "claude", "Claude", "cli"],
17603 )
17604 .unwrap();
17605
17606 storage
17607 .conn
17608 .execute_compat(
17609 "INSERT INTO conversations (
17610 id, agent_id, workspace_id, source_id, external_id, title, source_path,
17611 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
17612 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)",
17613 fparams![
17614 1_i64,
17615 1_i64,
17616 LOCAL_SOURCE_ID,
17617 "daily-a",
17618 "Daily A",
17619 "/tmp/daily-a.jsonl",
17620 started_at,
17621 started_at + 200,
17622 "{}"
17623 ],
17624 )
17625 .unwrap();
17626 storage
17627 .conn
17628 .execute_compat(
17629 "INSERT INTO conversations (
17630 id, agent_id, workspace_id, source_id, external_id, title, source_path,
17631 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
17632 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)",
17633 fparams![
17634 2_i64,
17635 2_i64,
17636 LOCAL_SOURCE_ID,
17637 "daily-b",
17638 "Daily B",
17639 "/tmp/daily-b.jsonl",
17640 started_at,
17641 started_at + 300,
17642 "{}"
17643 ],
17644 )
17645 .unwrap();
17646
17647 storage
17648 .conn
17649 .execute_compat(
17650 "INSERT INTO messages (
17651 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17652 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17653 fparams![1_i64, 1_i64, 0_i64, "user", started_at, "hello"],
17654 )
17655 .unwrap();
17656 storage
17657 .conn
17658 .execute_compat(
17659 "INSERT INTO messages (
17660 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17661 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17662 fparams![2_i64, 1_i64, 1_i64, "assistant", started_at + 100, "response"],
17663 )
17664 .unwrap();
17665 storage
17666 .conn
17667 .execute_compat(
17668 "INSERT INTO messages (
17669 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17670 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17671 fparams![3_i64, 2_i64, 0_i64, "user", started_at + 50, "abc"],
17672 )
17673 .unwrap();
17674
17675 for (message_id, agent_slug, role, content_len) in [
17676 (1_i64, "codex", "user", 5_i64),
17677 (2_i64, "codex", "assistant", 8_i64),
17678 (3_i64, "claude", "user", 3_i64),
17679 ] {
17680 storage
17681 .conn
17682 .execute_compat(
17683 "INSERT INTO message_metrics (
17684 message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
17685 role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
17686 api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
17687 api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
17688 model_name, model_family, model_tier, provider
17689 ) VALUES (
17690 ?1, ?2, ?3, ?4, ?5, ?6, ?7,
17691 ?8, ?9, ?10, ?11, ?12,
17692 ?13, ?14, ?15,
17693 ?16, ?17, ?18, ?19, ?20,
17694 ?21, ?22, ?23, ?24
17695 )",
17696 fparams![
17697 message_id,
17698 started_at,
17699 hour_id,
17700 day_id,
17701 agent_slug,
17702 0_i64,
17703 LOCAL_SOURCE_ID,
17704 role,
17705 content_len,
17706 content_len / 4,
17707 0_i64,
17708 0_i64,
17709 0_i64,
17710 0_i64,
17711 0_i64,
17712 "",
17713 "estimated",
17714 0_i64,
17715 0_i64,
17716 0_i64,
17717 "",
17718 "unknown",
17719 "unknown",
17720 "unknown"
17721 ],
17722 )
17723 .unwrap();
17724 }
17725
17726 storage.conn.execute("DELETE FROM daily_stats").unwrap();
17727
17728 let rebuilt = storage.rebuild_daily_stats().unwrap();
17729 assert_eq!(rebuilt.total_sessions, 2);
17730
17731 let health = storage.daily_stats_health().unwrap();
17732 assert_eq!(health.conversation_count, 2);
17733 assert_eq!(health.materialized_total, 2);
17734 assert_eq!(health.drift, 0);
17735
17736 let total_messages: i64 = storage
17737 .conn
17738 .query_row_map(
17739 "SELECT message_count FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
17740 fparams![],
17741 |row| row.get_typed(0),
17742 )
17743 .unwrap();
17744 assert_eq!(total_messages, 3);
17745 }
17746
17747 #[test]
17748 fn rebuild_daily_stats_preserves_byte_counts_with_message_metrics() {
17749 let dir = TempDir::new().unwrap();
17750 let db_path = dir.path().join("test.db");
17751 let storage = SqliteStorage::open(&db_path).unwrap();
17752
17753 let content = "ASCII🙂é漢字";
17754 let expected_bytes = content.len() as i64;
17755 let started_at = 1_704_067_200_000_i64;
17756 let day_id = FrankenStorage::day_id_from_millis(started_at);
17757 let hour_id = FrankenStorage::hour_id_from_millis(started_at);
17758
17759 storage
17760 .conn
17761 .execute_compat(
17762 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
17763 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
17764 fparams![1_i64, "tester", "Tester", "cli"],
17765 )
17766 .unwrap();
17767 storage
17768 .conn
17769 .execute_compat(
17770 "INSERT INTO conversations (
17771 id, agent_id, workspace_id, source_id, external_id, title, source_path,
17772 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
17773 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)",
17774 fparams![
17775 1_i64,
17776 1_i64,
17777 LOCAL_SOURCE_ID,
17778 "unicode-metrics",
17779 "Unicode Metrics",
17780 "/tmp/unicode-metrics.jsonl",
17781 started_at,
17782 "{}"
17783 ],
17784 )
17785 .unwrap();
17786 storage
17787 .conn
17788 .execute_compat(
17789 "INSERT INTO messages (
17790 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17791 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17792 fparams![1_i64, 1_i64, 0_i64, "user", started_at, content],
17793 )
17794 .unwrap();
17795 storage
17796 .conn
17797 .execute_compat(
17798 "INSERT INTO message_metrics (
17799 message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
17800 role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
17801 api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
17802 api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
17803 model_name, model_family, model_tier, provider
17804 ) VALUES (
17805 ?1, ?2, ?3, ?4, ?5, ?6, ?7,
17806 ?8, ?9, ?10, ?11, ?12,
17807 ?13, ?14, ?15,
17808 ?16, ?17, ?18, ?19, ?20,
17809 ?21, ?22, ?23, ?24
17810 )",
17811 fparams![
17812 1_i64,
17813 started_at,
17814 hour_id,
17815 day_id,
17816 "tester",
17817 0_i64,
17818 LOCAL_SOURCE_ID,
17819 "user",
17820 expected_bytes,
17821 expected_bytes / 4,
17822 0_i64,
17823 0_i64,
17824 0_i64,
17825 0_i64,
17826 0_i64,
17827 "",
17828 "estimated",
17829 0_i64,
17830 0_i64,
17831 0_i64,
17832 "",
17833 "unknown",
17834 "unknown",
17835 "unknown"
17836 ],
17837 )
17838 .unwrap();
17839
17840 let mut tx = storage.conn.transaction().unwrap();
17841 franken_update_daily_stats_in_tx(
17842 &storage,
17843 &tx,
17844 "tester",
17845 LOCAL_SOURCE_ID,
17846 Some(started_at),
17847 StatsDelta {
17848 session_count_delta: 1,
17849 message_count_delta: 1,
17850 total_chars_delta: expected_bytes,
17851 },
17852 )
17853 .unwrap();
17854 tx.commit().unwrap();
17855
17856 let inline_total: i64 = storage
17857 .conn
17858 .query_row_map(
17859 "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
17860 fparams![],
17861 |row| row.get_typed(0),
17862 )
17863 .unwrap();
17864 assert_eq!(inline_total, expected_bytes);
17865
17866 storage.conn.execute("DELETE FROM daily_stats").unwrap();
17867
17868 let rebuilt = storage.rebuild_daily_stats().unwrap();
17869 assert_eq!(rebuilt.total_sessions, 1);
17870
17871 let rebuilt_total: i64 = storage
17872 .conn
17873 .query_row_map(
17874 "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
17875 fparams![],
17876 |row| row.get_typed(0),
17877 )
17878 .unwrap();
17879 assert_eq!(rebuilt_total, expected_bytes);
17880 }
17881
17882 #[test]
17883 fn rebuild_daily_stats_raw_fallback_preserves_byte_counts() {
17884 let dir = TempDir::new().unwrap();
17885 let db_path = dir.path().join("test.db");
17886 let storage = SqliteStorage::open(&db_path).unwrap();
17887
17888 let content = "fallback🙂é漢字";
17889 let expected_bytes = content.len() as i64;
17890 let started_at = 1_704_067_200_000_i64;
17891 storage
17892 .conn
17893 .execute_compat(
17894 "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
17895 VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)",
17896 fparams![1_i64, "tester", "Tester", "cli"],
17897 )
17898 .unwrap();
17899 storage
17900 .conn
17901 .execute_compat(
17902 "INSERT INTO conversations (
17903 id, agent_id, workspace_id, source_id, external_id, title, source_path,
17904 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
17905 ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)",
17906 fparams![
17907 1_i64,
17908 1_i64,
17909 LOCAL_SOURCE_ID,
17910 "unicode-fallback",
17911 "Unicode Fallback",
17912 "/tmp/unicode-fallback.jsonl",
17913 started_at,
17914 "{}"
17915 ],
17916 )
17917 .unwrap();
17918 storage
17919 .conn
17920 .execute_compat(
17921 "INSERT INTO messages (
17922 id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
17923 ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)",
17924 fparams![1_i64, 1_i64, 0_i64, "assistant", started_at, content],
17925 )
17926 .unwrap();
17927
17928 let mut tx = storage.conn.transaction().unwrap();
17929 franken_update_daily_stats_in_tx(
17930 &storage,
17931 &tx,
17932 "tester",
17933 LOCAL_SOURCE_ID,
17934 Some(started_at),
17935 StatsDelta {
17936 session_count_delta: 1,
17937 message_count_delta: 1,
17938 total_chars_delta: expected_bytes,
17939 },
17940 )
17941 .unwrap();
17942 tx.commit().unwrap();
17943
17944 storage.conn.execute("DELETE FROM daily_stats").unwrap();
17945
17946 let rebuilt = storage.rebuild_daily_stats().unwrap();
17947 assert_eq!(rebuilt.total_sessions, 1);
17948
17949 let rebuilt_total: i64 = storage
17950 .conn
17951 .query_row_map(
17952 "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
17953 fparams![],
17954 |row| row.get_typed(0),
17955 )
17956 .unwrap();
17957 assert_eq!(rebuilt_total, expected_bytes);
17958 }
17959
17960 #[test]
17961 fn insert_conversations_batched_appends_duplicate_external_id() {
17962 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
17963 use std::path::PathBuf;
17964
17965 let dir = TempDir::new().unwrap();
17966 let db_path = dir.path().join("test.db");
17967 let storage = SqliteStorage::open(&db_path).unwrap();
17968
17969 let agent = Agent {
17970 id: None,
17971 slug: "codex".into(),
17972 name: "Codex".into(),
17973 version: Some("0.2.3".into()),
17974 kind: AgentKind::Cli,
17975 };
17976 let agent_id = storage.ensure_agent(&agent).unwrap();
17977
17978 let base_conv = |messages: Vec<Message>| Conversation {
17979 id: None,
17980 agent_slug: "codex".into(),
17981 workspace: Some(PathBuf::from("/tmp/workspace")),
17982 external_id: Some("shared-session".into()),
17983 title: Some("Shared Session".into()),
17984 source_path: PathBuf::from("/tmp/rollout.jsonl"),
17985 started_at: Some(1_700_000_000_000),
17986 ended_at: Some(1_700_000_000_999),
17987 approx_tokens: None,
17988 metadata_json: serde_json::Value::Null,
17989 messages,
17990 source_id: "local".into(),
17991 origin_host: None,
17992 };
17993
17994 let conv_a = base_conv(vec![
17995 Message {
17996 id: None,
17997 idx: 0,
17998 role: MessageRole::User,
17999 author: None,
18000 created_at: Some(1_700_000_000_000),
18001 content: "first".into(),
18002 extra_json: serde_json::Value::Null,
18003 snippets: Vec::new(),
18004 },
18005 Message {
18006 id: None,
18007 idx: 1,
18008 role: MessageRole::Agent,
18009 author: None,
18010 created_at: Some(1_700_000_000_100),
18011 content: "second".into(),
18012 extra_json: serde_json::Value::Null,
18013 snippets: Vec::new(),
18014 },
18015 ]);
18016 let conv_b = base_conv(vec![
18017 Message {
18018 id: None,
18019 idx: 0,
18020 role: MessageRole::User,
18021 author: None,
18022 created_at: Some(1_700_000_000_000),
18023 content: "first".into(),
18024 extra_json: serde_json::Value::Null,
18025 snippets: Vec::new(),
18026 },
18027 Message {
18028 id: None,
18029 idx: 1,
18030 role: MessageRole::Agent,
18031 author: None,
18032 created_at: Some(1_700_000_000_100),
18033 content: "second".into(),
18034 extra_json: serde_json::Value::Null,
18035 snippets: Vec::new(),
18036 },
18037 Message {
18038 id: None,
18039 idx: 2,
18040 role: MessageRole::User,
18041 author: None,
18042 created_at: Some(1_700_000_000_200),
18043 content: "third".into(),
18044 extra_json: serde_json::Value::Null,
18045 snippets: Vec::new(),
18046 },
18047 Message {
18048 id: None,
18049 idx: 3,
18050 role: MessageRole::Agent,
18051 author: None,
18052 created_at: Some(1_700_000_000_300),
18053 content: "fourth".into(),
18054 extra_json: serde_json::Value::Null,
18055 snippets: Vec::new(),
18056 },
18057 ]);
18058
18059 let outcomes = storage
18060 .insert_conversations_batched(&[(agent_id, None, &conv_a), (agent_id, None, &conv_b)])
18061 .unwrap();
18062 assert_eq!(outcomes.len(), 2);
18063 assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
18064 assert_eq!(outcomes[1].inserted_indices, vec![2, 3]);
18065 assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);
18066
18067 let conversation_count: i64 = storage
18068 .conn
18069 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18070 row.get_typed(0)
18071 })
18072 .unwrap();
18073 let conversation_count_not_indexed: i64 = storage
18074 .conn
18075 .query_row_map(
18076 "SELECT COUNT(*) FROM conversations NOT INDEXED",
18077 fparams![],
18078 |row| row.get_typed(0),
18079 )
18080 .unwrap();
18081 let conversation_count_source_index: i64 = storage
18082 .conn
18083 .query_row_map(
18084 "SELECT COUNT(*) FROM conversations INDEXED BY idx_conversations_source_id",
18085 fparams![],
18086 |row| row.get_typed(0),
18087 )
18088 .unwrap();
18089 let message_count: i64 = storage
18090 .conn
18091 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
18092 row.get_typed(0)
18093 })
18094 .unwrap();
18095 let reopened_storage = SqliteStorage::open(&db_path).unwrap();
18096 let reopened_conversation_count: i64 = reopened_storage
18097 .conn
18098 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18099 row.get_typed(0)
18100 })
18101 .unwrap();
18102 let reopened_conversation_count_not_indexed: i64 = reopened_storage
18103 .conn
18104 .query_row_map(
18105 "SELECT COUNT(*) FROM conversations NOT INDEXED",
18106 fparams![],
18107 |row| row.get_typed(0),
18108 )
18109 .unwrap();
18110 let reopened_conversation_ids: Vec<i64> = reopened_storage
18111 .conn
18112 .query_map_collect(
18113 "SELECT id FROM conversations ORDER BY id",
18114 fparams![],
18115 |row| row.get_typed(0),
18116 )
18117 .unwrap();
18118 let reopened_conversation_ids_not_indexed: Vec<i64> = reopened_storage
18119 .conn
18120 .query_map_collect(
18121 "SELECT id FROM conversations NOT INDEXED ORDER BY id",
18122 fparams![],
18123 |row| row.get_typed(0),
18124 )
18125 .unwrap();
18126 let reopened_conversation_ids_source_index: Vec<i64> = reopened_storage
18127 .conn
18128 .query_map_collect(
18129 "SELECT id FROM conversations INDEXED BY idx_conversations_source_id ORDER BY id",
18130 fparams![],
18131 |row| row.get_typed(0),
18132 )
18133 .unwrap();
18134
18135 assert_eq!(reopened_conversation_ids, vec![outcomes[0].conversation_id]);
18136 assert_eq!(
18137 reopened_conversation_ids_not_indexed,
18138 vec![outcomes[0].conversation_id]
18139 );
18140 assert_eq!(
18141 reopened_conversation_ids_source_index,
18142 vec![outcomes[0].conversation_id]
18143 );
18144 assert_eq!(reopened_conversation_count, 1);
18145 assert_eq!(reopened_conversation_count_not_indexed, 1);
18146 assert_eq!(conversation_count_not_indexed, 1);
18147 assert_eq!(conversation_count_source_index, 1);
18148 assert_eq!(conversation_count, 1);
18149 assert_eq!(message_count, 4);
18150 }
18151
18152 #[test]
18153 fn franken_insert_conversation_or_get_existing_recovers_unique_conflict() {
18154 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18155 use std::path::PathBuf;
18156
18157 let dir = TempDir::new().unwrap();
18158 let db_path = dir.path().join("test.db");
18159 let storage = SqliteStorage::open(&db_path).unwrap();
18160
18161 let agent = Agent {
18162 id: None,
18163 slug: "codex".into(),
18164 name: "Codex".into(),
18165 version: Some("0.2.3".into()),
18166 kind: AgentKind::Cli,
18167 };
18168 let agent_id = storage.ensure_agent(&agent).unwrap();
18169
18170 let conv = Conversation {
18171 id: None,
18172 agent_slug: "codex".into(),
18173 workspace: Some(PathBuf::from("/tmp/workspace")),
18174 external_id: Some("recover-duplicate".into()),
18175 title: Some("Recover Duplicate".into()),
18176 source_path: PathBuf::from("/tmp/rollout.jsonl"),
18177 started_at: Some(1_700_000_000_000),
18178 ended_at: Some(1_700_000_000_100),
18179 approx_tokens: None,
18180 metadata_json: serde_json::Value::Null,
18181 messages: vec![Message {
18182 id: None,
18183 idx: 0,
18184 role: MessageRole::User,
18185 author: None,
18186 created_at: Some(1_700_000_000_000),
18187 content: "hello".into(),
18188 extra_json: serde_json::Value::Null,
18189 snippets: Vec::new(),
18190 }],
18191 source_id: "local".into(),
18192 origin_host: None,
18193 };
18194
18195 let tx = storage.conn.transaction().unwrap();
18196 let inserted_id = franken_insert_conversation(&tx, agent_id, None, &conv)
18197 .unwrap()
18198 .expect("first insert should succeed");
18199
18200 let conversation_key = conversation_merge_key(agent_id, &conv);
18201 let resolved = franken_insert_conversation_or_get_existing_after_miss(
18202 &tx,
18203 agent_id,
18204 None,
18205 &conv,
18206 &conversation_key,
18207 )
18208 .unwrap();
18209
18210 match resolved {
18211 ConversationInsertStatus::Existing(existing_id) => {
18212 assert_eq!(existing_id, inserted_id);
18213 }
18214 ConversationInsertStatus::Inserted(new_id) => {
18215 panic!("expected existing conversation id, got freshly inserted {new_id}");
18216 }
18217 }
18218
18219 let conversation_count: i64 = tx
18220 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18221 row.get_typed(0)
18222 })
18223 .unwrap();
18224 assert_eq!(conversation_count, 1);
18225 }
18226
18227 #[test]
18228 fn insert_conversations_batched_merges_duplicate_external_id_with_gaps() {
18229 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18230 use std::path::PathBuf;
18231
18232 let dir = TempDir::new().unwrap();
18233 let db_path = dir.path().join("test.db");
18234 let storage = SqliteStorage::open(&db_path).unwrap();
18235
18236 let agent = Agent {
18237 id: None,
18238 slug: "codex".into(),
18239 name: "Codex".into(),
18240 version: Some("0.2.3".into()),
18241 kind: AgentKind::Cli,
18242 };
18243 let agent_id = storage.ensure_agent(&agent).unwrap();
18244
18245 let base_conv = |messages: Vec<Message>| Conversation {
18246 id: None,
18247 agent_slug: "codex".into(),
18248 workspace: Some(PathBuf::from("/tmp/workspace")),
18249 external_id: Some("shared-session-gap".into()),
18250 title: Some("Shared Session Gap".into()),
18251 source_path: PathBuf::from("/tmp/rollout.jsonl"),
18252 started_at: Some(1_700_000_000_000),
18253 ended_at: Some(1_700_000_000_999),
18254 approx_tokens: None,
18255 metadata_json: serde_json::Value::Null,
18256 messages,
18257 source_id: "local".into(),
18258 origin_host: None,
18259 };
18260
18261 let conv_a = base_conv(vec![
18262 Message {
18263 id: None,
18264 idx: 2,
18265 role: MessageRole::User,
18266 author: None,
18267 created_at: Some(1_700_000_000_200),
18268 content: "third".into(),
18269 extra_json: serde_json::Value::Null,
18270 snippets: Vec::new(),
18271 },
18272 Message {
18273 id: None,
18274 idx: 3,
18275 role: MessageRole::Agent,
18276 author: None,
18277 created_at: Some(1_700_000_000_300),
18278 content: "fourth".into(),
18279 extra_json: serde_json::Value::Null,
18280 snippets: Vec::new(),
18281 },
18282 ]);
18283 let conv_b = base_conv(vec![
18284 Message {
18285 id: None,
18286 idx: 0,
18287 role: MessageRole::User,
18288 author: None,
18289 created_at: Some(1_700_000_000_000),
18290 content: "first".into(),
18291 extra_json: serde_json::Value::Null,
18292 snippets: Vec::new(),
18293 },
18294 Message {
18295 id: None,
18296 idx: 1,
18297 role: MessageRole::Agent,
18298 author: None,
18299 created_at: Some(1_700_000_000_100),
18300 content: "second".into(),
18301 extra_json: serde_json::Value::Null,
18302 snippets: Vec::new(),
18303 },
18304 Message {
18305 id: None,
18306 idx: 3,
18307 role: MessageRole::Agent,
18308 author: None,
18309 created_at: Some(1_700_000_000_300),
18310 content: "fourth".into(),
18311 extra_json: serde_json::Value::Null,
18312 snippets: Vec::new(),
18313 },
18314 ]);
18315
18316 let outcomes = storage
18317 .insert_conversations_batched(&[(agent_id, None, &conv_a), (agent_id, None, &conv_b)])
18318 .unwrap();
18319 assert_eq!(outcomes.len(), 2);
18320 assert_eq!(outcomes[0].inserted_indices, vec![2, 3]);
18321 assert_eq!(outcomes[1].inserted_indices, vec![0, 1]);
18322 assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);
18323
18324 let stored_indices: Vec<i64> = storage
18325 .conn
18326 .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
18327 row.get_typed(0)
18328 })
18329 .unwrap();
18330 assert_eq!(stored_indices, vec![0, 1, 2, 3]);
18331 }
18332
18333 #[test]
18334 fn insert_conversations_batched_refreshes_partial_pending_message_lookup() {
18335 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18336 use std::path::PathBuf;
18337
18338 let dir = TempDir::new().unwrap();
18339 let db_path = dir.path().join("test.db");
18340 let storage = SqliteStorage::open(&db_path).unwrap();
18341
18342 let agent = Agent {
18343 id: None,
18344 slug: "codex".into(),
18345 name: "Codex".into(),
18346 version: Some("0.2.3".into()),
18347 kind: AgentKind::Cli,
18348 };
18349 let agent_id = storage.ensure_agent(&agent).unwrap();
18350
18351 let make_message = |idx: i64, content: &str| Message {
18352 id: None,
18353 idx,
18354 role: if idx == 0 {
18355 MessageRole::User
18356 } else {
18357 MessageRole::Agent
18358 },
18359 author: None,
18360 created_at: Some(1_700_000_000_000 + idx),
18361 content: content.into(),
18362 extra_json: serde_json::Value::Null,
18363 snippets: Vec::new(),
18364 };
18365
18366 let base_conv = |messages: Vec<Message>| Conversation {
18367 id: None,
18368 agent_slug: "codex".into(),
18369 workspace: Some(PathBuf::from("/tmp/workspace")),
18370 external_id: Some("partial-cache-session".into()),
18371 title: Some("Partial cache session".into()),
18372 source_path: PathBuf::from("/tmp/partial-cache.jsonl"),
18373 started_at: Some(1_700_000_000_000),
18374 ended_at: Some(1_700_000_000_100),
18375 approx_tokens: None,
18376 metadata_json: serde_json::Value::Null,
18377 messages,
18378 source_id: "local".into(),
18379 origin_host: None,
18380 };
18381
18382 let canonical = base_conv(vec![
18383 make_message(0, "canonical zero"),
18384 make_message(20, "canonical twenty"),
18385 ]);
18386 storage
18387 .insert_conversation_tree(agent_id, None, &canonical)
18388 .unwrap();
18389
18390 let exact_prefix = base_conv(vec![make_message(0, "canonical zero")]);
18391 let conflicting_tail = base_conv(vec![make_message(20, "conflicting twenty")]);
18392
18393 let outcomes = storage
18394 .insert_conversations_batched(&[
18395 (agent_id, None, &exact_prefix),
18396 (agent_id, None, &conflicting_tail),
18397 ])
18398 .unwrap();
18399
18400 assert_eq!(outcomes.len(), 2);
18401 assert!(outcomes[0].inserted_indices.is_empty());
18402 assert!(
18403 outcomes[1].inserted_indices.is_empty(),
18404 "the second batch item must refresh the partial pending lookup and retain the canonical idx=20 row"
18405 );
18406
18407 let stored_messages: Vec<(i64, String)> = storage
18408 .conn
18409 .query_map_collect(
18410 "SELECT idx, content FROM messages ORDER BY idx",
18411 fparams![],
18412 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
18413 )
18414 .unwrap();
18415 assert_eq!(
18416 stored_messages,
18417 vec![
18418 (0, "canonical zero".to_string()),
18419 (20, "canonical twenty".to_string()),
18420 ]
18421 );
18422 }
18423
18424 #[test]
18425 fn insert_conversations_batched_reprocessing_conversation_is_idempotent() {
18426 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18427 use std::path::PathBuf;
18428
18429 const MESSAGE_COUNT: i64 = 64;
18430
18431 let dir = TempDir::new().unwrap();
18432 let db_path = dir.path().join("test.db");
18433 let storage = SqliteStorage::open(&db_path).unwrap();
18434
18435 let agent = Agent {
18436 id: None,
18437 slug: "codex".into(),
18438 name: "Codex".into(),
18439 version: Some("0.2.3".into()),
18440 kind: AgentKind::Cli,
18441 };
18442 let agent_id = storage.ensure_agent(&agent).unwrap();
18443
18444 let messages: Vec<Message> = (0..MESSAGE_COUNT)
18445 .map(|idx| Message {
18446 id: None,
18447 idx,
18448 role: if idx % 2 == 0 {
18449 MessageRole::User
18450 } else {
18451 MessageRole::Agent
18452 },
18453 author: None,
18454 created_at: Some(1_700_000_000_000 + idx),
18455 content: format!("message {idx}"),
18456 extra_json: serde_json::Value::Null,
18457 snippets: Vec::new(),
18458 })
18459 .collect();
18460
18461 let conversation = Conversation {
18462 id: None,
18463 agent_slug: "codex".into(),
18464 workspace: Some(PathBuf::from("/tmp/workspace")),
18465 external_id: Some("large-reprocess-session".into()),
18466 title: Some("Large Reprocess Session".into()),
18467 source_path: PathBuf::from("/tmp/large-reprocess-session.jsonl"),
18468 started_at: Some(1_700_000_000_000),
18469 ended_at: Some(1_700_000_000_000 + MESSAGE_COUNT - 1),
18470 approx_tokens: None,
18471 metadata_json: serde_json::Value::Null,
18472 messages,
18473 source_id: "local".into(),
18474 origin_host: None,
18475 };
18476
18477 let first = storage
18478 .insert_conversations_batched(&[(agent_id, None, &conversation)])
18479 .unwrap();
18480 let second = storage
18481 .insert_conversations_batched(&[(agent_id, None, &conversation)])
18482 .unwrap();
18483
18484 assert_eq!(first.len(), 1);
18485 assert_eq!(second.len(), 1);
18486 assert_eq!(first[0].inserted_indices.len(), MESSAGE_COUNT as usize);
18487 assert!(
18488 second[0].inserted_indices.is_empty(),
18489 "full reprocessing of a large conversation must not attempt duplicate idx inserts"
18490 );
18491 assert_eq!(first[0].conversation_id, second[0].conversation_id);
18492
18493 let conversation_count: i64 = storage
18494 .conn
18495 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
18496 row.get_typed(0)
18497 })
18498 .unwrap();
18499 let message_count: i64 = storage
18500 .conn
18501 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
18502 row.get_typed(0)
18503 })
18504 .unwrap();
18505
18506 assert_eq!(conversation_count, 1);
18507 assert_eq!(message_count, MESSAGE_COUNT);
18508 }
18509
18510 #[test]
18511 fn parallel_insert_conversation_tree_keeps_unique_external_ids_distinct() {
18512 use crate::connectors::{NormalizedConversation, NormalizedMessage};
18513 use crate::indexer::persist::map_to_internal;
18514 use crate::model::types::{Agent, AgentKind};
18515 use frankensqlite::compat::{ConnectionExt, RowExt};
18516 use rand::RngExt;
18517 use rayon::prelude::*;
18518
18519 fn retryable_franken_error(err: &anyhow::Error) -> bool {
18520 err.downcast_ref::<frankensqlite::FrankenError>()
18521 .or_else(|| {
18522 err.root_cause()
18523 .downcast_ref::<frankensqlite::FrankenError>()
18524 })
18525 .is_some_and(|inner| {
18526 matches!(
18527 inner,
18528 frankensqlite::FrankenError::Busy
18529 | frankensqlite::FrankenError::BusyRecovery
18530 | frankensqlite::FrankenError::BusySnapshot { .. }
18531 | frankensqlite::FrankenError::WriteConflict { .. }
18532 | frankensqlite::FrankenError::SerializationFailure { .. }
18533 )
18534 })
18535 }
18536
18537 fn with_retry<F, T>(mut f: F) -> anyhow::Result<T>
18538 where
18539 F: FnMut() -> anyhow::Result<T>,
18540 {
18541 let mut rng = rand::rng();
18542 let mut backoff_ms = 4_u64;
18543 for attempt in 0..=24 {
18544 match f() {
18545 Ok(value) => return Ok(value),
18546 Err(err) if attempt < 24 && retryable_franken_error(&err) => {
18547 let sleep_ms = backoff_ms + rng.random_range(0..=backoff_ms);
18548 std::thread::sleep(Duration::from_millis(sleep_ms));
18549 backoff_ms = (backoff_ms * 2).min(512);
18550 }
18551 Err(err) => return Err(err),
18552 }
18553 }
18554 unreachable!("retry loop must return on success or final failure")
18555 }
18556
18557 let dir = TempDir::new().unwrap();
18558 let db_path = dir.path().join("parallel_insert_conversation_tree.db");
18559 let seed = FrankenStorage::open(&db_path).unwrap();
18560 drop(seed);
18561
18562 let conversations: Vec<NormalizedConversation> = (0..10)
18563 .map(|i| NormalizedConversation {
18564 agent_slug: format!("agent-{}", i % 3),
18565 external_id: Some(format!("conv-{i}")),
18566 title: Some(format!("Conversation {i}")),
18567 workspace: Some(PathBuf::from(format!("/ws/{i}"))),
18568 source_path: PathBuf::from(format!("/log/{i}.jsonl")),
18569 started_at: Some(1_000 + i * 100),
18570 ended_at: Some(1_000 + i * 100 + 50),
18571 metadata: serde_json::json!({}),
18572 messages: (0..3)
18573 .map(|j| NormalizedMessage {
18574 idx: j,
18575 role: if j % 2 == 0 { "user" } else { "assistant" }.to_string(),
18576 author: Some("tester".into()),
18577 created_at: Some(1_000 + i * 100 + j * 10),
18578 content: format!("parallel-distinct-test conv={i} msg={j}"),
18579 extra: serde_json::json!({}),
18580 snippets: vec![],
18581 invocations: Vec::new(),
18582 })
18583 .collect(),
18584 })
18585 .collect();
18586
18587 let mut outcomes: Vec<(String, i64, Vec<i64>)> = conversations
18588 .par_chunks(3)
18589 .map(|chunk| {
18590 let storage = FrankenStorage::open_writer(&db_path).unwrap();
18591 let mut agent_cache: HashMap<String, i64> = HashMap::new();
18592 let mut workspace_cache: HashMap<PathBuf, i64> = HashMap::new();
18593 let mut chunk_outcomes = Vec::with_capacity(chunk.len());
18594
18595 for conv in chunk {
18596 let agent_slug = conv.agent_slug.clone();
18597 let workspace = conv.workspace.clone();
18598 let external_id = conv.external_id.clone().expect("external id");
18599 let internal = map_to_internal(conv);
18600 let outcome = with_retry(|| {
18601 let agent_id = if let Some(id) = agent_cache.get(&agent_slug) {
18602 *id
18603 } else {
18604 let agent = Agent {
18605 id: None,
18606 slug: agent_slug.clone(),
18607 name: agent_slug.clone(),
18608 version: None,
18609 kind: AgentKind::Cli,
18610 };
18611 let id = storage.ensure_agent(&agent)?;
18612 agent_cache.insert(agent_slug.clone(), id);
18613 id
18614 };
18615 let workspace_id = if let Some(path) = &workspace {
18616 if let Some(id) = workspace_cache.get(path) {
18617 Some(*id)
18618 } else {
18619 let id = storage.ensure_workspace(path, None)?;
18620 workspace_cache.insert(path.clone(), id);
18621 Some(id)
18622 }
18623 } else {
18624 None
18625 };
18626 storage.insert_conversation_tree(agent_id, workspace_id, &internal)
18627 })
18628 .unwrap();
18629 chunk_outcomes.push((
18630 external_id,
18631 outcome.conversation_id,
18632 outcome.inserted_indices,
18633 ));
18634 }
18635
18636 storage.close().unwrap();
18637 chunk_outcomes
18638 })
18639 .flatten()
18640 .collect();
18641 outcomes.sort_by(|left, right| left.0.cmp(&right.0));
18642
18643 assert!(
18644 outcomes
18645 .iter()
18646 .all(|(_, _, inserted_indices)| inserted_indices == &vec![0, 1, 2]),
18647 "unique external ids must not be routed through the existing-conversation merge path: {outcomes:?}"
18648 );
18649
18650 let distinct_ids: HashSet<i64> = outcomes
18651 .iter()
18652 .map(|(_, conversation_id, _)| *conversation_id)
18653 .collect();
18654 assert_eq!(
18655 distinct_ids.len(),
18656 conversations.len(),
18657 "unique external ids must produce distinct conversation ids: {outcomes:?}"
18658 );
18659
18660 let reader = FrankenStorage::open(&db_path).unwrap();
18661 let stored_rows: Vec<(i64, String)> = reader
18662 .raw()
18663 .query_map_collect(
18664 "SELECT id, external_id FROM conversations ORDER BY id",
18665 &[],
18666 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
18667 )
18668 .unwrap();
18669 let stored_count: i64 = reader
18670 .raw()
18671 .query_row_map("SELECT COUNT(*) FROM conversations", &[], |row| {
18672 row.get_typed(0)
18673 })
18674 .unwrap();
18675
18676 assert_eq!(
18677 stored_count as usize,
18678 conversations.len(),
18679 "parallel distinct inserts must persist one row per external id; rows={stored_rows:?}; outcomes={outcomes:?}"
18680 );
18681 assert_eq!(
18682 stored_rows.len(),
18683 conversations.len(),
18684 "parallel distinct inserts must remain visible after reopening; rows={stored_rows:?}; outcomes={outcomes:?}"
18685 );
18686 }
18687
18688 #[test]
18689 fn insert_conversation_tree_merges_duplicate_external_id_with_gaps() {
18690 use crate::connectors::{NormalizedConversation, NormalizedMessage};
18691 use crate::indexer::persist::map_to_internal;
18692 use crate::model::types::{Agent, AgentKind};
18693 use std::path::PathBuf;
18694
18695 let dir = TempDir::new().unwrap();
18696 let db_path = dir.path().join("test.db");
18697 let storage = SqliteStorage::open(&db_path).unwrap();
18698
18699 let agent = Agent {
18700 id: None,
18701 slug: "codex".into(),
18702 name: "Codex".into(),
18703 version: Some("0.2.3".into()),
18704 kind: AgentKind::Cli,
18705 };
18706 let agent_id = storage.ensure_agent(&agent).unwrap();
18707
18708 let base_conv = |messages: Vec<NormalizedMessage>| NormalizedConversation {
18709 agent_slug: "codex".into(),
18710 workspace: Some(PathBuf::from("/tmp/workspace")),
18711 external_id: Some("tree-gap-session".into()),
18712 title: Some("Tree Gap Session".into()),
18713 source_path: PathBuf::from("/tmp/tree.jsonl"),
18714 started_at: Some(1_700_000_000_000),
18715 ended_at: Some(1_700_000_000_999),
18716 metadata: serde_json::Value::Null,
18717 messages,
18718 };
18719
18720 let conv_a = map_to_internal(&base_conv(vec![
18721 NormalizedMessage {
18722 idx: 2,
18723 role: "user".into(),
18724 author: None,
18725 created_at: Some(1_700_000_000_200),
18726 content: "third".into(),
18727 extra: serde_json::Value::Null,
18728 snippets: Vec::new(),
18729 invocations: Vec::new(),
18730 },
18731 NormalizedMessage {
18732 idx: 3,
18733 role: "assistant".into(),
18734 author: None,
18735 created_at: Some(1_700_000_000_300),
18736 content: "fourth".into(),
18737 extra: serde_json::Value::Null,
18738 snippets: Vec::new(),
18739 invocations: Vec::new(),
18740 },
18741 ]));
18742 let conv_b = map_to_internal(&base_conv(vec![
18743 NormalizedMessage {
18744 idx: 0,
18745 role: "user".into(),
18746 author: None,
18747 created_at: Some(1_700_000_000_000),
18748 content: "first".into(),
18749 extra: serde_json::Value::Null,
18750 snippets: Vec::new(),
18751 invocations: Vec::new(),
18752 },
18753 NormalizedMessage {
18754 idx: 1,
18755 role: "assistant".into(),
18756 author: None,
18757 created_at: Some(1_700_000_000_100),
18758 content: "second".into(),
18759 extra: serde_json::Value::Null,
18760 snippets: Vec::new(),
18761 invocations: Vec::new(),
18762 },
18763 NormalizedMessage {
18764 idx: 3,
18765 role: "assistant".into(),
18766 author: None,
18767 created_at: Some(1_700_000_000_300),
18768 content: "fourth".into(),
18769 extra: serde_json::Value::Null,
18770 snippets: Vec::new(),
18771 invocations: Vec::new(),
18772 },
18773 ]));
18774
18775 let first = storage
18776 .insert_conversation_tree(agent_id, None, &conv_a)
18777 .unwrap();
18778 let second = storage
18779 .insert_conversation_tree(agent_id, None, &conv_b)
18780 .unwrap();
18781
18782 assert_eq!(first.inserted_indices, vec![2, 3]);
18783 assert_eq!(second.inserted_indices, vec![0, 1]);
18784 assert_eq!(first.conversation_id, second.conversation_id);
18785
18786 let stored_indices: Vec<i64> = storage
18787 .conn
18788 .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
18789 row.get_typed(0)
18790 })
18791 .unwrap();
18792 assert_eq!(stored_indices, vec![0, 1, 2, 3]);
18793 }
18794
18795 #[test]
18796 fn insert_conversation_tree_skips_duplicate_message_indices_for_new_conversation() {
18797 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
18798 use std::path::PathBuf;
18799
18800 let dir = TempDir::new().unwrap();
18801 let db_path = dir.path().join("test.db");
18802 let storage = SqliteStorage::open(&db_path).unwrap();
18803
18804 let agent = Agent {
18805 id: None,
18806 slug: "codex".into(),
18807 name: "Codex".into(),
18808 version: Some("0.2.3".into()),
18809 kind: AgentKind::Cli,
18810 };
18811 let agent_id = storage.ensure_agent(&agent).unwrap();
18812
18813 let conversation = Conversation {
18814 id: None,
18815 agent_slug: "codex".into(),
18816 workspace: Some(PathBuf::from("/tmp/workspace")),
18817 external_id: Some("duplicate-new-session".into()),
18818 title: Some("Duplicate New Session".into()),
18819 source_path: PathBuf::from("/tmp/duplicate-new-session.jsonl"),
18820 started_at: Some(1_700_000_000_000),
18821 ended_at: Some(1_700_000_000_999),
18822 approx_tokens: None,
18823 metadata_json: serde_json::Value::Null,
18824 messages: vec![
18825 Message {
18826 id: None,
18827 idx: 0,
18828 role: MessageRole::User,
18829 author: None,
18830 created_at: Some(1_700_000_000_000),
18831 content: "first canonical".into(),
18832 extra_json: serde_json::Value::Null,
18833 snippets: Vec::new(),
18834 },
18835 Message {
18836 id: None,
18837 idx: 0,
18838 role: MessageRole::User,
18839 author: None,
18840 created_at: Some(1_700_000_000_001),
18841 content: "duplicate idx should be skipped".into(),
18842 extra_json: serde_json::Value::Null,
18843 snippets: Vec::new(),
18844 },
18845 Message {
18846 id: None,
18847 idx: 1,
18848 role: MessageRole::Agent,
18849 author: None,
18850 created_at: Some(1_700_000_000_100),
18851 content: "second".into(),
18852 extra_json: serde_json::Value::Null,
18853 snippets: Vec::new(),
18854 },
18855 ],
18856 source_id: "local".into(),
18857 origin_host: None,
18858 };
18859
18860 let outcome = storage
18861 .insert_conversation_tree(agent_id, None, &conversation)
18862 .unwrap();
18863
18864 assert_eq!(outcome.inserted_indices, vec![0, 1]);
18865
18866 let stored_messages: Vec<(i64, String)> = storage
18867 .conn
18868 .query_map_collect(
18869 "SELECT idx, content FROM messages ORDER BY idx",
18870 fparams![],
18871 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
18872 )
18873 .unwrap();
18874 assert_eq!(
18875 stored_messages,
18876 vec![
18877 (0, "first canonical".to_string()),
18878 (1, "second".to_string())
18879 ]
18880 );
18881 }
18882
/// Two inserts that share a `source_path` and carry no `external_id` must land in the same
/// conversation row, with the second insert contributing only the message index it adds.
#[test]
fn insert_conversation_tree_merges_duplicate_source_path_without_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Message fixture: only index, role, timestamp and text differ between cases.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation fixture: fixed source path and timestamps, no external id.
    fn conversation_with(messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Source Path Merge".into()),
            source_path: PathBuf::from("/tmp/shared-session.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // First pass stores indices 0 and 1.
    let initial = conversation_with(vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first"),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ]);
    let first = storage
        .insert_conversation_tree(agent_id, None, &initial)
        .unwrap();

    // Second pass replays index 1 and appends index 2.
    let replay = conversation_with(vec![
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        message(2, MessageRole::User, 1_700_000_000_200, "third"),
    ]);
    let second = storage
        .insert_conversation_tree(agent_id, None, &replay)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(first.inserted_indices, vec![0, 1]);
    assert_eq!(second.inserted_indices, vec![2]);

    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 2]);
}
18987
/// A replay of the same source path whose `started_at` differs by 4_000 from the first insert
/// is still merged into the existing conversation; only the new index 2 is inserted.
#[test]
fn insert_conversation_tree_merges_source_path_duplicates_with_start_drift() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Message fixture: only index, role, timestamp and text differ between cases.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation fixture: same source path every time, caller picks the start time.
    fn drifting_conv(started_at: Option<i64>, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Drift Merge".into()),
            source_path: PathBuf::from("/tmp/drift-session.jsonl"),
            started_at,
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let original = drifting_conv(
        Some(1_700_000_000_000),
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original)
        .unwrap();

    // Same path, later start time; index 1 overlaps, index 2 is new.
    let drifted = drifting_conv(
        Some(1_700_000_004_000),
        vec![
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(2, MessageRole::User, 1_700_000_004_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &drifted)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![2]);
}
19089
/// A later session on the same source path that shares only its opening message text with an
/// earlier two-message session must be stored as a second conversation.
#[test]
fn insert_conversation_tree_keeps_single_message_overlap_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Message fixture: only index, role, timestamp and text differ between cases.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // One-message conversation; the message is stamped with the conversation start time.
    fn single_message_conv(started_at: i64, idx: i64, content: &str) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Partial overlap".into()),
            source_path: PathBuf::from("/tmp/reused-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![message(idx, MessageRole::User, started_at, content)],
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // The earlier session reuses the fixture's metadata but carries its own two messages.
    let mut earlier_session = single_message_conv(1_700_000_000_000, 0, "unused");
    earlier_session.messages = vec![
        message(0, MessageRole::User, 1_700_000_000_000, "shared opener"),
        message(
            1,
            MessageRole::Agent,
            1_700_000_000_100,
            "first session unique",
        ),
    ];
    storage
        .insert_conversation_tree(agent_id, None, &earlier_session)
        .unwrap();

    let later_session = single_message_conv(1_700_000_900_000, 0, "shared opener");
    storage
        .insert_conversation_tree(agent_id, None, &later_session)
        .unwrap();

    let stored_conversations: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_conversations, 2);
}
19180
/// Two one-message sessions on the same source path, with different start times and different
/// content, must be stored as two conversations.
#[test]
fn insert_conversation_tree_keeps_distinct_source_path_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation on a fixed source path; caller picks the timestamps and text.
    fn session(started_at: i64, created_at: i64, content: &str) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Same Path Different Session".into()),
            source_path: PathBuf::from("/tmp/reused-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let sessions = [
        session(1_700_000_000_000, 1_700_000_000_000, "first session"),
        session(1_700_000_900_000, 1_700_000_900_000, "second session"),
    ];
    for conversation in &sessions {
        storage
            .insert_conversation_tree(agent_id, None, conversation)
            .unwrap();
    }

    let stored_conversations: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_conversations, 2);
}
19247
/// A replay whose messages repeat the stored role/timestamp/content under shifted indices
/// (10, 11) is merged into the existing conversation; only the unseen message (idx 12) is
/// inserted, leaving stored indices 0, 1 and 12.
#[test]
fn insert_conversation_tree_merges_replay_equivalent_messages_with_shifted_idx() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Message fixture: only index, role, timestamp and text differ between cases.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation fixture: fixed source path, caller picks start time and messages.
    fn replay_conv(started_at: i64, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Shifted replay".into()),
            source_path: PathBuf::from("/tmp/replay-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let original = replay_conv(
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original)
        .unwrap();

    // Same two messages re-numbered from 10, followed by one new message.
    let shifted = replay_conv(
        1_700_000_900_000,
        vec![
            message(10, MessageRole::User, 1_700_000_000_000, "first"),
            message(11, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(12, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &shifted)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![12]);

    let persisted: Vec<i64> = storage
        .conn
        .query_map_collect(
            "SELECT idx FROM messages WHERE conversation_id = ?1 ORDER BY idx",
            fparams![first.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(persisted, vec![0, 1, 12]);
}
19369
/// Salvaging two historical databases whose conversations overlap on one source path imports
/// both bundles, merges the overlapping conversation into indices 0..=2, and imports nothing
/// on a second run.
#[test]
fn salvage_historical_databases_imports_backups_once_and_merges_overlap() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Message fixture: only index, role, timestamp and text differ between cases.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation fixture with fixed timestamps; caller picks the source path and messages.
    fn base_conv(source_path: &str, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Recovered".into()),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    // Two halves of the same session: indices 0-1 in one backup, 1-2 in the other.
    let shared_head = base_conv(
        "/tmp/shared-history.jsonl",
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let shared_tail = base_conv(
        "/tmp/shared-history.jsonl",
        vec![
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(2, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );

    // A conversation that exists in only one of the historical databases.
    let mut standalone = base_conv(
        "/tmp/unique-history.jsonl",
        vec![message(0, MessageRole::User, 1_700_000_001_000, "unique")],
    );
    standalone.started_at = Some(1_700_000_001_000);
    standalone.ended_at = Some(1_700_000_001_100);

    let dated_backup = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&dated_backup, std::slice::from_ref(&shared_head));
    let corrupt_sibling = tmp.path().join("agent_search.corrupt.20260324_212907");
    seed_historical_db_direct(&corrupt_sibling, &[shared_tail, standalone]);

    let first = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(first.bundles_considered, 2);
    assert_eq!(first.bundles_imported, 2);
    assert_eq!(first.messages_imported, 4);

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations.len(), 2);

    let shared_id = conversations
        .iter()
        .find(|conv| conv.source_path == std::path::Path::new("/tmp/shared-history.jsonl"))
        .and_then(|conv| conv.id)
        .unwrap();
    let shared_messages = storage.fetch_messages(shared_id).unwrap();
    let shared_indices: Vec<i64> = shared_messages.into_iter().map(|msg| msg.idx).collect();
    assert_eq!(shared_indices, vec![0, 1, 2]);

    // A repeat salvage finds nothing left to import.
    let second = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(second.bundles_imported, 0);
    assert_eq!(second.messages_imported, 0);
}
19499
/// A historical conversation whose `source_id` is blank but whose `origin_host` is set must be
/// salvaged with the host name as its source id.
#[test]
fn salvage_historical_databases_normalizes_host_only_remote_provenance() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "host-only remote".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let host_only_remote = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Recovered Host Only Remote".into()),
        source_path: PathBuf::from("/tmp/host-only-history.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: " ".into(),
        origin_host: Some("builder-5".into()),
    };

    let backup_path = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&backup_path, std::slice::from_ref(&host_only_remote));

    // Rewrite the seeded backup directly so the stored rows carry the blank source id:
    // add a blank-id source row, point the conversation at it, and remove the
    // `builder-5` source row.
    let backup_conn =
        FrankenConnection::open(backup_path.to_string_lossy().into_owned()).unwrap();
    backup_conn
        .execute_compat(
            "INSERT INTO sources(id, kind, host_label, created_at, updated_at) VALUES(?1, ?2, ?3, ?4, ?5)",
            fparams![" ", "ssh", "builder-5", 0_i64, 0_i64],
        )
        .unwrap();
    backup_conn
        .execute_compat(
            "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE source_path = ?3",
            fparams![" ", "builder-5", "/tmp/host-only-history.jsonl"],
        )
        .unwrap();
    backup_conn
        .execute_compat("DELETE FROM sources WHERE id = ?1", fparams!["builder-5"])
        .unwrap();
    // Close the backup before salvage opens it.
    drop(backup_conn);

    let salvage = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(salvage.bundles_imported, 1);
    assert_eq!(salvage.messages_imported, 1);

    let known_sources = storage.get_source_ids().unwrap();
    assert_eq!(known_sources, vec!["builder-5".to_string()]);

    let recovered = storage.list_conversations(10, 0).unwrap();
    assert_eq!(recovered.len(), 1);
    assert_eq!(recovered[0].source_id, "builder-5");
    assert_eq!(recovered[0].origin_host.as_deref(), Some("builder-5"));
}
19570
/// With an importer that fails whenever a batch holds more than one message, the retry
/// wrapper must still report all four messages of a single conversation as imported, and
/// must have attempted at least four one-message batches along the way.
#[test]
fn historical_salvage_retry_splits_single_conversation_until_it_fits() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let four_messages: Vec<Message> = (0..4)
        .map(|idx| Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message-{idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        })
        .collect();
    let oversized = HistoricalBatchEntry {
        source_row_id: 77,
        agent_id: 1,
        workspace_id: None,
        conversation: Conversation {
            id: None,
            agent_slug: "gemini".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("conv-77".into()),
            title: Some("Large recovered conversation".into()),
            source_path: PathBuf::from("/tmp/history.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: four_messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        },
    };

    // Per-attempt record of how many messages each batch entry carried.
    let mut attempted_sizes: Vec<Vec<usize>> = Vec::new();

    let totals = SqliteStorage::import_historical_batch_with_retry(
        std::slice::from_ref(&oversized),
        &mut |batch| {
            let sizes: Vec<usize> = batch
                .iter()
                .map(|item| item.conversation.messages.len())
                .collect();
            let message_total: usize = sizes.iter().sum();
            attempted_sizes.push(sizes);

            // Fake importer: anything above one message fails with an out-of-memory error.
            if message_total <= 1 {
                Ok(HistoricalBatchImportTotals {
                    inserted_source_rows: batch.len(),
                    inserted_messages: message_total,
                })
            } else {
                Err(anyhow!("out of memory"))
            }
        },
    )
    .unwrap();

    assert_eq!(
        totals,
        HistoricalBatchImportTotals {
            inserted_source_rows: 1,
            inserted_messages: 4,
        }
    );
    // The very first attempt is the whole conversation.
    assert_eq!(attempted_sizes.first(), Some(&vec![4]));
    let single_message_attempts = attempted_sizes
        .iter()
        .filter(|sizes| sizes.as_slice() == [1])
        .count();
    assert!(
        single_message_attempts >= 4,
        "expected recursive fallback to reach one-message slices"
    );
}
19647
/// With a progress checkpoint recorded at the first backup row (carrying counters 50 and 99),
/// salvage imports the remaining two conversations, reports totals that continue from the
/// checkpoint counters (52 and 101), clears the checkpoint, and imports nothing on a rerun.
#[test]
fn salvage_historical_databases_resumes_from_progress_checkpoint() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation whose ids, titles and timestamps derive from `idx_seed`.
    fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx_seed),
            content: format!("message-{idx_seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("conv-{idx_seed}")),
            title: Some(format!("Recovered {idx_seed}")),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000 + idx_seed),
            ended_at: Some(1_700_000_000_100 + idx_seed),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let seeded = [
        make_conv("/tmp/one.jsonl", 1),
        make_conv("/tmp/two.jsonl", 2),
        make_conv("/tmp/three.jsonl", 3),
    ];
    seed_historical_db_direct(&backup_db, &seeded);

    // The canonical database already holds the first conversation before salvage runs.
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    storage
        .insert_conversation_tree(agent_id, None, &seeded[0])
        .unwrap();

    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|candidate| candidate.root_path == backup_db)
        .unwrap();

    // Kept as one chained expression so the backup connection is dropped before salvage.
    let first_row_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
        .unwrap()
        .query_row_map(
            "SELECT id FROM conversations WHERE source_path = ?1",
            fparams!["/tmp/one.jsonl"],
            |row| row.get_typed(0),
        )
        .unwrap();
    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", first_row_id, 50, 99)
        .unwrap();

    let resumed = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(resumed.bundles_imported, 1);
    assert_eq!(resumed.conversations_imported, 52);
    assert_eq!(resumed.messages_imported, 101);
    assert_eq!(storage.list_conversations(10, 0).unwrap().len(), 3);

    let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let leftover_progress: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![progress_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        leftover_progress.is_none(),
        "completed salvage should clear bundle progress"
    );

    let rerun = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(rerun.bundles_imported, 0);
    assert_eq!(rerun.messages_imported, 0);
}
19747
/// When the recorded checkpoint already sits at the backup's highest conversation id, salvage
/// must import nothing, mark the bundle as already imported, and clear the checkpoint.
#[test]
fn salvage_historical_databases_skips_bundle_when_checkpoint_covers_backup() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation whose ids, titles and timestamps derive from `idx_seed`.
    fn make_conv(source_path: &str, idx_seed: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx_seed),
            content: format!("message-{idx_seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("conv-{idx_seed}")),
            title: Some(format!("Recovered {idx_seed}")),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000 + idx_seed),
            ended_at: Some(1_700_000_000_100 + idx_seed),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let seeded = [
        make_conv("/tmp/one.jsonl", 1),
        make_conv("/tmp/two.jsonl", 2),
        make_conv("/tmp/three.jsonl", 3),
    ];
    seed_historical_db_direct(&backup_db, &seeded);

    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|candidate| candidate.root_path == backup_db)
        .unwrap();

    // Kept as one chained expression so the backup connection is dropped before salvage.
    let backup_max_id: i64 = FrankenConnection::open(backup_db.to_string_lossy().into_owned())
        .unwrap()
        .query_row_map(
            "SELECT COALESCE(MAX(id), 0) FROM conversations",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert!(backup_max_id > 0, "seeded backup should have conversations");

    // Checkpoint at the highest backup row id.
    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", backup_max_id, 3, 3)
        .unwrap();

    let skipped = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(
        skipped.bundles_imported, 0,
        "fully-checkpointed bundle must not be re-scanned"
    );
    assert_eq!(skipped.conversations_imported, 0);
    assert_eq!(skipped.messages_imported, 0);

    let canonical_conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(
        canonical_conversations.len(),
        0,
        "skip path must not import anything"
    );
    assert!(
        storage.historical_bundle_already_imported(&bundle).unwrap(),
        "skipped bundle must be ledgered as salvaged so future runs short-circuit"
    );

    let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let leftover_progress: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![progress_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        leftover_progress.is_none(),
        "skip path must clear the bundle progress checkpoint"
    );
}
19851
/// The lexical-rebuild listing must return conversations in a different, stable order from
/// `list_conversations`: here that is insertion order (a, b, c) versus (a, c, b). It must
/// also page correctly from an "after id" cursor and honor an upper id bound.
#[test]
fn list_conversations_for_lexical_rebuild_uses_stable_id_order() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation keyed by its source path; `started_at` drives all timestamps.
    fn make_conv(source_path: &str, started_at: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(started_at),
            content: format!("message for {source_path}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(source_path.to_string()),
            title: Some(source_path.to_string()),
            source_path: PathBuf::from(source_path),
            started_at: Some(started_at),
            ended_at: Some(started_at + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Inserted as a, b, c while the start times run a (3_000) > c (2_000) > b (1_000).
    let inserted = [
        make_conv("/tmp/a.jsonl", 3_000),
        make_conv("/tmp/b.jsonl", 1_000),
        make_conv("/tmp/c.jsonl", 2_000),
    ];
    for conversation in &inserted {
        storage
            .insert_conversation_tree(agent_id, None, conversation)
            .unwrap();
    }

    let to_paths = |names: &[&str]| -> Vec<PathBuf> {
        names.iter().map(|name| PathBuf::from(*name)).collect()
    };

    let user_order: Vec<PathBuf> = storage
        .list_conversations(10, 0)
        .unwrap()
        .into_iter()
        .map(|conv| conv.source_path)
        .collect();
    assert_eq!(
        user_order,
        to_paths(&["/tmp/a.jsonl", "/tmp/c.jsonl", "/tmp/b.jsonl"])
    );

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    let rebuild_order: Vec<PathBuf> = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap()
        .into_iter()
        .map(|conv| conv.source_path)
        .collect();
    assert_eq!(
        rebuild_order,
        to_paths(&["/tmp/a.jsonl", "/tmp/b.jsonl", "/tmp/c.jsonl"])
    );

    // Page one: two rows starting after id 0.
    let first_page = storage
        .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    let first_page_paths: Vec<PathBuf> = first_page
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(
        first_page_paths,
        to_paths(&["/tmp/a.jsonl", "/tmp/b.jsonl"])
    );

    // The id of the last row on page one serves as both the next cursor and the upper bound.
    let first_page_last_id = first_page
        .last()
        .and_then(|conv| conv.id)
        .expect("first page should include an id");

    let second_page = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            first_page_last_id,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let second_page_paths: Vec<PathBuf> = second_page
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(second_page_paths, to_paths(&["/tmp/c.jsonl"]));

    let bounded_page = storage
        .list_conversations_for_lexical_rebuild_after_id_through_id(
            10,
            0,
            first_page_last_id,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap();
    let bounded_paths: Vec<PathBuf> = bounded_page
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(bounded_paths, to_paths(&["/tmp/a.jsonl", "/tmp/b.jsonl"]));
}
19989
/// Pages through the lexical-rebuild listing two rows at a time after rows
/// 2 and 4 have been deleted, checking that every page resumes strictly after
/// the last id returned and that the surviving ids come back in order.
#[test]
fn keyset_traversal_handles_sparse_holey_conversation_ids() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation whose external id, title and source path all
    // derive from `label`.
    fn labelled_conversation(label: &str, ts: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Six inserts; the assertions below expect them to have received ids 1..=6.
    for n in 0..6 {
        let conversation = labelled_conversation(&format!("conv-{n}"), 1000 + n);
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Punch holes at ids 2 and 4 (rows and their messages) with foreign-key
    // enforcement switched off for the duration of the deletes.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (2, 4)",
        "DELETE FROM messages WHERE conversation_id IN (2, 4)",
    ] {
        storage
            .conn
            .execute_compat(delete_sql, fparams![])
            .unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Fetches the next page of at most two conversations after `after_id`.
    let page_after = |after_id: i64| {
        storage
            .list_conversations_for_lexical_rebuild_after_id(
                2,
                after_id,
                &agent_slugs,
                &workspace_paths,
            )
            .unwrap()
    };

    let first_page = page_after(0);
    assert_eq!(first_page.len(), 2);
    let first_ids: Vec<i64> = first_page.iter().map(|c| c.id.unwrap()).collect();
    assert_eq!(first_ids, vec![1, 3]);

    let second_page = page_after(*first_ids.last().unwrap());
    assert_eq!(second_page.len(), 2);
    let second_ids: Vec<i64> = second_page.iter().map(|c| c.id.unwrap()).collect();
    assert_eq!(second_ids, vec![5, 6]);

    let third_page = page_after(*second_ids.last().unwrap());
    assert!(third_page.is_empty());

    // Stitching the pages together yields every surviving id exactly once.
    let visited: Vec<i64> = [first_ids.as_slice(), second_ids.as_slice()].concat();
    assert_eq!(visited, vec![1, 3, 5, 6]);
}
20090
/// Exercises the `after_id`/`through_id` bounded listing against a table where
/// ids 3, 5, 7 and 8 have been deleted, including a range that starts past the
/// last surviving row.
#[test]
fn keyset_traversal_through_id_with_sparse_ranges() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // One-message conversation whose external id, title and source path all
    // derive from `label`.
    fn labelled_conversation(label: &str, ts: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(ts),
            content: format!("msg for {label}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Ten inserts; the assertions below expect them to have received ids 1..=10.
    for n in 0..10 {
        let conversation = labelled_conversation(&format!("conv-{n}"), 1000 + n);
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Remove four conversations (and their messages) with foreign-key
    // enforcement switched off for the duration of the deletes.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    for delete_sql in [
        "DELETE FROM conversations WHERE id IN (3, 5, 7, 8)",
        "DELETE FROM messages WHERE conversation_id IN (3, 5, 7, 8)",
    ] {
        storage
            .conn
            .execute_compat(delete_sql, fparams![])
            .unwrap();
    }
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // (after_id, through_id, ids expected back). The upper bound 5 names a
    // deleted row, and the last range begins after the final surviving id.
    let range_cases = [
        (0, 5, vec![1, 2, 4]),
        (4, 10, vec![6, 9, 10]),
        (10, 20, vec![]),
    ];
    for (after_id, through_id, expected_ids) in range_cases {
        let page = storage
            .list_conversations_for_lexical_rebuild_after_id_through_id(
                100,
                after_id,
                through_id,
                &agent_slugs,
                &workspace_paths,
            )
            .unwrap();
        let returned_ids: Vec<i64> = page.iter().map(|c| c.id.unwrap()).collect();
        assert_eq!(returned_ids, expected_ids);
    }
}
20197
/// Checks the footprint listing for four conversations: plain ASCII, no
/// messages at all, multi-byte UTF-8 (with its tail-state row removed), and a
/// single message stored at index 10. Expected byte sizes are the per-message
/// planner estimate times the reported message count.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_estimates_bytes_and_keeps_empty_conversations()
{
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message with no author, no extra JSON and no snippets.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Stores a conversation named after `external_id` and hands back its row id.
    let store_conversation = |external_id: &str, base_ts: i64, messages: Vec<Message>| {
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some(external_id.to_string()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap()
            .conversation_id
    };

    let ascii_id = store_conversation(
        "footprint-ascii",
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_001, "abc"),
            message(1, MessageRole::Agent, 1_700_000_000_002, "defg"),
        ],
    );
    let empty_id = store_conversation("footprint-empty", 1_700_000_001_000, Vec::new());
    let utf8_id = store_conversation(
        "footprint-utf8",
        1_700_000_002_000,
        vec![message(0, MessageRole::Tool, 1_700_000_002_001, "hé🙂")],
    );
    let sparse_id = store_conversation(
        "footprint-sparse",
        1_700_000_003_000,
        vec![message(10, MessageRole::User, 1_700_000_003_010, "sparse")],
    );

    // Remove only the UTF-8 conversation's tail-state row; it must still be
    // reported with one message below.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![utf8_id],
        )
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![
        LexicalRebuildConversationFootprintRow {
            conversation_id: ascii_id,
            message_count: 2,
            message_bytes: 2 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        // The message-less conversation stays in the listing with zeroes.
        LexicalRebuildConversationFootprintRow {
            conversation_id: empty_id,
            message_count: 0,
            message_bytes: 0,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: utf8_id,
            message_count: 1,
            message_bytes: LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        // A lone message at idx 10 is planned as 11 messages.
        LexicalRebuildConversationFootprintRow {
            conversation_id: sparse_id,
            message_count: 11,
            message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
    ];
    assert_eq!(footprints, expected);
}
20333
/// With the cached tail columns nulled on the conversation row and its
/// `conversation_tail_state` row deleted, a conversation holding one message
/// at idx 10 must still be reported as 11 messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_falls_back_for_missing_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail".to_string()),
        title: Some("footprint-missing-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Erase both copies of the cached tail metadata for this conversation.
    for wipe_sql in [
        "UPDATE conversations
         SET last_message_idx = NULL, last_message_created_at = NULL
         WHERE id = ?1",
        "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
    ] {
        storage
            .conn
            .execute_compat(wipe_sql, fparams![conversation_id])
            .unwrap();
    }

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "missing tail-cache metadata should fall back to messages MAX(idx) instead of treating legacy conversations as empty"
    );
}
20413
/// Stores three messages, then rewrites both cached tail records to claim the
/// last message index is 0; the footprint listing must still report three
/// messages.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_raises_stale_low_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Messages at idx 0, 1 and 2 with consecutive timestamps.
    let three_messages: Vec<Message> = (0..3)
        .map(|idx| Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_010 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        })
        .collect();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-stale-tail".to_string()),
        title: Some("footprint-stale-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-stale-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: three_messages,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Force both cached tail records to point at the first message only.
    for stale_sql in [
        "UPDATE conversations
         SET last_message_idx = 0, last_message_created_at = 1700000000010
         WHERE id = ?1",
        "UPDATE conversation_tail_state
         SET last_message_idx = 0, last_message_created_at = 1700000000010
         WHERE conversation_id = ?1",
    ] {
        storage
            .conn
            .execute_compat(stale_sql, fparams![conversation_id])
            .unwrap();
    }

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 3,
        message_bytes: 3 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "stale-low tail caches must not under-plan lexical shards and trip doc>plan invariants"
    );
}
20497
/// Drops the `conversation_tail_state` table outright (after nulling the
/// cached tail columns on the conversation row) and checks the footprint
/// listing still succeeds, reporting 11 messages for a lone message at idx 10.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_missing_tail_state_table() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail without hot table".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail-table".to_string()),
        title: Some("footprint-missing-tail-table".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail-table.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Null the cached columns first, then remove the side table entirely.
    let clear_cached_tail = "UPDATE conversations
         SET last_message_idx = NULL, last_message_created_at = NULL
         WHERE id = ?1";
    storage
        .conn
        .execute_compat(clear_cached_tail, fparams![conversation_id])
        .unwrap();
    storage
        .conn
        .execute_compat("DROP TABLE conversation_tail_state", fparams![])
        .unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    assert_eq!(
        footprints, expected,
        "read-only lexical self-heal must tolerate pre-tail-cache databases and use messages MAX(idx)"
    );
}
20574
/// Opens the checked-in search demo database read-only and checks that the
/// footprint listing returns at least one row and that every row reports a
/// positive message count.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_legacy_search_demo_fixture() {
    // <crate root>/tests/fixtures/search_demo_data/agent_search.db
    let mut fixture_db = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    fixture_db.extend(["tests", "fixtures", "search_demo_data", "agent_search.db"]);

    let storage = FrankenStorage::open_readonly(&fixture_db).unwrap();
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert!(
        !footprints.is_empty(),
        "search self-heal should be able to plan a lexical rebuild from the legacy search demo fixture"
    );

    let every_row_has_messages = footprints.iter().all(|row| row.message_count > 0);
    assert!(
        every_row_has_messages,
        "legacy fixture conversations should derive message counts from messages when tail caches are absent"
    );
}
20599
/// Overwrites a stored conversation so its `source_id` is whitespace and only
/// `origin_host` is set, then checks that both the regular listing and the
/// lexical-rebuild listing report the host as the source id.
#[test]
fn lexical_rebuild_listing_normalizes_host_only_remote_source_from_blank_source_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&database_file).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let stored_as_local = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("legacy-blank-source".into()),
        title: Some("Legacy blank source".into()),
        source_path: PathBuf::from("/tmp/legacy-blank-source.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &stored_as_local)
        .unwrap()
        .conversation_id;

    // Rewrite the row in place, with foreign-key enforcement off, so it
    // carries a blank source id alongside a populated origin host.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    storage
        .conn
        .execute_compat(
            "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE id = ?3",
            fparams![" ", "dev@laptop", conversation_id],
        )
        .unwrap();
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    // Regular listing.
    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let from_listing = &listed[0];
    assert_eq!(from_listing.source_id, "dev@laptop");
    assert_eq!(from_listing.origin_host.as_deref(), Some("dev@laptop"));

    // Lexical-rebuild listing.
    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
    let rebuild_listed = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    assert_eq!(rebuild_listed.len(), 1);
    let from_rebuild = &rebuild_listed[0];
    assert_eq!(from_rebuild.source_id, "dev@laptop");
    assert_eq!(from_rebuild.origin_host.as_deref(), Some("dev@laptop"));
}
20668
/// End-to-end check of seeding a fresh canonical database from a backup
/// bundle: the backup is populated, rewound to schema version 13 and given two
/// hand-written `fts_messages` rows in `sqlite_master`; after seeding, the
/// canonical database must hold the conversation and message, have its
/// scan/index/embedding markers cleared, and expose a single queryable
/// `fts_messages` schema row.
#[test]
fn seed_canonical_from_best_historical_bundle_copies_data_and_resets_runtime_meta() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let workdir = TempDir::new().unwrap();
    let canonical_db = workdir.path().join("agent_search.db");
    let backup_db = workdir
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");

    fs::create_dir_all(backup_db.parent().unwrap()).unwrap();

    // --- Populate the backup with one conversation and some runtime meta. ---
    let historical = SqliteStorage::open(&backup_db).unwrap();
    let agent_id = historical
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    let seeded_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "seeded message".into(),
        extra_json: serde_json::json!({"usage": {"total_tokens": 12}}),
        snippets: Vec::new(),
    };
    let seed_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("seed-conv".into()),
        title: Some("Historical seed".into()),
        source_path: PathBuf::from("/tmp/historical-seed.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::json!({"seed": true}),
        messages: vec![seeded_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    historical
        .insert_conversation_tree(agent_id, None, &seed_conversation)
        .unwrap();
    historical.set_last_scan_ts(123).unwrap();
    historical.set_last_indexed_at(456).unwrap();
    historical.set_last_embedded_message_id(789).unwrap();
    historical
        .conn
        .execute_compat(
            "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
            fparams!["historical_bundle_salvaged:stale", "{\"stale\":true}"],
        )
        .unwrap();
    drop(historical);

    // --- Rewind the backup's schema bookkeeping and forge duplicate FTS rows. ---
    let forged_fts_definitions = [
        "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, content='', tokenize='porter')",
        "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')",
    ];
    let tamper = rusqlite_test_fixture_conn(&backup_db);
    // Record schema version 13, forget migration 14, and allow direct writes
    // to sqlite_master for the inserts that follow.
    tamper
        .execute_batch(
            "UPDATE meta SET value = '13' WHERE key = 'schema_version';
             DELETE FROM _schema_migrations WHERE version = 14;
             PRAGMA writable_schema = ON;",
        )
        .unwrap();
    tamper
        .execute(
            "DELETE FROM meta WHERE key = ?1",
            [FTS_FRANKEN_REBUILD_META_KEY],
        )
        .unwrap();
    // Same INSERT twice, once per definition, in the order listed above.
    for create_sql in forged_fts_definitions {
        tamper
            .execute(
                "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                [create_sql],
            )
            .unwrap();
    }
    tamper
        .execute_batch("PRAGMA writable_schema = OFF;")
        .unwrap();
    drop(tamper);

    // --- Sanity-check the fixture; the connection closes with this scope. ---
    {
        let fixture_check = rusqlite_test_fixture_conn(&backup_db);
        fixture_check
            .execute_batch("PRAGMA writable_schema = ON;")
            .unwrap();
        let forged_rows: i64 = fixture_check
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(
            forged_rows, 2,
            "test fixture should reproduce the duplicate legacy fts_messages rows"
        );
        let backup_messages: i64 = fixture_check
            .query_row("SELECT COUNT(*) FROM messages", [], |row| row.get(0))
            .unwrap();
        assert_eq!(backup_messages, 1);
    }

    // --- Create an empty canonical database, then seed it from the backup. ---
    drop(SqliteStorage::open(&canonical_db).unwrap());

    let seed_result = seed_canonical_from_best_historical_bundle(&canonical_db).unwrap();
    let outcome = seed_result.unwrap();
    assert_eq!(outcome.bundles_imported, 1);
    assert_eq!(outcome.conversations_imported, 1);
    assert_eq!(outcome.messages_imported, 1);

    // --- The seeded data must be visible through a read-only connection. ---
    // This handle is deliberately left open until the end of the test.
    let readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let messages_via_readonly: i64 = readonly
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(messages_via_readonly, 1);

    // --- Data copied, runtime markers cleared. ---
    let seeded = SqliteStorage::open(&canonical_db).unwrap();
    let (session_count, ..) = seeded
        .count_sessions_in_range(None, None, None, None)
        .unwrap();
    assert_eq!(session_count, 1);
    let messages_via_storage: i64 = seeded
        .conn
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(messages_via_storage, 1);
    assert_eq!(seeded.get_last_scan_ts().unwrap(), None);
    assert_eq!(seeded.get_last_embedded_message_id().unwrap(), None);

    let last_indexed_row: Option<String> = seeded
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = 'last_indexed_at'",
            fparams![],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(last_indexed_row.is_none());

    // Exactly one salvage marker is present after seeding.
    let salvage_markers: Vec<String> = seeded
        .conn
        .query_map_collect(
            "SELECT key FROM meta WHERE key LIKE 'historical_bundle_salvaged:%' ORDER BY key",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(salvage_markers.len(), 1);

    // --- A second read-only open sees one fts_messages row and the message. ---
    let readonly_again = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let fts_rows_after_seed: i64 = readonly_again
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        fts_rows_after_seed, 1,
        "seeded canonical db should keep a single stock-SQLite fts_messages schema row"
    );
    let messages_after_reopen: i64 = readonly_again
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(messages_after_reopen, 1);

    // --- Opening through FrankenStorage reports the current schema version,
    // --- and the FTS table is queryable once consistency has been ensured.
    let franken_seeded = FrankenStorage::open(&canonical_db).unwrap();
    assert_eq!(
        franken_seeded.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION
    );
    franken_seeded
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency after seed");
    let fts_rows_after_franken: i64 = franken_seeded
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(fts_rows_after_franken, 1);
    let fts_probe = franken_seeded
        .raw()
        .query("SELECT COUNT(*) FROM fts_messages");
    assert!(
        fts_probe.is_ok(),
        "expected post-seed FTS to be queryable, got {fts_probe:?}"
    );
}
20916
/// A backup whose recorded schema version is 12 must be rejected by the seed
/// routine, and the rejection must leave the existing canonical database
/// untouched: its sentinel meta row survives and no conversations appear.
#[test]
fn failed_baseline_seed_preserves_existing_canonical_bundle() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let workdir = TempDir::new().unwrap();
    let canonical_db = workdir.path().join("agent_search.db");
    let backup_db = workdir
        .path()
        .join("backups/agent_search.db.20260325T120000Z.bad-seed.bak");

    fs::create_dir_all(backup_db.parent().unwrap()).unwrap();

    // Mark the canonical database so any replacement would be noticeable.
    let canonical = SqliteStorage::open(&canonical_db).unwrap();
    canonical
        .conn
        .execute_compat(
            "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
            fparams!["sentinel", "keep-me"],
        )
        .unwrap();
    drop(canonical);

    // Populate the backup with one conversation.
    let historical = SqliteStorage::open(&backup_db).unwrap();
    let agent_id = historical
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    let doomed_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "this seed should fail".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let doomed_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("bad-seed-conv".into()),
        title: Some("Bad seed".into()),
        source_path: PathBuf::from("/tmp/bad-seed.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::json!({"seed": "bad"}),
        messages: vec![doomed_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    historical
        .insert_conversation_tree(agent_id, None, &doomed_conversation)
        .unwrap();
    drop(historical);

    // Rewrite the backup's recorded schema version to 12.
    let downgrade = FrankenConnection::open(backup_db.to_string_lossy().into_owned()).unwrap();
    downgrade
        .execute("UPDATE meta SET value = '12' WHERE key = 'schema_version'")
        .unwrap();
    drop(downgrade);

    // Seeding has to fail with the "too old" diagnostic.
    let err = seed_canonical_from_best_historical_bundle(&canonical_db).unwrap_err();
    let rendered_error = err.to_string();
    assert!(
        rendered_error.contains("schema_version 12 is too old for baseline import"),
        "unexpected seed error: {err:#}"
    );

    // The canonical database still carries the sentinel and holds no rows
    // from the backup.
    let reopened = SqliteStorage::open(&canonical_db).unwrap();
    let sentinel_value: Option<String> = reopened
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = 'sentinel'",
            fparams![],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert_eq!(sentinel_value.as_deref(), Some("keep-me"));

    let conversations_via_storage: i64 = reopened
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversations_via_storage, 0);

    // Same answer through a read-only connection, opened while `reopened`
    // is still alive.
    let readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let conversations_via_readonly: i64 = readonly
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversations_via_readonly, 0);
}
21023
21024 #[test]
21025 fn fetch_messages_for_lexical_rebuild_skips_extra_json() {
21026 let dir = TempDir::new().unwrap();
21027 let db_path = dir.path().join("test.db");
21028 let storage = SqliteStorage::open(&db_path).unwrap();
21029
21030 let agent = Agent {
21031 id: None,
21032 slug: "codex".into(),
21033 name: "Codex".into(),
21034 version: Some("0.2.3".into()),
21035 kind: AgentKind::Cli,
21036 };
21037 let agent_id = storage.ensure_agent(&agent).unwrap();
21038
21039 let conversation = Conversation {
21040 id: None,
21041 agent_slug: "codex".into(),
21042 workspace: Some(PathBuf::from("/tmp/workspace")),
21043 external_id: Some("lexical-rebuild-test".into()),
21044 title: Some("Lexical rebuild".into()),
21045 source_path: PathBuf::from("/tmp/lexical-rebuild.jsonl"),
21046 started_at: Some(1_700_000_000_000),
21047 ended_at: Some(1_700_000_000_100),
21048 approx_tokens: Some(42),
21049 metadata_json: serde_json::Value::Null,
21050 messages: vec![Message {
21051 id: None,
21052 idx: 0,
21053 role: MessageRole::Agent,
21054 author: Some("assistant".into()),
21055 created_at: Some(1_700_000_000_050),
21056 content: "indexed text".into(),
21057 extra_json: serde_json::json!({
21058 "usage": { "total_tokens": 1234 },
21059 "irrelevant_blob": "still preserved in canonical storage"
21060 }),
21061 snippets: Vec::new(),
21062 }],
21063 source_id: LOCAL_SOURCE_ID.into(),
21064 origin_host: None,
21065 };
21066
21067 let inserted = storage
21068 .insert_conversation_tree(agent_id, None, &conversation)
21069 .unwrap();
21070 let conversation_id = inserted.conversation_id;
21071
21072 let stored = storage.fetch_messages(conversation_id).unwrap();
21073 assert_eq!(stored.len(), 1);
21074 assert!(!stored[0].extra_json.is_null());
21075
21076 let lexical = storage
21077 .fetch_messages_for_lexical_rebuild(conversation_id)
21078 .unwrap();
21079 assert_eq!(lexical.len(), 1);
21080 assert_eq!(lexical[0].content, "indexed text");
21081 assert_eq!(lexical[0].author.as_deref(), Some("assistant"));
21082 assert!(lexical[0].extra_json.is_null());
21083 }
21084
21085 #[test]
21086 fn fetch_messages_for_lexical_rebuild_batch_groups_and_orders_messages() {
21087 let dir = TempDir::new().unwrap();
21088 let db_path = dir.path().join("test.db");
21089 let storage = SqliteStorage::open(&db_path).unwrap();
21090
21091 let agent = Agent {
21092 id: None,
21093 slug: "codex".into(),
21094 name: "Codex".into(),
21095 version: Some("0.2.3".into()),
21096 kind: AgentKind::Cli,
21097 };
21098 let agent_id = storage.ensure_agent(&agent).unwrap();
21099
21100 let first = Conversation {
21101 id: None,
21102 agent_slug: "codex".into(),
21103 workspace: Some(PathBuf::from("/tmp/workspace")),
21104 external_id: Some("lexical-batch-1".into()),
21105 title: Some("Lexical batch 1".into()),
21106 source_path: PathBuf::from("/tmp/lexical-batch-1.jsonl"),
21107 started_at: Some(1_700_000_000_000),
21108 ended_at: Some(1_700_000_000_100),
21109 approx_tokens: Some(42),
21110 metadata_json: serde_json::Value::Null,
21111 messages: vec![
21112 Message {
21113 id: None,
21114 idx: 0,
21115 role: MessageRole::User,
21116 author: Some("user".into()),
21117 created_at: Some(1_700_000_000_010),
21118 content: "first-a".into(),
21119 extra_json: serde_json::json!({"opaque": true}),
21120 snippets: Vec::new(),
21121 },
21122 Message {
21123 id: None,
21124 idx: 1,
21125 role: MessageRole::Agent,
21126 author: Some("assistant".into()),
21127 created_at: Some(1_700_000_000_020),
21128 content: "first-b".into(),
21129 extra_json: serde_json::json!({"opaque": true}),
21130 snippets: Vec::new(),
21131 },
21132 ],
21133 source_id: LOCAL_SOURCE_ID.into(),
21134 origin_host: None,
21135 };
21136
21137 let second = Conversation {
21138 id: None,
21139 agent_slug: "codex".into(),
21140 workspace: Some(PathBuf::from("/tmp/workspace")),
21141 external_id: Some("lexical-batch-2".into()),
21142 title: Some("Lexical batch 2".into()),
21143 source_path: PathBuf::from("/tmp/lexical-batch-2.jsonl"),
21144 started_at: Some(1_700_000_000_200),
21145 ended_at: Some(1_700_000_000_300),
21146 approx_tokens: Some(84),
21147 metadata_json: serde_json::Value::Null,
21148 messages: vec![Message {
21149 id: None,
21150 idx: 0,
21151 role: MessageRole::Tool,
21152 author: Some("tool".into()),
21153 created_at: Some(1_700_000_000_210),
21154 content: "second-a".into(),
21155 extra_json: serde_json::json!({"opaque": true}),
21156 snippets: Vec::new(),
21157 }],
21158 source_id: LOCAL_SOURCE_ID.into(),
21159 origin_host: None,
21160 };
21161 let third = Conversation {
21162 external_id: Some("lexical-batch-3".into()),
21163 title: Some("Lexical batch 3".into()),
21164 source_path: PathBuf::from("/tmp/lexical-batch-3.jsonl"),
21165 messages: vec![Message {
21166 id: None,
21167 idx: 0,
21168 role: MessageRole::System,
21169 author: Some("system".into()),
21170 created_at: Some(1_700_000_000_410),
21171 content: "third-a".into(),
21172 extra_json: serde_json::json!({"opaque": true}),
21173 snippets: Vec::new(),
21174 }],
21175 ..second.clone()
21176 };
21177
21178 let first_id = storage
21179 .insert_conversation_tree(agent_id, None, &first)
21180 .unwrap()
21181 .conversation_id;
21182 let second_id = storage
21183 .insert_conversation_tree(agent_id, None, &second)
21184 .unwrap()
21185 .conversation_id;
21186 let third_id = storage
21187 .insert_conversation_tree(agent_id, None, &third)
21188 .unwrap()
21189 .conversation_id;
21190
21191 let lexical = storage
21192 .fetch_messages_for_lexical_rebuild_batch(&[third_id, first_id], None, None)
21193 .unwrap();
21194
21195 let first_messages = lexical.get(&first_id).expect("first conversation");
21196 assert_eq!(first_messages.len(), 2);
21197 assert_eq!(first_messages[0].content, "first-a");
21198 assert_eq!(first_messages[1].content, "first-b");
21199 assert!(
21200 first_messages
21201 .iter()
21202 .all(|message| message.extra_json.is_null())
21203 );
21204
21205 assert!(
21206 !lexical.contains_key(&second_id),
21207 "batch fetch must exclude conversations not requested by the caller"
21208 );
21209
21210 let third_messages = lexical.get(&third_id).expect("third conversation");
21211 assert_eq!(third_messages.len(), 1);
21212 assert_eq!(third_messages[0].content, "third-a");
21213 assert!(third_messages[0].extra_json.is_null());
21214 }
21215
21216 #[test]
21217 fn fetch_messages_for_lexical_rebuild_batch_enforces_content_byte_guardrail() {
21218 let dir = TempDir::new().unwrap();
21219 let db_path = dir.path().join("test.db");
21220 let storage = SqliteStorage::open(&db_path).unwrap();
21221
21222 let agent = Agent {
21223 id: None,
21224 slug: "codex".into(),
21225 name: "Codex".into(),
21226 version: Some("0.2.3".into()),
21227 kind: AgentKind::Cli,
21228 };
21229 let agent_id = storage.ensure_agent(&agent).unwrap();
21230
21231 let conversation = Conversation {
21232 id: None,
21233 agent_slug: "codex".into(),
21234 workspace: Some(PathBuf::from("/tmp/workspace")),
21235 external_id: Some("lexical-batch-guard".into()),
21236 title: Some("Lexical batch guard".into()),
21237 source_path: PathBuf::from("/tmp/lexical-batch-guard.jsonl"),
21238 started_at: Some(1_700_000_000_000),
21239 ended_at: Some(1_700_000_000_100),
21240 approx_tokens: Some(42),
21241 metadata_json: serde_json::Value::Null,
21242 messages: vec![
21243 Message {
21244 id: None,
21245 idx: 0,
21246 role: MessageRole::User,
21247 author: Some("user".into()),
21248 created_at: Some(1_700_000_000_010),
21249 content: "123456".into(),
21250 extra_json: serde_json::Value::Null,
21251 snippets: Vec::new(),
21252 },
21253 Message {
21254 id: None,
21255 idx: 1,
21256 role: MessageRole::Agent,
21257 author: Some("assistant".into()),
21258 created_at: Some(1_700_000_000_020),
21259 content: "abcdef".into(),
21260 extra_json: serde_json::Value::Null,
21261 snippets: Vec::new(),
21262 },
21263 ],
21264 source_id: LOCAL_SOURCE_ID.into(),
21265 origin_host: None,
21266 };
21267
21268 let conversation_id = storage
21269 .insert_conversation_tree(agent_id, None, &conversation)
21270 .unwrap()
21271 .conversation_id;
21272
21273 let error = storage
21274 .fetch_messages_for_lexical_rebuild_batch(&[conversation_id], Some(10), Some(8))
21275 .expect_err("guardrail should reject oversized batch content");
21276
21277 let message = format!("{error:#}");
21278 assert!(
21279 message.contains("content-byte guardrail"),
21280 "expected guardrail reason in error, got {message}"
21281 );
21282 }
21283
21284 #[test]
21285 fn fetch_messages_handles_manual_rows_inserted_via_raw_connection() {
21286 let dir = TempDir::new().unwrap();
21287 let db_path = dir.path().join("manual-rows.db");
21288 let storage = FrankenStorage::open(&db_path).unwrap();
21289 let conn = storage.raw();
21290
21291 conn.execute(
21292 "INSERT INTO agents (id, slug, name, kind, created_at, updated_at)
21293 VALUES (1, 'claude_code', 'Claude Code', 'local', 0, 0)",
21294 )
21295 .unwrap();
21296 conn.execute(
21297 "INSERT INTO conversations
21298 (id, agent_id, external_id, title, source_path, source_id, started_at)
21299 VALUES (1, 1, 'manual-ext', 'Manual Session', '/tmp/manual.jsonl', 'local', 200)",
21300 )
21301 .unwrap();
21302 conn.execute(
21303 "INSERT INTO messages
21304 (id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
21305 VALUES (1, 1, 0, 'user', 'tester', 1700000000000, 'manual body', '{\"k\":1}', NULL)",
21306 )
21307 .unwrap();
21308
21309 let lexical = storage.fetch_messages_for_lexical_rebuild(1).unwrap();
21310 assert_eq!(lexical.len(), 1);
21311 assert_eq!(lexical[0].content, "manual body");
21312
21313 let full = storage.fetch_messages(1).unwrap();
21314 assert_eq!(full.len(), 1);
21315 assert_eq!(full[0].content, "manual body");
21316 assert_eq!(full[0].author.as_deref(), Some("tester"));
21317 assert_eq!(full[0].extra_json, serde_json::json!({ "k": 1 }));
21318 }
21319
21320 #[test]
21321 fn lexical_rebuild_batch_messages_query_avoids_sorter_temp_btrees() {
21322 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21323 use std::path::PathBuf;
21324
21325 let dir = TempDir::new().unwrap();
21326 let db_path = dir.path().join("agent_search.db");
21327 let storage = SqliteStorage::open(&db_path).unwrap();
21328
21329 let agent = Agent {
21330 id: None,
21331 slug: "claude_code".into(),
21332 name: "Claude Code".into(),
21333 version: None,
21334 kind: AgentKind::Cli,
21335 };
21336 let agent_id = storage.ensure_agent(&agent).unwrap();
21337
21338 for (external_id, base_ts) in [
21339 ("conv-1", 1_700_000_000_000_i64),
21340 ("conv-2", 1_700_000_001_000_i64),
21341 ] {
21342 let conversation = Conversation {
21343 id: None,
21344 agent_slug: "claude_code".into(),
21345 workspace: Some(PathBuf::from("/tmp/workspace")),
21346 external_id: Some(external_id.to_string()),
21347 title: Some("Lexical rebuild".into()),
21348 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
21349 started_at: Some(base_ts),
21350 ended_at: Some(base_ts + 100),
21351 approx_tokens: None,
21352 metadata_json: serde_json::Value::Null,
21353 messages: vec![
21354 Message {
21355 id: None,
21356 idx: 0,
21357 role: MessageRole::User,
21358 author: Some("user".into()),
21359 created_at: Some(base_ts + 10),
21360 content: format!("{external_id}-first"),
21361 extra_json: serde_json::Value::Null,
21362 snippets: Vec::new(),
21363 },
21364 Message {
21365 id: None,
21366 idx: 1,
21367 role: MessageRole::Agent,
21368 author: Some("assistant".into()),
21369 created_at: Some(base_ts + 20),
21370 content: format!("{external_id}-second"),
21371 extra_json: serde_json::Value::Null,
21372 snippets: Vec::new(),
21373 },
21374 ],
21375 source_id: LOCAL_SOURCE_ID.into(),
21376 origin_host: None,
21377 };
21378 storage
21379 .insert_conversation_tree(agent_id, None, &conversation)
21380 .unwrap();
21381 }
21382
21383 let conversation_ids: Vec<i64> = storage
21384 .conn
21385 .query_map_collect(
21386 "SELECT id FROM conversations ORDER BY id",
21387 fparams![],
21388 |row| row.get_typed(0),
21389 )
21390 .unwrap();
21391 assert_eq!(conversation_ids.len(), 2);
21392
21393 let plan_details: Vec<String> = storage
21394 .conn
21395 .query_map_collect(
21396 "EXPLAIN QUERY PLAN \
21397 SELECT conversation_id, id, idx, role, author, created_at, content \
21398 FROM messages \
21399 WHERE conversation_id IN (?1, ?2) \
21400 ORDER BY conversation_id ASC, idx ASC",
21401 fparams![conversation_ids[0], conversation_ids[1]],
21402 |row| row.get_typed(3),
21403 )
21404 .unwrap();
21405
21406 assert!(
21407 plan_details
21408 .iter()
21409 .any(|detail| detail.contains("sqlite_autoindex_messages_1")),
21410 "expected batched lexical rebuild fetch to use the conversation_id/idx composite index, got {plan_details:?}"
21411 );
21412 assert!(
21413 !plan_details
21414 .iter()
21415 .any(|detail| detail.contains("TEMP B-TREE")),
21416 "expected batched lexical rebuild fetch to avoid sorter temp b-trees, got {plan_details:?}"
21417 );
21418 }
21419
21420 #[test]
21421 fn stream_messages_for_lexical_rebuild_groups_and_orders_messages() {
21422 let dir = TempDir::new().unwrap();
21423 let db_path = dir.path().join("test.db");
21424 let storage = SqliteStorage::open(&db_path).unwrap();
21425
21426 let agent = Agent {
21427 id: None,
21428 slug: "codex".into(),
21429 name: "Codex".into(),
21430 version: Some("0.2.3".into()),
21431 kind: AgentKind::Cli,
21432 };
21433 let agent_id = storage.ensure_agent(&agent).unwrap();
21434
21435 let first = Conversation {
21436 id: None,
21437 agent_slug: "codex".into(),
21438 workspace: Some(PathBuf::from("/tmp/workspace")),
21439 external_id: Some("lexical-stream-1".into()),
21440 title: Some("Lexical stream 1".into()),
21441 source_path: PathBuf::from("/tmp/lexical-stream-1.jsonl"),
21442 started_at: Some(1_700_000_000_000),
21443 ended_at: Some(1_700_000_000_100),
21444 approx_tokens: Some(42),
21445 metadata_json: serde_json::Value::Null,
21446 messages: vec![
21447 Message {
21448 id: None,
21449 idx: 0,
21450 role: MessageRole::User,
21451 author: Some("user".into()),
21452 created_at: Some(1_700_000_000_010),
21453 content: "first-a".into(),
21454 extra_json: serde_json::json!({"opaque": true}),
21455 snippets: Vec::new(),
21456 },
21457 Message {
21458 id: None,
21459 idx: 1,
21460 role: MessageRole::Agent,
21461 author: Some("assistant".into()),
21462 created_at: Some(1_700_000_000_020),
21463 content: "first-b".into(),
21464 extra_json: serde_json::json!({"opaque": true}),
21465 snippets: Vec::new(),
21466 },
21467 ],
21468 source_id: LOCAL_SOURCE_ID.into(),
21469 origin_host: None,
21470 };
21471
21472 let second = Conversation {
21473 id: None,
21474 agent_slug: "codex".into(),
21475 workspace: Some(PathBuf::from("/tmp/workspace")),
21476 external_id: Some("lexical-stream-2".into()),
21477 title: Some("Lexical stream 2".into()),
21478 source_path: PathBuf::from("/tmp/lexical-stream-2.jsonl"),
21479 started_at: Some(1_700_000_000_200),
21480 ended_at: Some(1_700_000_000_300),
21481 approx_tokens: Some(84),
21482 metadata_json: serde_json::Value::Null,
21483 messages: vec![Message {
21484 id: None,
21485 idx: 0,
21486 role: MessageRole::Tool,
21487 author: Some("tool".into()),
21488 created_at: Some(1_700_000_000_210),
21489 content: "second-a".into(),
21490 extra_json: serde_json::json!({"opaque": true}),
21491 snippets: Vec::new(),
21492 }],
21493 source_id: LOCAL_SOURCE_ID.into(),
21494 origin_host: None,
21495 };
21496
21497 let first_id = storage
21498 .insert_conversation_tree(agent_id, None, &first)
21499 .unwrap()
21500 .conversation_id;
21501 let second_id = storage
21502 .insert_conversation_tree(agent_id, None, &second)
21503 .unwrap()
21504 .conversation_id;
21505
21506 let mut streamed = Vec::new();
21507 storage
21508 .stream_messages_for_lexical_rebuild_from_conversation_id(first_id, |row| {
21509 streamed.push((
21510 row.conversation_id,
21511 row.idx,
21512 row.role,
21513 row.author,
21514 row.content,
21515 ));
21516 Ok(())
21517 })
21518 .unwrap();
21519
21520 assert_eq!(
21521 streamed,
21522 vec![
21523 (
21524 first_id,
21525 0,
21526 "user".to_string(),
21527 Some("user".to_string()),
21528 "first-a".to_string(),
21529 ),
21530 (
21531 first_id,
21532 1,
21533 "agent".to_string(),
21534 Some("assistant".to_string()),
21535 "first-b".to_string(),
21536 ),
21537 (
21538 second_id,
21539 0,
21540 "tool".to_string(),
21541 Some("tool".to_string()),
21542 "second-a".to_string(),
21543 ),
21544 ]
21545 );
21546 }
21547
21548 #[test]
21549 fn stream_messages_for_lexical_rebuild_between_conversation_ids_respects_upper_bound() {
21550 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21551 use std::path::PathBuf;
21552
21553 let dir = TempDir::new().unwrap();
21554 let db_path = dir.path().join("agent_search.db");
21555 let storage = SqliteStorage::open(&db_path).unwrap();
21556
21557 let agent = Agent {
21558 id: None,
21559 slug: "claude_code".into(),
21560 name: "Claude Code".into(),
21561 version: Some("1.2.3".into()),
21562 kind: AgentKind::Cli,
21563 };
21564 let agent_id = storage.ensure_agent(&agent).unwrap();
21565
21566 let first = Conversation {
21567 id: None,
21568 agent_slug: "claude_code".into(),
21569 workspace: Some(PathBuf::from("/tmp/workspace")),
21570 external_id: Some("lexical-range-1".into()),
21571 title: Some("Lexical range 1".into()),
21572 source_path: PathBuf::from("/tmp/lexical-range-1.jsonl"),
21573 started_at: Some(1_700_000_000_000),
21574 ended_at: Some(1_700_000_000_100),
21575 approx_tokens: Some(42),
21576 metadata_json: serde_json::Value::Null,
21577 messages: vec![Message {
21578 id: None,
21579 idx: 0,
21580 role: MessageRole::User,
21581 author: Some("user".into()),
21582 created_at: Some(1_700_000_000_010),
21583 content: "first-only".into(),
21584 extra_json: serde_json::json!({"opaque": true}),
21585 snippets: Vec::new(),
21586 }],
21587 source_id: LOCAL_SOURCE_ID.into(),
21588 origin_host: None,
21589 };
21590
21591 let second = Conversation {
21592 id: None,
21593 agent_slug: "claude_code".into(),
21594 workspace: Some(PathBuf::from("/tmp/workspace")),
21595 external_id: Some("lexical-range-2".into()),
21596 title: Some("Lexical range 2".into()),
21597 source_path: PathBuf::from("/tmp/lexical-range-2.jsonl"),
21598 started_at: Some(1_700_000_000_200),
21599 ended_at: Some(1_700_000_000_300),
21600 approx_tokens: Some(84),
21601 metadata_json: serde_json::Value::Null,
21602 messages: vec![Message {
21603 id: None,
21604 idx: 0,
21605 role: MessageRole::Tool,
21606 author: Some("tool".into()),
21607 created_at: Some(1_700_000_000_210),
21608 content: "second-should-not-appear".into(),
21609 extra_json: serde_json::json!({"opaque": true}),
21610 snippets: Vec::new(),
21611 }],
21612 source_id: LOCAL_SOURCE_ID.into(),
21613 origin_host: None,
21614 };
21615
21616 let first_id = storage
21617 .insert_conversation_tree(agent_id, None, &first)
21618 .unwrap()
21619 .conversation_id;
21620 let second_id = storage
21621 .insert_conversation_tree(agent_id, None, &second)
21622 .unwrap()
21623 .conversation_id;
21624
21625 let mut streamed = Vec::new();
21626 storage
21627 .stream_messages_for_lexical_rebuild_between_conversation_ids(
21628 first_id,
21629 first_id,
21630 |row| {
21631 streamed.push((row.conversation_id, row.idx, row.content));
21632 Ok(())
21633 },
21634 )
21635 .unwrap();
21636
21637 assert_eq!(streamed, vec![(first_id, 0, "first-only".to_string())]);
21638 assert!(
21639 streamed
21640 .iter()
21641 .all(|(conversation_id, _, _)| *conversation_id != second_id),
21642 "upper bound should exclude later conversation ids"
21643 );
21644 }
21645
21646 #[test]
21647 fn stream_messages_for_lexical_rebuild_between_conversation_ids_handles_mixed_ranges() {
21648 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21649 use std::path::PathBuf;
21650
21651 let dir = TempDir::new().unwrap();
21652 let db_path = dir.path().join("agent_search.db");
21653 let storage = SqliteStorage::open(&db_path).unwrap();
21654
21655 let claude_agent_id = storage
21656 .ensure_agent(&Agent {
21657 id: None,
21658 slug: "claude_code".into(),
21659 name: "Claude Code".into(),
21660 version: None,
21661 kind: AgentKind::Cli,
21662 })
21663 .unwrap();
21664 let aider_agent_id = storage
21665 .ensure_agent(&Agent {
21666 id: None,
21667 slug: "aider".into(),
21668 name: "Aider".into(),
21669 version: None,
21670 kind: AgentKind::Cli,
21671 })
21672 .unwrap();
21673
21674 type MessageSpec = (i64, MessageRole, Option<String>, Option<i64>, String);
21675
21676 let mut expected = Vec::new();
21677 let mut first_conversation_id = None;
21678 let mut last_conversation_id = None;
21679 let mut insert_conversation =
21680 |agent_id: i64,
21681 external_id: &str,
21682 title: &str,
21683 source_path: &str,
21684 started_at: i64,
21685 message_specs: Vec<MessageSpec>| {
21686 let conversation = Conversation {
21687 id: None,
21688 agent_slug: if agent_id == aider_agent_id {
21689 "aider".into()
21690 } else {
21691 "claude_code".into()
21692 },
21693 workspace: Some(PathBuf::from("/tmp/workspace")),
21694 external_id: Some(external_id.to_string()),
21695 title: Some(title.to_string()),
21696 source_path: PathBuf::from(source_path),
21697 started_at: Some(started_at),
21698 ended_at: Some(started_at + 100),
21699 approx_tokens: None,
21700 metadata_json: serde_json::Value::Null,
21701 messages: message_specs
21702 .iter()
21703 .map(|(idx, role, author, created_at, content)| Message {
21704 id: None,
21705 idx: *idx,
21706 role: role.clone(),
21707 author: author.clone(),
21708 created_at: *created_at,
21709 content: content.clone(),
21710 extra_json: serde_json::Value::Null,
21711 snippets: Vec::new(),
21712 })
21713 .collect(),
21714 source_id: LOCAL_SOURCE_ID.into(),
21715 origin_host: None,
21716 };
21717 let conversation_id = storage
21718 .insert_conversation_tree(agent_id, None, &conversation)
21719 .unwrap()
21720 .conversation_id;
21721 if first_conversation_id.is_none() {
21722 first_conversation_id = Some(conversation_id);
21723 }
21724 last_conversation_id = Some(conversation_id);
21725 expected.extend(message_specs.into_iter().map(
21726 |(idx, role, author, created_at, content)| {
21727 (
21728 conversation_id,
21729 idx,
21730 match role {
21731 MessageRole::User => "user".to_string(),
21732 MessageRole::Agent => "agent".to_string(),
21733 MessageRole::Tool => "tool".to_string(),
21734 MessageRole::System => "system".to_string(),
21735 MessageRole::Other(other) => other,
21736 },
21737 author,
21738 created_at,
21739 content,
21740 )
21741 },
21742 ));
21743 };
21744
21745 for (label, base_ts) in [
21746 ("alpha", 1_700_000_000_000_i64),
21747 ("beta", 1_700_000_001_000_i64),
21748 ("gamma", 1_700_000_002_000_i64),
21749 ("delta", 1_700_000_003_000_i64),
21750 ("epsilon", 1_700_000_004_000_i64),
21751 ] {
21752 insert_conversation(
21753 claude_agent_id,
21754 &format!("lexical-{label}"),
21755 &format!("Lexical {label}"),
21756 &format!("/tmp/{label}.jsonl"),
21757 base_ts,
21758 vec![
21759 (
21760 0,
21761 MessageRole::User,
21762 None,
21763 Some(base_ts + 10),
21764 format!("{label}_content"),
21765 ),
21766 (
21767 1,
21768 MessageRole::Agent,
21769 None,
21770 Some(base_ts + 20),
21771 format!("{label}_content_response"),
21772 ),
21773 ],
21774 );
21775 }
21776
21777 insert_conversation(
21778 aider_agent_id,
21779 "lexical-aider-history",
21780 "Aider Chat: coding_agent_session_search",
21781 "/tmp/.aider.chat.history.md",
21782 1_764_619_673_394,
21783 vec![
21784 (
21785 0,
21786 MessageRole::System,
21787 Some("system".to_string()),
21788 None,
21789 "# aider chat started at 2025-12-01 20:07:47".to_string(),
21790 ),
21791 (
21792 1,
21793 MessageRole::User,
21794 Some("user".to_string()),
21795 None,
21796 "/tmp/workspace/.venv/bin/aider --no-git --message hello world".to_string(),
21797 ),
21798 ],
21799 );
21800 insert_conversation(
21801 aider_agent_id,
21802 "lexical-aider-fixture",
21803 "Aider Chat: aider",
21804 "/tmp/tests/fixtures/aider/.aider.chat.history.md",
21805 1_764_621_401_399,
21806 vec![
21807 (
21808 0,
21809 MessageRole::User,
21810 Some("user".to_string()),
21811 None,
21812 "/add src/main.rs".to_string(),
21813 ),
21814 (
21815 1,
21816 MessageRole::Agent,
21817 Some("assistant".to_string()),
21818 None,
21819 "Added src/main.rs to the chat.
21820
21821#### /add src/main.rs"
21822 .to_string(),
21823 ),
21824 (
21825 2,
21826 MessageRole::User,
21827 Some("user".to_string()),
21828 None,
21829 "Please refactor.".to_string(),
21830 ),
21831 (
21832 3,
21833 MessageRole::Agent,
21834 Some("assistant".to_string()),
21835 None,
21836 "Sure, here is the code.".to_string(),
21837 ),
21838 ],
21839 );
21840
21841 let mut streamed = Vec::new();
21842 storage
21843 .stream_messages_for_lexical_rebuild_between_conversation_ids(
21844 first_conversation_id.unwrap(),
21845 last_conversation_id.unwrap(),
21846 |row| {
21847 streamed.push((
21848 row.conversation_id,
21849 row.idx,
21850 row.role,
21851 row.author,
21852 row.created_at,
21853 row.content,
21854 ));
21855 Ok(())
21856 },
21857 )
21858 .unwrap();
21859
21860 assert_eq!(streamed, expected);
21861 }
21862
21863 #[test]
21864 fn lexical_rebuild_stream_queries_use_rowid_and_per_conversation_probes() {
21865 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21866 use std::path::PathBuf;
21867
21868 let dir = TempDir::new().unwrap();
21869 let db_path = dir.path().join("agent_search.db");
21870 let storage = SqliteStorage::open(&db_path).unwrap();
21871
21872 let agent = Agent {
21873 id: None,
21874 slug: "claude_code".into(),
21875 name: "Claude Code".into(),
21876 version: None,
21877 kind: AgentKind::Cli,
21878 };
21879 let agent_id = storage.ensure_agent(&agent).unwrap();
21880
21881 for (external_id, base_ts) in [
21882 ("conv-1", 1_700_000_000_000_i64),
21883 ("conv-2", 1_700_000_001_000_i64),
21884 ] {
21885 let conversation = Conversation {
21886 id: None,
21887 agent_slug: "claude_code".into(),
21888 workspace: Some(PathBuf::from("/tmp/workspace")),
21889 external_id: Some(external_id.to_string()),
21890 title: Some("Lexical rebuild".into()),
21891 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
21892 started_at: Some(base_ts),
21893 ended_at: Some(base_ts + 100),
21894 approx_tokens: None,
21895 metadata_json: serde_json::Value::Null,
21896 messages: vec![
21897 Message {
21898 id: None,
21899 idx: 0,
21900 role: MessageRole::User,
21901 author: Some("user".into()),
21902 created_at: Some(base_ts + 10),
21903 content: format!("{external_id}-first"),
21904 extra_json: serde_json::Value::Null,
21905 snippets: Vec::new(),
21906 },
21907 Message {
21908 id: None,
21909 idx: 1,
21910 role: MessageRole::Agent,
21911 author: Some("assistant".into()),
21912 created_at: Some(base_ts + 20),
21913 content: format!("{external_id}-second"),
21914 extra_json: serde_json::Value::Null,
21915 snippets: Vec::new(),
21916 },
21917 ],
21918 source_id: LOCAL_SOURCE_ID.into(),
21919 origin_host: None,
21920 };
21921 storage
21922 .insert_conversation_tree(agent_id, None, &conversation)
21923 .unwrap();
21924 }
21925
21926 let first_id: i64 = storage
21927 .conn
21928 .query_row_map(
21929 "SELECT id FROM conversations ORDER BY id LIMIT 1",
21930 fparams![],
21931 |row| row.get_typed(0),
21932 )
21933 .unwrap();
21934 let last_id: i64 = storage
21935 .conn
21936 .query_row_map(
21937 "SELECT id FROM conversations ORDER BY id DESC LIMIT 1",
21938 fparams![],
21939 |row| row.get_typed(0),
21940 )
21941 .unwrap();
21942
21943 let conversation_plan_details: Vec<String> = storage
21944 .conn
21945 .query_map_collect(
21946 "EXPLAIN QUERY PLAN SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
21947 fparams![first_id, last_id],
21948 |row| row.get_typed(3),
21949 )
21950 .unwrap();
21951 assert!(
21952 !conversation_plan_details
21953 .iter()
21954 .any(|detail| detail.contains("TEMP B-TREE")),
21955 "expected streamed lexical rebuild conversation listing to avoid sorter temp b-trees, got {conversation_plan_details:?}"
21956 );
21957
21958 let message_plan_details: Vec<String> = storage
21959 .conn
21960 .query_map_collect(
21961 "EXPLAIN QUERY PLAN SELECT id, idx, role, author, created_at, content FROM messages INDEXED BY sqlite_autoindex_messages_1 WHERE conversation_id = ?1 ORDER BY idx",
21962 fparams![first_id],
21963 |row| row.get_typed(3),
21964 )
21965 .unwrap();
21966 assert!(
21967 message_plan_details
21968 .iter()
21969 .any(|detail| detail.contains("sqlite_autoindex_messages_1")
21970 || detail.contains("idx_messages_conv_idx")),
21971 "expected per-conversation lexical rebuild fetch to use the conversation_id/idx index, got {message_plan_details:?}"
21972 );
21973 assert!(
21974 !message_plan_details
21975 .iter()
21976 .any(|detail| detail.contains("TEMP B-TREE")),
21977 "expected per-conversation lexical rebuild fetch to avoid sorter temp b-trees, got {message_plan_details:?}"
21978 );
21979 }
21980
21981 #[test]
21982 fn discover_historical_database_bundles_prefers_larger_archives_first() {
21983 let dir = TempDir::new().unwrap();
21984 let canonical_db = dir.path().join("agent_search.db");
21985 fs::write(&canonical_db, b"canonical").unwrap();
21986
21987 let smaller = dir.path().join("agent_search.corrupt.small");
21988 fs::write(&smaller, vec![0_u8; 32]).unwrap();
21989
21990 let backups_dir = dir.path().join("backups");
21991 fs::create_dir_all(&backups_dir).unwrap();
21992 let larger = backups_dir.join("agent_search.db.20260322T020200.bak");
21993 fs::write(&larger, vec![0_u8; 128]).unwrap();
21994
21995 let bundles = discover_historical_database_bundles(&canonical_db);
21996 let ordered_paths: Vec<PathBuf> =
21997 bundles.into_iter().map(|bundle| bundle.root_path).collect();
21998
21999 assert_eq!(ordered_paths, vec![larger, smaller]);
22000 }
22001
/// A backup that can be opened and queried is expected ahead of a
/// zero-filled quarantined file, and only the backup should report
/// `supports_direct_readonly`.
#[test]
fn discover_historical_database_bundles_prefers_queryable_direct_bundles_first() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    let canonical_db = root.join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // 4096 zero bytes stand in for a quarantined file.
    let corrupt_bundle = root.join("agent_search.corrupt.20260324_212907");
    fs::write(&corrupt_bundle, vec![0_u8; 4096]).unwrap();

    let backups = root.join("backups");
    fs::create_dir_all(&backups).unwrap();
    let healthy_bundle = backups.join("agent_search.db.20260322T020200.bak");
    {
        // Minimal conversations/messages schema with one seeded row each; the
        // connection is closed when this scope ends, before discovery runs.
        let seed_conn =
            FrankenConnection::open(healthy_bundle.to_string_lossy().into_owned()).unwrap();
        seed_conn
            .execute_batch(
                "CREATE TABLE conversations (id INTEGER PRIMARY KEY, source_path TEXT);
                 CREATE TABLE messages (
                     id INTEGER PRIMARY KEY,
                     conversation_id INTEGER NOT NULL,
                     idx INTEGER NOT NULL,
                     content TEXT
                 );
                 INSERT INTO conversations(id, source_path) VALUES (1, '/tmp/history.jsonl');
                 INSERT INTO messages(id, conversation_id, idx, content)
                 VALUES (1, 1, 0, 'seed');",
            )
            .unwrap();
    }

    let bundles = discover_historical_database_bundles(&canonical_db);
    let discovered: Vec<PathBuf> = bundles
        .iter()
        .map(|candidate| candidate.root_path.clone())
        .collect();

    assert_eq!(discovered, vec![healthy_bundle, corrupt_bundle]);
    assert!(bundles[0].supports_direct_readonly);
    assert!(!bundles[1].supports_direct_readonly);
}
22040
/// A quarantined file that is not a SQLite database is still discovered as a
/// candidate, but salvaging must count it as considered without importing
/// anything from it.
#[test]
fn salvage_historical_databases_skips_unreadable_quarantined_bundles() {
    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let junk_bundle = tmp.path().join("agent_search.corrupt.20260324_212907");
    fs::write(&junk_bundle, b"not a sqlite database").unwrap();

    // Discovery lists the junk file as the only candidate.
    let bundles = discover_historical_database_bundles(&canonical_db);
    let found: Vec<PathBuf> = bundles
        .into_iter()
        .map(|candidate| candidate.root_path)
        .collect();
    assert_eq!(found, vec![junk_bundle]);

    // Salvage looks at the candidate but imports nothing.
    let report = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(report.bundles_considered, 1);
    assert_eq!(report.bundles_imported, 0);
    assert_eq!(report.conversations_imported, 0);
    assert_eq!(report.messages_imported, 0);

    let stored = storage.list_conversations(10, 0).unwrap();
    assert!(stored.is_empty());
}
22063
/// Files named `agent_search.db` under `repair-lab/<name>/` and
/// `snapshots/<name>/` must be discovered, while a sibling named
/// `agent_search.rebuild-test.db` must not appear in the result.
#[test]
fn discover_historical_database_bundles_includes_repair_lab_and_snapshots_named_roots() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();
    let canonical_db = root.join("agent_search.db");
    fs::write(&canonical_db, b"canonical").unwrap();

    // repair-lab/live-copy holds one expected root plus one file to ignore.
    let lab_dir = root.join("repair-lab").join("live-copy");
    fs::create_dir_all(&lab_dir).unwrap();
    let lab_db = lab_dir.join("agent_search.db");
    fs::write(&lab_db, vec![0_u8; 96]).unwrap();
    let ignored_sibling = lab_dir.join("agent_search.rebuild-test.db");
    fs::write(ignored_sibling, vec![0_u8; 192]).unwrap();

    // snapshots/<timestamp> holds the second expected root.
    let snapshot_dir = root.join("snapshots").join("20260324T013201Z");
    fs::create_dir_all(&snapshot_dir).unwrap();
    let snapshot_db = snapshot_dir.join("agent_search.db");
    fs::write(&snapshot_db, vec![0_u8; 64]).unwrap();

    let discovered: Vec<PathBuf> = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .map(|candidate| candidate.root_path)
        .collect();

    assert!(discovered.contains(&lab_db));
    assert!(discovered.contains(&snapshot_db));
    assert!(
        discovered
            .iter()
            .all(|path| path.file_name().and_then(|name| name.to_str())
                != Some("agent_search.rebuild-test.db"))
    );
}
22098
/// Checks the ordering returned by `discover_historical_database_bundles`
/// when both a `repair-lab/replay-*` database and a `backups/*.bak` database
/// exist: the untouched backup is expected first and the tampered replay
/// database second. The probe fields of both entries are asserted as well.
#[test]
fn discover_historical_database_bundles_prefers_healthy_backup_over_replay_priority() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let dir = TempDir::new().unwrap();
    let canonical_db = dir.path().join("agent_search.db");
    // The canonical file is only a placeholder; its content is the literal
    // bytes "canonical".
    fs::write(&canonical_db, b"canonical").unwrap();

    // Build the replay bundle as a real storage database holding one
    // conversation with a single message.
    let replay_dir = dir
        .path()
        .join("repair-lab")
        .join("replay-20260324T070101Z");
    fs::create_dir_all(&replay_dir).unwrap();
    let replay_db = replay_dir.join("agent_search.db");
    let replay_storage = SqliteStorage::open(&replay_db).unwrap();
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = replay_storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("replay-conv".into()),
        title: Some("Replay bundle".into()),
        source_path: PathBuf::from("/tmp/replay.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_050),
            content: "replay message".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    replay_storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    // Close the storage handle before reopening the file through the fixture
    // connection below.
    drop(replay_storage);

    // Tamper with the replay database through the rusqlite fixture connection:
    // rewind the recorded schema version to 13, remove the migration row for
    // version 14, and enable `writable_schema` so `sqlite_master` can be
    // edited directly.
    let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
    let replay_legacy = rusqlite_test_fixture_conn(&replay_db);
    replay_legacy
        .execute_batch(
            "UPDATE meta SET value = '13' WHERE key = 'schema_version';
             DELETE FROM _schema_migrations WHERE version = 14;
             PRAGMA writable_schema = ON;",
        )
        .unwrap();
    // Remove the meta entry stored under `FTS_FRANKEN_REBUILD_META_KEY`.
    replay_legacy
        .execute(
            "DELETE FROM meta WHERE key = ?1",
            [FTS_FRANKEN_REBUILD_META_KEY],
        )
        .unwrap();
    // Hand-insert a schema row declaring `fts_messages` with rootpage 0.
    replay_legacy
        .execute(
            "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
             VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
            [duplicate_legacy_fts_sql],
        )
        .unwrap();
    // Turn `writable_schema` back off once the manual edit is done.
    replay_legacy
        .execute_batch("PRAGMA writable_schema = OFF;")
        .unwrap();
    drop(replay_legacy);

    // The backup is created the regular way, with the same agent and
    // conversation, and is left untouched.
    let backups_dir = dir.path().join("backups");
    fs::create_dir_all(&backups_dir).unwrap();
    let clean_backup = backups_dir.join("agent_search.db.20260322T020200.bak");
    let clean_storage = SqliteStorage::open(&clean_backup).unwrap();
    let clean_agent_id = clean_storage.ensure_agent(&agent).unwrap();
    clean_storage
        .insert_conversation_tree(clean_agent_id, None, &conversation)
        .unwrap();
    drop(clean_storage);

    let bundles = discover_historical_database_bundles(&canonical_db);
    let ordered_paths: Vec<PathBuf> = bundles
        .iter()
        .map(|bundle| bundle.root_path.clone())
        .collect();

    // Only the first two positions are pinned: clean backup, then replay.
    assert_eq!(ordered_paths[0], clean_backup);
    assert_eq!(ordered_paths[1], replay_db);
    assert_eq!(
        bundles[0].probe.schema_version,
        Some(CURRENT_SCHEMA_VERSION)
    );
    // The clean backup is expected to report zero FTS schema rows and a
    // non-queryable FTS table.
    assert_eq!(bundles[0].probe.fts_schema_rows, Some(0));
    assert!(!bundles[0].probe.fts_queryable);
    // The replay probe must reflect the rewound version and the single
    // hand-inserted FTS schema row.
    assert_eq!(bundles[1].probe.schema_version, Some(13));
    assert_eq!(bundles[1].probe.fts_schema_rows, Some(1));
}
22216
/// After a full FTS rebuild, a message row inserted straight into `messages`
/// must be reported by `ensure_fts_consistency_via_rusqlite` as an
/// incremental catch-up of one row (two rows in total), and the new row must
/// then be present in `fts_messages`.
#[test]
fn ensure_fts_consistency_via_rusqlite_catches_up_missing_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("fts-catchup.db");

    // Seed one conversation with a single message. The storage handle is
    // released at the end of this scope, before the rebuild runs.
    {
        let storage = SqliteStorage::open(&db_path).unwrap();
        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let first_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_050),
            content: "initial message".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let seeded = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("fts-catchup".into()),
            title: Some("FTS catchup".into()),
            source_path: PathBuf::from("/tmp/fts-catchup.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::Value::Null,
            messages: vec![first_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &seeded)
            .unwrap();
    }

    rebuild_fts_via_rusqlite(&db_path).unwrap();

    // Add a second message behind the back of the FTS table.
    {
        let writer = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
        let conversation_id: i64 = writer
            .query_row_map("SELECT id FROM conversations LIMIT 1", fparams![], |row| {
                row.get_typed(0)
            })
            .unwrap();
        writer
            .execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
                 VALUES(2, ?1, 1, 'assistant', 'assistant', 1700000000060, 'authentication catchup', NULL, NULL)",
                fparams![conversation_id],
            )
            .unwrap();
    }

    let repair = ensure_fts_consistency_via_rusqlite(&db_path).unwrap();
    let expected = FtsConsistencyRepair::IncrementalCatchUp {
        inserted_rows: 1,
        total_rows: 2,
    };
    assert_eq!(repair, expected);

    // The caught-up row is addressable by the id of the inserted message.
    let reader = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let caught_up_rows: i64 = reader
        .query_row_map(
            "SELECT COUNT(*) FROM fts_messages WHERE rowid = 2",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(caught_up_rows, 1);
}
22296
/// Verifies that `rebuild_fts_via_rusqlite` copes with a database whose
/// `sqlite_master` holds two rows named `fts_messages`: after the rebuild a
/// single FTS schema row must remain and the one stored message must be
/// indexed.
#[test]
fn rebuild_fts_via_rusqlite_cleans_duplicate_legacy_schema_rows() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("fts-duplicate-rebuild.db");

    // Seed a database with one conversation containing one message.
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("0.2.3".into()),
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/ws")),
        external_id: Some("retro".into()),
        title: Some("retro".into()),
        source_path: PathBuf::from("/tmp/retro.jsonl"),
        started_at: Some(42),
        ended_at: Some(42),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(42),
            content: "retro investigation".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    // Release the storage handle before other connections touch the file.
    drop(storage);
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    // With `writable_schema` enabled, hand-insert a second `fts_messages`
    // schema row (rootpage 0) next to the existing one.
    let conn = rusqlite_test_fixture_conn(&db_path);
    conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
    conn.execute(
        "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
         VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
        ["CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')"],
    )
    .unwrap();
    conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
    // Sanity check: the fixture really contains two rows with that name.
    let duplicate_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(duplicate_rows, 2);
    drop(conn);

    // The rebuild is expected to report one inserted row, matching the single
    // seeded message.
    let inserted = rebuild_fts_via_rusqlite(&db_path).unwrap();
    assert_eq!(inserted, 1);

    // Re-open through FrankenConnection and confirm the cleaned-up state.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let schema_rows = franken_fts_schema_rows(&conn).unwrap();
    assert_eq!(
        schema_rows, 1,
        "DROP TABLE should leave one clean FTS schema"
    );
    let match_count: i64 = conn
        .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(match_count, 1);
}
22378
/// Registering a previously unseen agent yields a positive row id.
#[test]
fn ensure_agent_creates_new() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let new_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "test_agent".into(),
            name: "Test Agent".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    assert!(new_id > 0);
}
22400
/// Calling `ensure_agent` twice with the same agent returns the same id.
#[test]
fn ensure_agent_returns_existing_id() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };

    let first_id = storage.ensure_agent(&codex).unwrap();
    let second_id = storage.ensure_agent(&codex).unwrap();

    assert_eq!(first_id, second_id);
}
22419
/// Re-running `ensure_agent` with identical metadata must leave the row's
/// `updated_at` column untouched, even when some time passes between calls.
#[test]
fn ensure_agent_unchanged_preserves_updated_at() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("1.0".into()),
        kind: AgentKind::Cli,
    };

    // Reads the stored `updated_at` value for this agent's slug.
    let read_updated_at = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT updated_at FROM agents WHERE slug = ?1",
                fparams![agent.slug.as_str()],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    storage.ensure_agent(&agent).unwrap();
    let before = read_updated_at();

    // Let the clock advance so a rewritten timestamp would be observable.
    std::thread::sleep(std::time::Duration::from_millis(5));

    storage.ensure_agent(&agent).unwrap();
    let after = read_updated_at();

    assert_eq!(after, before);
}
22457
/// When an agent's name and version change but its slug stays the same,
/// `ensure_agent` must keep the id and persist the new metadata.
#[test]
fn ensure_agent_changed_metadata_updates_cached_slug() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let mut agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: Some("1.0".into()),
        kind: AgentKind::Cli,
    };
    let original_id = storage.ensure_agent(&agent).unwrap();

    // Same slug, new display name and version.
    agent.name = "Codex CLI".into();
    agent.version = Some("1.1".into());
    let updated_id = storage.ensure_agent(&agent).unwrap();

    let (stored_name, stored_version): (String, Option<String>) = storage
        .conn
        .query_row_map(
            "SELECT name, version FROM agents WHERE slug = ?1",
            fparams![agent.slug.as_str()],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();

    assert_eq!(original_id, updated_id);
    assert_eq!(stored_name, "Codex CLI");
    assert_eq!(stored_version.as_deref(), Some("1.1"));
}
22489
/// An agent registered through `ensure_agent` shows up in `list_agents`.
#[test]
fn list_agents_returns_inserted() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    storage
        .ensure_agent(&Agent {
            id: None,
            slug: "new_agent".into(),
            name: "New Agent".into(),
            version: None,
            kind: AgentKind::VsCode,
        })
        .unwrap();

    let listed = storage.list_agents().unwrap();
    let is_listed = listed.iter().any(|entry| entry.slug == "new_agent");
    assert!(is_listed);
}
22508
/// Registering a new workspace path yields a positive row id.
#[test]
fn ensure_workspace_creates_new() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let workspace_path = Path::new("/home/user/project");
    let workspace_id = storage
        .ensure_workspace(workspace_path, Some("My Project"))
        .unwrap();

    assert!(workspace_id > 0);
}
22524
/// Calling `ensure_workspace` twice for the same path returns the same id.
#[test]
fn ensure_workspace_returns_existing() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let workspace_path = Path::new("/home/user/myproject");
    let first_id = storage.ensure_workspace(workspace_path, None).unwrap();
    let second_id = storage.ensure_workspace(workspace_path, None).unwrap();

    assert_eq!(first_id, second_id);
}
22536
/// Re-registering a workspace path with a different display name keeps the
/// id and persists the new display name.
#[test]
fn ensure_workspace_changed_display_name_updates_cached_path() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let workspace_path = Path::new("/home/user/myproject");
    let before_id = storage
        .ensure_workspace(workspace_path, Some("Before"))
        .unwrap();
    let after_id = storage
        .ensure_workspace(workspace_path, Some("After"))
        .unwrap();

    // Read the stored display name back directly from the table.
    let path_text = workspace_path.to_string_lossy();
    let stored_name: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT display_name FROM workspaces WHERE path = ?1",
            fparams![path_text.as_ref()],
            |row| row.get_typed(0),
        )
        .unwrap();

    assert_eq!(before_id, after_id);
    assert_eq!(stored_name.as_deref(), Some("After"));
}
22559
/// A workspace registered through `ensure_workspace` shows up in
/// `list_workspaces`.
#[test]
fn list_workspaces_returns_inserted() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    storage
        .ensure_workspace(Path::new("/test/workspace"), Some("Test WS"))
        .unwrap();

    let listed = storage.list_workspaces().unwrap();
    let is_listed = listed
        .iter()
        .any(|entry| entry.path.to_str() == Some("/test/workspace"));
    assert!(is_listed);
}
22577
/// Upserting a brand-new SSH source stores it so that `get_source` returns
/// the row with the host label that was written.
#[test]
fn upsert_source_creates_new() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let source = Source {
        id: "test-laptop".into(),
        kind: SourceKind::Ssh,
        host_label: Some("test.local".into()),
        machine_id: Some("test-machine-id".into()),
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };

    storage.upsert_source(&source).unwrap();

    // A missing row fails right here with a descriptive message instead of
    // needing a separate `is_some()` check before unwrapping.
    let fetched = storage
        .get_source("test-laptop")
        .unwrap()
        .expect("upserted source should be retrievable by id");
    assert_eq!(fetched.host_label, Some("test.local".into()));
}
22604
/// Upserting a source whose id already exists overwrites the stored host
/// label and platform with the new values.
#[test]
fn upsert_source_updates_existing() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    // First version: no platform recorded.
    let source1 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Original Label".into()),
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&source1).unwrap();

    // Second version under the same id: new label plus a platform.
    let source2 = Source {
        id: "my-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("Updated Label".into()),
        machine_id: None,
        platform: Some("linux".into()),
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: Some(SqliteStorage::now_millis()),
    };
    storage.upsert_source(&source2).unwrap();

    let fetched = storage.get_source("my-source").unwrap().unwrap();
    assert_eq!(fetched.host_label, Some("Updated Label".into()));
    // Pin the stored platform value rather than only checking that some
    // platform is present.
    assert_eq!(fetched.platform, Some("linux".into()));
}
22639
/// Upserting the very same source twice must not change any of the fields
/// read back through `get_source`, timestamps included.
#[test]
fn upsert_source_unchanged_preserves_updated_at() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let stable = Source {
        id: "stable-source".into(),
        kind: SourceKind::Ssh,
        host_label: Some("builder.local".into()),
        machine_id: None,
        platform: Some("linux".into()),
        config_json: Some(serde_json::json!({"role": "bench"})),
        created_at: None,
        updated_at: None,
    };

    storage.upsert_source(&stable).unwrap();
    let first_read = storage.get_source("stable-source").unwrap().unwrap();

    // Let the clock advance so a rewritten timestamp would be observable.
    std::thread::sleep(std::time::Duration::from_millis(5));

    storage.upsert_source(&stable).unwrap();
    let second_read = storage.get_source("stable-source").unwrap().unwrap();

    assert_eq!(second_read.created_at, first_read.created_at);
    assert_eq!(second_read.updated_at, first_read.updated_at);
    assert_eq!(second_read.host_label, first_read.host_label);
    assert_eq!(second_read.platform, first_read.platform);
    assert_eq!(second_read.config_json, first_read.config_json);
}
22670
/// After a remote source row has been deleted, calling
/// `ensure_source_for_conversation` again for the same conversation must
/// bring the row back with the conversation's origin host as its label.
#[test]
fn ensure_source_for_conversation_recreates_remote_source_after_delete() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let remote_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/ws/cache-recreate")),
        external_id: Some("cache-recreate".into()),
        title: Some("Cache Recreate".into()),
        source_path: PathBuf::from("/log/cache-recreate.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: Some(16),
        metadata_json: serde_json::json!({}),
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("tester".into()),
            created_at: Some(1_700_000_000_000),
            content: "cache recreate".into(),
            extra_json: serde_json::json!({}),
            snippets: Vec::new(),
        }],
        source_id: "cache-remote-source".into(),
        origin_host: Some("builder-cache".into()),
    };

    // First call creates the source row.
    storage
        .ensure_source_for_conversation(&remote_conversation)
        .unwrap();
    assert!(storage.get_source("cache-remote-source").unwrap().is_some());

    // Remove it again and confirm it is gone.
    assert!(storage.delete_source("cache-remote-source", false).unwrap());
    assert!(storage.get_source("cache-remote-source").unwrap().is_none());

    // Second call has to recreate the row.
    storage
        .ensure_source_for_conversation(&remote_conversation)
        .unwrap();
    let recreated_label = storage
        .get_source("cache-remote-source")
        .unwrap()
        .and_then(|source| source.host_label);
    assert_eq!(recreated_label.as_deref(), Some("builder-cache"));
}
22721
/// Deleting an existing non-bootstrap source reports success and makes the
/// row unavailable through `get_source`.
#[test]
fn delete_source_removes_entry() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    storage
        .upsert_source(&Source {
            id: "to-delete".into(),
            kind: SourceKind::Local,
            host_label: None,
            machine_id: None,
            platform: None,
            config_json: None,
            created_at: Some(SqliteStorage::now_millis()),
            updated_at: None,
        })
        .unwrap();

    let was_deleted = storage.delete_source("to-delete", false).unwrap();
    assert!(was_deleted);

    let lookup = storage.get_source("to-delete").unwrap();
    assert!(lookup.is_none());
}
22746
/// Attempting to delete the source identified by `LOCAL_SOURCE_ID` must
/// return an error.
#[test]
fn delete_source_cannot_delete_local() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let outcome = storage.delete_source(LOCAL_SOURCE_ID, false);

    assert!(outcome.is_err());
}
22756
/// A freshly opened database already lists the `LOCAL_SOURCE_ID` source.
#[test]
fn list_sources_includes_local() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let listed = storage.list_sources().unwrap();
    let has_local = listed.iter().any(|entry| entry.id == LOCAL_SOURCE_ID);

    assert!(has_local);
}
22766
/// A conversation with a whitespace-only `source_id` and no origin host must
/// be stored under `LOCAL_SOURCE_ID`, without creating a source row keyed by
/// the blank id.
#[test]
fn insert_conversation_tree_blank_local_source_normalizes_to_local_id() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let blank_source_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-local-source".into()),
        title: Some("Blank local source".into()),
        source_path: tmp.path().join("blank-local.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000),
            content: "hello".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: " ".into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &blank_source_conversation)
        .unwrap();

    // No row may exist under the blank id itself.
    assert!(storage.get_source(" ").unwrap().is_none());

    // The local source row carries the conversation instead.
    let local_source = storage
        .get_source(LOCAL_SOURCE_ID)
        .unwrap()
        .expect("local source row should exist");
    assert_eq!(local_source.kind, SourceKind::Local);
    assert_eq!(local_source.host_label, None);

    let stored = storage.list_conversations(10, 0).unwrap();
    assert_eq!(stored.len(), 1);
    assert_eq!(stored[0].source_id, LOCAL_SOURCE_ID);
    assert_eq!(stored[0].origin_host, None);
}
22825
/// Inserting local conversations must leave the `updated_at` value of the
/// `LOCAL_SOURCE_ID` source row exactly as it was before the inserts.
#[test]
fn repeated_local_inserts_do_not_touch_bootstrap_source_row() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Reads the current `updated_at` of the local source row.
    let local_updated_at = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT updated_at FROM sources WHERE id = ?1",
                fparams![LOCAL_SOURCE_ID],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    // Builds a one-message local conversation distinguished by `suffix`.
    let make_conversation = |external_id: &str, suffix: &str| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some(external_id.into()),
        title: Some(format!("Local source {suffix}")),
        source_path: tmp.path().join(format!("local-{suffix}.jsonl")),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000),
            content: format!("hello-{suffix}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    // Waits briefly so a rewritten timestamp would differ, inserts one
    // conversation, then reports the timestamp seen afterwards.
    let insert_then_read = |external_id: &str, suffix: &str| -> i64 {
        std::thread::sleep(std::time::Duration::from_millis(5));
        storage
            .insert_conversation_tree(agent_id, None, &make_conversation(external_id, suffix))
            .unwrap();
        local_updated_at()
    };

    let bootstrap_updated_at = local_updated_at();
    let after_first_insert = insert_then_read("local-source-1", "one");
    let after_second_insert = insert_then_read("local-source-2", "two");

    assert_eq!(after_first_insert, bootstrap_updated_at);
    assert_eq!(after_second_insert, bootstrap_updated_at);
}
22905
/// A conversation with a whitespace-only `source_id` but a known origin host
/// must be stored under a source whose id is that origin host.
#[test]
fn insert_conversation_tree_blank_remote_source_normalizes_to_origin_host() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let remote_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-remote-source".into()),
        title: Some("Blank remote source".into()),
        source_path: tmp.path().join("blank-remote.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000),
            content: "hello".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: " ".into(),
        origin_host: Some("user@work-laptop".into()),
    };
    storage
        .insert_conversation_tree(agent_id, None, &remote_conversation)
        .unwrap();

    // No row may exist under the blank id itself.
    assert!(storage.get_source(" ").unwrap().is_none());

    // The origin host doubles as source id and host label.
    let remote_source = storage
        .get_source("user@work-laptop")
        .unwrap()
        .expect("normalized remote source row should exist");
    assert_eq!(remote_source.kind, SourceKind::Ssh);
    assert_eq!(remote_source.host_label.as_deref(), Some("user@work-laptop"));

    let stored = storage.list_conversations(10, 0).unwrap();
    assert_eq!(stored.len(), 1);
    let only = &stored[0];
    assert_eq!(only.source_id, "user@work-laptop");
    assert_eq!(only.origin_host.as_deref(), Some("user@work-laptop"));
}
22967
/// The batched insert path must apply the same normalization as the
/// single-conversation path: a whitespace-only `source_id` with a known origin
/// host is stored under a source whose id is that origin host.
#[test]
fn insert_conversations_batched_normalizes_host_only_remote_source_id() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let remote_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("batched-blank-remote-source".into()),
        title: Some("Batched blank remote source".into()),
        source_path: tmp.path().join("batched-blank-remote.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000),
            content: "hello".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: " ".into(),
        origin_host: Some("user@batch-host".into()),
    };

    // A batch holding just this one conversation.
    storage
        .insert_conversations_batched(&[(agent_id, None, &remote_conversation)])
        .unwrap();

    // No row may exist under the blank id itself.
    assert!(storage.get_source(" ").unwrap().is_none());

    let remote_source = storage
        .get_source("user@batch-host")
        .unwrap()
        .expect("normalized batched remote source row should exist");
    assert_eq!(remote_source.kind, SourceKind::Ssh);
    assert_eq!(remote_source.host_label.as_deref(), Some("user@batch-host"));

    let stored = storage.list_conversations(10, 0).unwrap();
    assert_eq!(stored.len(), 1);
    let only = &stored[0];
    assert_eq!(only.source_id, "user@batch-host");
    assert_eq!(only.origin_host.as_deref(), Some("user@batch-host"));
}
23029
/// `get_source_ids` must list an upserted remote source while leaving out
/// the built-in local source id.
#[test]
fn get_source_ids_excludes_local() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let remote = Source {
        id: "remote-1".into(),
        kind: SourceKind::Ssh,
        host_label: Some("server".into()),
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&remote).unwrap();

    let ids = storage.get_source_ids().unwrap();
    let local_listed = ids.contains(&LOCAL_SOURCE_ID.to_string());
    let remote_listed = ids.contains(&"remote-1".to_string());
    assert!(!local_listed);
    assert!(remote_listed);
}
23053
/// A freshly created database has no recorded last-scan timestamp.
#[test]
fn get_last_scan_ts_returns_none_initially() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    assert!(storage.get_last_scan_ts().unwrap().is_none());
}
23067
/// A last-scan timestamp written with `set_last_scan_ts` is returned
/// unchanged by `get_last_scan_ts`.
#[test]
fn set_and_get_last_scan_ts() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let written: i64 = 1_700_000_000_000;
    storage.set_last_scan_ts(written).unwrap();

    assert_eq!(storage.get_last_scan_ts().unwrap(), Some(written));
}
23080
/// `now_millis` must fall strictly between 2020-01-01 and 2100-01-01 (UTC),
/// expressed as milliseconds since the Unix epoch.
#[test]
fn now_millis_returns_reasonable_value() {
    // 1_577_836_800 s = 2020-01-01T00:00:00Z; 4_102_444_800 s = 2100-01-01T00:00:00Z.
    let lower_bound = 1_577_836_800_000;
    let upper_bound = 4_102_444_800_000;

    let now = SqliteStorage::now_millis();
    assert!(now > lower_bound);
    assert!(now < upper_bound);
}
23093
/// A nested JSON object survives a MessagePack encode/decode round trip.
#[test]
fn msgpack_roundtrip_basic_object() {
    let original = serde_json::json!({
        "key": "value",
        "number": 42,
        "nested": { "inner": true }
    });

    let encoded = serialize_json_to_msgpack(&original).expect("should serialize");
    let decoded = deserialize_msgpack_to_json(&encoded);

    assert_eq!(original, decoded);
}
23111
/// JSON `null` is not serialized: the encoder reports `None`.
#[test]
fn msgpack_returns_none_for_null() {
    let encoded = serialize_json_to_msgpack(&serde_json::Value::Null);
    assert!(encoded.is_none());
}
23117
/// A message whose `extra_json` is JSON null must leave both the
/// `extra_json` and `extra_bin` columns SQL NULL, and must read back as JSON
/// null through `fetch_messages`.
#[test]
fn message_insert_stores_null_extra_json_as_sql_null() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "null metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("null-extra-json".into()),
        title: Some("Null extra_json".into()),
        source_path: PathBuf::from("/tmp/null-extra-json.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    let conversation_id = outcome.conversation_id;

    // Inspect the raw columns directly, bypassing the decoding layer.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert!(text_column.is_none());
    assert!(binary_column.is_none());

    let reloaded = storage.fetch_messages(conversation_id).unwrap();
    assert!(reloaded[0].extra_json.is_null());
}
23176
/// A message with a non-empty `extra_json` object must populate only the
/// binary `extra_bin` column (text column stays NULL) and must decode back to
/// the original JSON through `fetch_messages`.
#[test]
fn message_insert_stores_nonempty_extra_json_as_msgpack_only() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let expected_extra = serde_json::json!({ "idx": 7, "kind": "profile" });
    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "msgpack metadata message".into(),
        extra_json: expected_extra.clone(),
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("msgpack-extra-json".into()),
        title: Some("MessagePack extra_json".into()),
        source_path: PathBuf::from("/tmp/msgpack-extra-json.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    let conversation_id = outcome.conversation_id;

    // Inspect the raw columns directly, bypassing the decoding layer.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert!(text_column.is_none());
    assert!(binary_column.is_some());

    let reloaded = storage.fetch_messages(conversation_id).unwrap();
    assert_eq!(reloaded[0].extra_json, expected_extra);
}
23236
/// A conversation whose `metadata_json` is JSON null must store the literal
/// text `null` in `metadata_json`, leave `metadata_bin` SQL NULL, and list
/// back as JSON null.
#[test]
fn conversation_insert_preserves_null_metadata_json_as_json_null() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "null conversation metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("null-conversation-metadata".into()),
        title: Some("Null conversation metadata".into()),
        source_path: PathBuf::from("/tmp/null-conversation-metadata.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // Inspect the raw columns directly, bypassing the decoding layer.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
            fparams!["null-conversation-metadata"],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(text_column.as_deref(), Some("null"));
    assert!(binary_column.is_none());

    let listed = storage.list_conversations(10, 0).unwrap();
    assert!(listed[0].metadata_json.is_null());
}
23294
/// A conversation with a non-empty metadata object must populate only the
/// binary `metadata_bin` column (text column stays NULL) and must list back
/// with the original JSON.
#[test]
fn conversation_insert_stores_nonempty_metadata_as_msgpack_only() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let expected_metadata = serde_json::json!({ "bench": true, "source": "profile" });
    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "msgpack conversation metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("msgpack-conversation-metadata".into()),
        title: Some("MessagePack conversation metadata".into()),
        source_path: PathBuf::from("/tmp/msgpack-conversation-metadata.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: expected_metadata.clone(),
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // Inspect the raw columns directly, bypassing the decoding layer.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
            fparams!["msgpack-conversation-metadata"],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert!(text_column.is_none());
    assert!(binary_column.is_some());

    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed[0].metadata_json, expected_metadata);
}
23353
/// An empty JSON object is not serialized: the encoder reports `None`.
#[test]
fn msgpack_returns_none_for_empty_object() {
    let empty_object = serde_json::json!({});
    let encoded = serialize_json_to_msgpack(&empty_object);
    assert!(encoded.is_none());
}
23359
/// A roughly 1 MB historical JSON column keeps its raw text, and its size
/// hint equals the raw text's byte length.
#[test]
fn parse_historical_json_column_preserves_large_payloads_as_raw_json() {
    let filler = "x".repeat(1_000_000);
    let raw = format!("{{\"blob\":\"{}\"}}", filler);

    let parsed = parse_historical_json_column(Some(raw.clone()));

    let retained = historical_raw_json(&parsed);
    assert_eq!(retained, Some(raw.as_str()));
    let size_hint = json_value_size_hint(&parsed);
    assert_eq!(size_hint, raw.len());
}
23369
/// A small historical JSON column also keeps its raw text verbatim.
#[test]
fn parse_historical_json_column_preserves_small_payloads_as_raw_json() {
    let raw = "{\"ok\":true,\"n\":1}".to_string();

    let parsed = parse_historical_json_column(Some(raw.clone()));

    let retained = historical_raw_json(&parsed);
    assert_eq!(retained, Some(raw.as_str()));
}
23378
/// A non-empty JSON array is serialized and round-trips unchanged.
#[test]
fn msgpack_serializes_non_empty_array() {
    let original = serde_json::json!([1, 2, 3]);

    let encoded = serialize_json_to_msgpack(&original).expect("should serialize array");
    let decoded = deserialize_msgpack_to_json(&encoded);

    assert_eq!(original, decoded);
}
23386
/// For a representative object with long field names, the MessagePack
/// encoding must be strictly shorter than the compact JSON encoding.
#[test]
fn msgpack_smaller_than_json() {
    let sample = serde_json::json!({
        "field_name_one": "some_value",
        "field_name_two": 123456,
        "field_name_three": [1, 2, 3, 4, 5],
        "field_name_four": { "nested": true }
    });

    let json_len = serde_json::to_vec(&sample).unwrap().len();
    let msgpack_len = serialize_json_to_msgpack(&sample).unwrap().len();

    assert!(
        msgpack_len < json_len,
        "MessagePack ({} bytes) should be smaller than JSON ({} bytes)",
        msgpack_len,
        json_len
    );
}
23407
/// After opening a fresh database, `conversations` must expose a
/// `metadata_bin` column and `messages` must expose an `extra_bin` column.
#[test]
fn migration_v7_adds_binary_columns() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    // Column 1 of each `PRAGMA table_info` row is the column name.
    let pragma_lists_column = |pragma: &str, column: &str| {
        storage
            .raw()
            .query(pragma)
            .unwrap()
            .iter()
            .any(|row| row.get_typed::<String>(1).unwrap() == column)
    };

    assert!(
        pragma_lists_column("PRAGMA table_info(conversations)", "metadata_bin"),
        "conversations should have metadata_bin column"
    );
    assert!(
        pragma_lists_column("PRAGMA table_info(messages)", "extra_bin"),
        "messages should have extra_bin column"
    );
}
23435
/// Appending to a conversation after its `conversation_tail_state` row was
/// deleted by hand must re-create that row.
///
/// The test also bumps `conversations.ended_at` to 111_999 by hand before
/// appending, and expects the rebuilt tail row to carry that value together
/// with the index and timestamp of the last appended message.
#[test]
fn insert_conversation_tree_rehydrates_append_tail_state_cache_after_manual_clear() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("append-tail-state-cache.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_path = PathBuf::from("/ws/profiled-append-remote");
    let workspace_id = storage.ensure_workspace(&workspace_path, None).unwrap();

    // Seed the conversation with its first five messages (indices 0..=4).
    let seed = make_profiled_append_remote_merge_conversation(11, 5);
    let conversation_id = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &seed)
        .unwrap()
        .conversation_id;

    // Reads (ended_at, last_message_idx, last_message_created_at) for the
    // conversation under test.
    let read_tail = || -> (Option<i64>, Option<i64>, Option<i64>) {
        storage
            .raw()
            .query_row_map(
                "SELECT ended_at, last_message_idx, last_message_created_at
                 FROM conversation_tail_state
                 WHERE conversation_id = ?1",
                fparams![conversation_id],
                |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
            )
            .unwrap()
    };

    assert_eq!(read_tail(), (Some(111_005), Some(4), Some(111_004)));

    // Simulate a stale cache: change the conversation row, then drop the
    // cached tail state entirely.
    storage
        .raw()
        .execute_compat(
            "UPDATE conversations SET ended_at = ?1 WHERE id = ?2",
            fparams![111_999_i64, conversation_id],
        )
        .unwrap();
    storage
        .raw()
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    // Re-insert the same conversation grown to ten messages; only the new
    // tail (indices 5..=9) should be written.
    let grown = make_profiled_append_remote_merge_conversation(11, 10);
    let append_outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &grown)
        .unwrap();
    assert_eq!(append_outcome.inserted_indices, vec![5, 6, 7, 8, 9]);

    assert_eq!(read_tail(), (Some(111_999), Some(9), Some(111_009)));
}
23504
/// Decoding an empty byte slice falls back to an empty JSON object.
#[test]
fn msgpack_deserialize_empty_returns_default() {
    let fallback = serde_json::json!({});
    assert_eq!(deserialize_msgpack_to_json(&[]), fallback);
}
23510
/// Decoding malformed input falls back to an empty JSON object.
///
/// `0x85` is a MessagePack fixmap header announcing five entries; with no
/// entries following it, the payload is truncated and cannot decode.
#[test]
fn msgpack_deserialize_garbage_returns_default() {
    let fallback = serde_json::json!({});
    assert_eq!(deserialize_msgpack_to_json(&[0x85]), fallback);
}
23518
/// Three recorded `(agent, source, day)` buckets are kept as three raw
/// entries, expand to ten rows, and the `("all", "all")` rollup for day 100
/// sums both day-100 recordings.
#[test]
fn stats_aggregator_collects_and_expands() {
    let mut agg = StatsAggregator::new();
    assert!(agg.is_empty());

    // (agent, source, day id, message count, total chars); each call is
    // expected to count as one session, per the rollup assertions below.
    for (agent, source, day, messages, chars) in [
        ("claude", "local", 100, 5, 500),
        ("codex", "local", 100, 3, 300),
        ("claude", "local", 101, 2, 200),
    ] {
        agg.record(agent, source, day, messages, chars);
    }

    assert!(!agg.is_empty());
    assert_eq!(agg.raw_entry_count(), 3);

    let expanded = agg.expand();
    assert_eq!(expanded.len(), 10);

    let day100_rollup = expanded
        .iter()
        .find(|(day, agent, source, _)| *day == 100 && agent == "all" && source == "all")
        .unwrap();
    let totals = &day100_rollup.3;
    assert_eq!(totals.session_count_delta, 2);
    assert_eq!(totals.message_count_delta, 8);
    assert_eq!(totals.total_chars_delta, 800);
}
23574
/// Constructing a `LazyFrankenDb` for an existing database file must not
/// open it.
#[test]
fn lazy_franken_db_not_open_before_get() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("lazy_test.db");

    // Create the database on disk; the handle stays alive for the whole test.
    let _existing = SqliteStorage::open(&db_file).unwrap();

    let lazy = LazyFrankenDb::new(db_file);
    let opened_eagerly = lazy.is_open();
    assert!(
        !opened_eagerly,
        "LazyFrankenDb must not open on construction"
    );
}
23593
/// The first `get()` opens the database and yields a usable connection; the
/// handle reports itself as open once that connection guard is released.
#[test]
fn lazy_franken_db_opens_on_first_get() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("lazy_test.db");

    // Create the schema on disk, then close that handle straight away.
    drop(SqliteStorage::open(&db_file).unwrap());

    let lazy = LazyFrankenDb::new(db_file);
    assert!(!lazy.is_open());

    {
        let conn = lazy.get("test").expect("should open successfully");
        let conversation_count: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
                r.get_typed(0)
            })
            .unwrap();
        assert_eq!(conversation_count, 0);
        // The guard is released at the end of this scope, before `is_open`.
    }

    assert!(lazy.is_open(), "LazyFrankenDb must be open after get()");
}
23617
/// Two consecutive `get()` calls on one `LazyFrankenDb` must both succeed,
/// the second must see the table created through the first, and the handle
/// must report itself as open from the first `get()` onwards.
///
/// The open-state assertions make sure the handle is lazily opened exactly
/// where expected and is not closed again between the two calls.
#[test]
fn lazy_franken_db_reuses_connection() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("lazy_test.db");
    // Create the schema on disk, then close that handle straight away.
    drop(SqliteStorage::open(&db_path).unwrap());

    let lazy = LazyFrankenDb::new(db_path);
    assert!(
        !lazy.is_open(),
        "LazyFrankenDb must not open before the first get()"
    );

    {
        // First access: opens the database and creates a scratch table.
        let conn = lazy.get("first").unwrap();
        conn.execute_batch("CREATE TABLE IF NOT EXISTS test_tbl (id INTEGER)")
            .unwrap();
    }
    assert!(
        lazy.is_open(),
        "LazyFrankenDb must stay open after the first guard is released"
    );

    {
        // Second access: the scratch table must be visible and empty.
        let conn = lazy.get("second").unwrap();
        let count: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM test_tbl", fparams![], |r| {
                r.get_typed(0)
            })
            .unwrap();
        assert_eq!(count, 0);
    }
    assert!(
        lazy.is_open(),
        "LazyFrankenDb must stay open after the second guard is released"
    );
}
23645
/// `get()` on a path with no database file fails with
/// `LazyDbError::NotFound`.
#[test]
fn lazy_franken_db_not_found_error() {
    let tmp = TempDir::new().unwrap();
    let missing_db = tmp.path().join("nonexistent.db");

    let lazy = LazyFrankenDb::new(missing_db);
    let outcome = lazy.get("test");

    assert!(outcome.is_err());
    let is_not_found = matches!(outcome, Err(LazyDbError::NotFound(_)));
    assert!(is_not_found, "should return NotFound for missing DB");
}
23659
/// `path()` returns the path the handle was constructed with; the file does
/// not need to exist for this.
#[test]
fn lazy_franken_db_path_accessor() {
    let configured = PathBuf::from("/tmp/test_lazy.db");

    let lazy = LazyFrankenDb::new(configured.clone());

    assert_eq!(lazy.path(), configured.as_path());
}
23666
/// Table-driven check of `sql_like_match` over trailing `%`, interior `%`,
/// exact match, single-character `_`, and case-insensitive matching.
#[test]
fn sql_like_match_basic_patterns() {
    // (text, pattern, expected result)
    let cases = [
        // Trailing `%`.
        ("claude-opus-4-20250101", "claude-opus-4%", true),
        ("claude-opus-4", "claude-opus-4%", true),
        ("claude-sonnet-4", "claude-opus-4%", false),
        // `%` in the middle and at the end.
        ("gemini-2.0-flash-001", "gemini-2%flash%", true),
        ("gemini-2-flash", "gemini-2%flash%", true),
        ("gemini-2-pro", "gemini-2%flash%", false),
        // No wildcard: the whole text must match.
        ("hello", "hello", true),
        ("hello!", "hello", false),
        // `_` stands for exactly one character.
        ("gpt-4o", "gpt-4_", true),
        ("gpt-4oo", "gpt-4_", false),
        // Letter case is ignored.
        ("Claude-Opus-4", "claude-opus-4%", true),
    ];

    for (text, pattern, expected) in cases {
        assert_eq!(
            sql_like_match(text, pattern),
            expected,
            "sql_like_match({text:?}, {pattern:?})"
        );
    }
}
23693
/// `date_str_to_day_id` maps ISO dates to day ids and rejects non-dates.
///
/// The expected ids equal the number of days elapsed since 2020-01-01
/// (2025-10-01 is 2100 days later, 2024-04-01 is 1552 days later).
#[test]
fn date_str_to_day_id_converts_correctly() {
    for (date, expected_day_id) in [("2025-10-01", 2100), ("2024-04-01", 1552)] {
        assert_eq!(date_str_to_day_id(date).unwrap(), expected_day_id);
    }

    let rejected = date_str_to_day_id("invalid");
    assert!(rejected.is_err());
}
23702
/// `lookup` returns the entry whose pattern matches the model name, and
/// `None` when no pattern matches.
#[test]
fn pricing_table_lookup_selects_matching_entry() {
    let effective_day = date_str_to_day_id("2025-10-01").unwrap();
    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();

    let opus = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: effective_day,
    };
    let sonnet = PricingEntry {
        model_pattern: "claude-sonnet-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 3.0,
        output_cost_per_mtok: 15.0,
        cache_read_cost_per_mtok: Some(0.3),
        cache_creation_cost_per_mtok: Some(3.75),
        effective_day_id: effective_day,
    };
    let table = PricingTable {
        entries: vec![opus, sonnet],
    };

    // Input price of whichever entry the table selects for `model`.
    let input_price = |model: &str| {
        table
            .lookup(model, lookup_day)
            .map(|entry| entry.input_cost_per_mtok)
    };

    assert_eq!(input_price("claude-opus-4-20260101"), Some(15.0));
    assert_eq!(input_price("claude-sonnet-4-latest"), Some(3.0));
    assert!(input_price("unknown-model").is_none());
}
23740
/// With two entries for the same pattern, `lookup` picks the one in effect
/// on the requested day and returns `None` for days before either applies.
#[test]
fn pricing_table_lookup_respects_effective_date() {
    let original_pricing = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let reduced_pricing = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 12.0,
        output_cost_per_mtok: 60.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: date_str_to_day_id("2026-01-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![original_pricing, reduced_pricing],
    };

    // Input price selected for "claude-opus-4" on the given calendar date.
    let input_price_on = |date: &str| {
        table
            .lookup("claude-opus-4", date_str_to_day_id(date).unwrap())
            .map(|entry| entry.input_cost_per_mtok)
    };

    // Between the two effective dates: the first entry applies.
    assert_eq!(input_price_on("2025-11-01"), Some(15.0));
    // After the second effective date: the newer entry applies.
    assert_eq!(input_price_on("2026-02-01"), Some(12.0));
    // Before either effective date: nothing applies.
    assert!(input_price_on("2024-01-01").is_none());
}
23785
/// When two patterns with the same effective day both match, `lookup`
/// returns the `gpt-4-turbo%` entry rather than the broader `gpt-4%` one; a
/// model that only `gpt-4%` matches still resolves to that entry.
#[test]
fn pricing_table_lookup_specificity_tiebreak() {
    let effective_day = date_str_to_day_id("2025-01-01").unwrap();
    let lookup_day = date_str_to_day_id("2026-01-01").unwrap();

    let broad = PricingEntry {
        model_pattern: "gpt-4%".into(),
        provider: "openai".into(),
        input_cost_per_mtok: 10.0,
        output_cost_per_mtok: 30.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: effective_day,
    };
    let narrow = PricingEntry {
        model_pattern: "gpt-4-turbo%".into(),
        provider: "openai".into(),
        input_cost_per_mtok: 5.0,
        output_cost_per_mtok: 15.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: effective_day,
    };
    let table = PricingTable {
        entries: vec![broad, narrow],
    };

    // Input price of whichever entry the table selects for `model`.
    let input_price = |model: &str| {
        table
            .lookup(model, lookup_day)
            .map(|entry| entry.input_cost_per_mtok)
    };

    // Both patterns match; the turbo entry must be chosen.
    assert_eq!(input_price("gpt-4-turbo-2025"), Some(5.0));
    // Only the broad pattern matches.
    assert_eq!(input_price("gpt-4o"), Some(10.0));
}
23823
/// Cost without cache tokens: 1000 input tokens at $15/Mtok plus 500 output
/// tokens at $75/Mtok is 0.015 + 0.0375 = 0.0525.
#[test]
fn pricing_table_compute_cost_basic() {
    let opus = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus],
    };

    let day = date_str_to_day_id("2026-02-06").unwrap();
    let cost = table.compute_cost(
        Some("claude-opus-4-latest"),
        day,
        Some(1000),
        Some(500),
        None,
        None,
    );

    assert!(cost.is_some());
    let error = (cost.unwrap() - 0.0525).abs();
    assert!(error < 1e-10);
}
23851
/// Cost with cache-read and cache-creation tokens supplied.
///
/// NOTE(review): the expected 16.5 is consistent with the 500k cache-read
/// and 200k cache-creation tokens being billed at their own rates
/// (0.75 + 3.75), the remaining 300k of the 1M input tokens at the input
/// rate (4.5), and 100k output tokens at the output rate (7.5). Confirm
/// against `compute_cost`.
#[test]
fn pricing_table_compute_cost_with_cache() {
    let opus = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus],
    };

    let day = date_str_to_day_id("2026-02-06").unwrap();
    let cost = table.compute_cost(
        Some("claude-opus-4-latest"),
        day,
        Some(1_000_000),
        Some(100_000),
        Some(500_000),
        Some(200_000),
    );

    assert!(cost.is_some());
    let error = (cost.unwrap() - 16.5).abs();
    assert!(error < 1e-10);
}
23883
/// `compute_cost` yields `None` when the model matches no entry, when no
/// model is given, and when no token counts are given at all.
#[test]
fn pricing_table_compute_cost_returns_none_for_unknown_model() {
    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
    let opus = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus],
    };

    let unmatched_model = table.compute_cost(
        Some("unknown-model"),
        lookup_day,
        Some(1000),
        Some(500),
        None,
        None,
    );
    assert!(unmatched_model.is_none());

    let absent_model = table.compute_cost(None, lookup_day, Some(1000), Some(500), None, None);
    assert!(absent_model.is_none());

    let absent_tokens =
        table.compute_cost(Some("claude-opus-4"), lookup_day, None, None, None, None);
    assert!(absent_tokens.is_none());
}
23923
/// A freshly opened database already contains pricing rows: the loaded
/// table is non-empty and resolves known Claude Opus and Gemini Flash models
/// to their expected input prices.
#[test]
fn pricing_table_load_from_db() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let table = PricingTable::load(&storage.conn).unwrap();
    assert!(!table.is_empty());

    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
    // Input price of whichever entry the table selects for `model`.
    let input_price = |model: &str| {
        table
            .lookup(model, lookup_day)
            .map(|entry| entry.input_cost_per_mtok)
    };

    assert_eq!(input_price("claude-opus-4-latest"), Some(15.0));
    assert_eq!(input_price("gemini-2.0-flash-001"), Some(0.075));
}
23943
/// Loading the pricing table fails with an "invalid effective_date" error
/// when a `model_pricing` row carries an unparseable date.
#[test]
fn pricing_table_load_rejects_invalid_effective_date() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    // Plant a row whose effective_date cannot be parsed.
    let insert_sql = "INSERT INTO model_pricing (
             model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
             cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
         ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)";
    storage
        .conn
        .execute_compat(
            insert_sql,
            fparams![
                "broken-model%",
                "test",
                1.0_f64,
                2.0_f64,
                Option::<f64>::None,
                Option::<f64>::None,
                "not-a-date"
            ],
        )
        .unwrap();

    let failure = PricingTable::load(&storage.conn).unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("invalid effective_date"));
}
23972
/// `PricingDiagnostics` counts priced and unpriced records separately and
/// tallies unpriced records per model name, filing a missing model name
/// under `"(none)"`.
#[test]
fn pricing_diagnostics_tracks_coverage() {
    let mut diag = PricingDiagnostics::default();

    for _ in 0..2 {
        diag.record_priced();
    }
    for _ in 0..2 {
        diag.record_unpriced(Some("custom-model-v1"));
    }
    diag.record_unpriced(None);

    assert_eq!(diag.priced_count, 2);
    assert_eq!(diag.unpriced_count, 3);

    let unknown = &diag.unknown_models;
    assert_eq!(unknown.len(), 2);
    assert_eq!(unknown["custom-model-v1"], 2);
    assert_eq!(unknown["(none)"], 1);
}
23988
23989 fn franken_storage_in_memory() -> FrankenStorage {
23999 let conn = FrankenConnection::open(":memory:").unwrap();
24000 let storage = FrankenStorage::new(conn, PathBuf::from(":memory:"));
24001 storage.run_migrations().unwrap();
24002 storage.apply_config().unwrap();
24003 storage
24004 }
24005
/// A freshly migrated in-memory `FrankenStorage` must be at
/// `CURRENT_SCHEMA_VERSION`, contain every expected table, and record
/// exactly versions 13 through `CURRENT_SCHEMA_VERSION` in
/// `_schema_migrations`.
///
/// Table names are decoded strictly: a row that cannot be read as text fails
/// the test at the decode step instead of being dropped and later showing up
/// as a misleading "missing table" failure.
#[test]
fn franken_migrations_create_all_tables() {
    let storage = franken_storage_in_memory();

    let version = storage.schema_version().unwrap();
    assert_eq!(
        version, CURRENT_SCHEMA_VERSION,
        "fresh FrankenStorage should be at current schema version"
    );

    let rows = storage
        .raw()
        .query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
        .unwrap();
    let table_names: Vec<String> = rows
        .iter()
        .map(|row| row.get_typed::<String>(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    // Membership test without allocating a `String` per lookup.
    let has_table = |name: &str| table_names.iter().any(|table| table.as_str() == name);

    // Core tables.
    for required in [
        "meta",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
        "tags",
        "conversation_tags",
    ] {
        assert!(has_table(required), "missing table: {required}");
    }

    assert!(has_table("sources"), "missing sources table");
    assert!(has_table("daily_stats"), "missing daily_stats table");
    assert!(has_table("embedding_jobs"), "missing embedding_jobs table");

    // Analytics tables.
    for analytics_table in ["message_metrics", "usage_hourly", "usage_daily"] {
        assert!(
            has_table(analytics_table),
            "missing table: {analytics_table}"
        );
    }

    // Conversation lookup / tail-state tables.
    assert!(
        has_table("conversation_tail_state"),
        "missing conversation_tail_state table"
    );
    assert!(
        has_table("conversation_external_lookup"),
        "missing conversation_external_lookup table"
    );
    assert!(
        has_table("conversation_external_tail_lookup"),
        "missing conversation_external_tail_lookup table"
    );

    // The migration ledger must hold one row per version from 13 onwards.
    let expected_versions: Vec<i64> = (13..=CURRENT_SCHEMA_VERSION).collect();

    let rows = storage
        .raw()
        .query("SELECT COUNT(*) FROM _schema_migrations;")
        .unwrap();
    let count: i64 = rows
        .first()
        .expect("COUNT(*) should always return one row")
        .get_typed(0)
        .unwrap();
    assert_eq!(
        count,
        expected_versions.len() as i64,
        "_schema_migrations should record the V13 base schema and post-V13 migrations"
    );

    let rows = storage
        .raw()
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        versions, expected_versions,
        "_schema_migrations should contain v13 through current"
    );
}
24110
24111 #[test]
24112 fn franken_migrations_idempotent() {
24113 let storage = franken_storage_in_memory();
24114 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24115
24116 storage.run_migrations().unwrap();
24118 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24119 }
24120
24121 #[test]
24122 fn migration_v20_backfills_conversation_external_tail_lookup() {
24123 let storage = franken_storage_in_memory();
24124 let agent_id = storage
24125 .ensure_agent(&Agent {
24126 id: None,
24127 slug: "codex".into(),
24128 name: "Codex".into(),
24129 version: None,
24130 kind: AgentKind::Cli,
24131 })
24132 .unwrap();
24133 let workspace_id = storage
24134 .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
24135 .unwrap();
24136 let mut conv = make_profiled_storage_remote_conversation(1919, 2);
24137 conv.source_id = "profiled-storage-remote-source-東京".into();
24138 conv.external_id = Some("profiled-storage-remote-☃-1919".into());
24139 let outcome = storage
24140 .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
24141 .unwrap();
24142 let external_id = conv.external_id.as_deref().unwrap();
24143 let lookup_key = conversation_external_lookup_key(&conv.source_id, agent_id, external_id);
24144
24145 storage
24146 .raw()
24147 .execute("DELETE FROM conversation_external_tail_lookup")
24148 .unwrap();
24149 storage
24150 .raw()
24151 .execute("DELETE FROM _schema_migrations WHERE version = 20")
24152 .unwrap();
24153 storage
24154 .raw()
24155 .execute_compat(
24156 "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
24157 fparams!["19"],
24158 )
24159 .unwrap();
24160
24161 storage.run_migrations().unwrap();
24162
24163 let backfilled: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
24164 .raw()
24165 .query_row_map(
24166 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
24167 FROM conversation_external_tail_lookup
24168 WHERE lookup_key = ?1",
24169 fparams![lookup_key.as_str()],
24170 |row| {
24171 Ok((
24172 row.get_typed(0)?,
24173 row.get_typed(1)?,
24174 row.get_typed(2)?,
24175 row.get_typed(3)?,
24176 ))
24177 },
24178 )
24179 .unwrap();
24180 assert_eq!(
24181 backfilled,
24182 (
24183 outcome.conversation_id,
24184 conv.ended_at,
24185 Some(1),
24186 conv.messages[1].created_at
24187 )
24188 );
24189 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24190 }
24191
24192 #[test]
24193 fn migration_v15_creates_lazy_tail_state_cache() {
24194 let conn = FrankenConnection::open(":memory:").unwrap();
24195 conn.execute_batch(
24196 "CREATE TABLE conversations (
24197 id INTEGER PRIMARY KEY,
24198 ended_at INTEGER
24199 );
24200 CREATE TABLE messages (
24201 id INTEGER PRIMARY KEY,
24202 conversation_id INTEGER NOT NULL,
24203 idx INTEGER NOT NULL,
24204 created_at INTEGER
24205 );
24206 INSERT INTO conversations(id, ended_at) VALUES
24207 (1, 1710000000300),
24208 (2, NULL);
24209 INSERT INTO messages(id, conversation_id, idx, created_at) VALUES
24210 (10, 1, 0, 1710000000100),
24211 (11, 1, 1, 1710000000200),
24212 (12, 2, 0, 1710000000400);",
24213 )
24214 .unwrap();
24215
24216 conn.execute(
24217 "CREATE TABLE _schema_migrations (
24218 version INTEGER PRIMARY KEY,
24219 name TEXT NOT NULL,
24220 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
24221 );",
24222 )
24223 .unwrap();
24224
24225 assert!(
24226 apply_conversation_tail_state_cache_migration(&conn).unwrap(),
24227 "v15 migration should apply once"
24228 );
24229 assert!(
24230 !apply_conversation_tail_state_cache_migration(&conn).unwrap(),
24231 "v15 migration should be idempotent once recorded"
24232 );
24233
24234 let columns = conn.query("PRAGMA table_info(conversations);").unwrap();
24235 let column_names: HashSet<String> = columns
24236 .iter()
24237 .map(|row| row.get_typed(1))
24238 .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
24239 .unwrap();
24240 assert!(column_names.contains("last_message_idx"));
24241 assert!(column_names.contains("last_message_created_at"));
24242
24243 let tail_rows: i64 = conn
24244 .query("SELECT COUNT(*) FROM conversation_tail_state;")
24245 .unwrap()
24246 .first()
24247 .unwrap()
24248 .get_typed(0)
24249 .unwrap();
24250 assert_eq!(
24251 tail_rows, 0,
24252 "v15 should create the cache without an open-time message scan"
24253 );
24254
24255 let applied: i64 = conn
24256 .query("SELECT COUNT(*) FROM _schema_migrations WHERE version = 15;")
24257 .unwrap()
24258 .first()
24259 .unwrap()
24260 .get_typed(0)
24261 .unwrap();
24262 assert_eq!(applied, 1);
24263 }
24264
24265 #[test]
24266 fn schema_repair_adds_missing_conversations_token_columns() {
24267 let conn = FrankenConnection::open(":memory:").unwrap();
24268 conn.execute_batch(
24269 "CREATE TABLE conversations (
24270 id INTEGER PRIMARY KEY,
24271 agent_id INTEGER NOT NULL,
24272 source_path TEXT NOT NULL
24273 );",
24274 )
24275 .unwrap();
24276 let storage = FrankenStorage::new(conn, std::path::PathBuf::from(":memory:"));
24277
24278 storage.repair_missing_conversation_token_columns().unwrap();
24279 storage.repair_missing_conversation_token_columns().unwrap();
24280
24281 let columns = franken_table_column_names(&storage.conn, "conversations").unwrap();
24282 for &(column_name, _) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
24283 assert!(
24284 columns.contains(column_name),
24285 "schema repair should add conversations.{column_name}"
24286 );
24287 }
24288 }
24289
24290 #[test]
24291 fn franken_meta_schema_version_in_sync() {
24292 let storage = franken_storage_in_memory();
24293
24294 let rows = storage
24296 .raw()
24297 .query("SELECT value FROM meta WHERE key = 'schema_version';")
24298 .unwrap();
24299 let meta_version: String = rows.first().unwrap().get_typed(0).unwrap();
24300 assert_eq!(
24301 meta_version,
24302 CURRENT_SCHEMA_VERSION.to_string(),
24303 "meta.schema_version should match CURRENT_SCHEMA_VERSION"
24304 );
24305 }
24306
24307 #[test]
24308 fn franken_transition_from_meta_version() {
24309 let dir = TempDir::new().unwrap();
24310 let db_path = dir.path().join("test_transition.db");
24311
24312 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24315 conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
24316 .unwrap();
24317 conn.execute("INSERT INTO meta(key, value) VALUES('schema_version', '10');")
24318 .unwrap();
24319 conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
24321 .unwrap();
24322 drop(conn);
24323
24324 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24326 transition_from_meta_version(&conn).unwrap();
24327
24328 let rows = conn
24332 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24333 .unwrap();
24334 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24335 assert_eq!(
24336 versions,
24337 (1..=13).collect::<Vec<i64>>(),
24338 "transition should bridge legacy V10 databases through the combined V13 base marker"
24339 );
24340 }
24341
24342 #[test]
24343 fn franken_transition_from_current_meta_backfills_current_schema_marker() {
24344 let dir = TempDir::new().unwrap();
24345 let db_path = dir.path().join("test_current_transition.db");
24346
24347 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24348 conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
24349 .unwrap();
24350 conn.execute_compat(
24351 "INSERT INTO meta(key, value) VALUES('schema_version', ?1);",
24352 &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
24353 )
24354 .unwrap();
24355 conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
24356 .unwrap();
24357 drop(conn);
24358
24359 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24360 transition_from_meta_version(&conn).unwrap();
24361
24362 let rows = conn
24363 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24364 .unwrap();
24365 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24366 assert_eq!(
24367 versions,
24368 (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
24369 "current meta schema marker should backfill every known migration"
24370 );
24371 }
24372
24373 #[test]
24374 fn franken_transition_skips_when_already_done() {
24375 let dir = TempDir::new().unwrap();
24376 let db_path = dir.path().join("test_transition_skip.db");
24377
24378 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24380 conn.execute(
24381 "CREATE TABLE _schema_migrations (version INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL DEFAULT 'now');",
24382 ).unwrap();
24383 conn.execute("INSERT INTO _schema_migrations (version, name) VALUES (1, 'test');")
24384 .unwrap();
24385
24386 transition_from_meta_version(&conn).unwrap();
24388
24389 let rows = conn
24391 .query("SELECT COUNT(*) FROM _schema_migrations;")
24392 .unwrap();
24393 let count: i64 = rows.first().unwrap().get_typed(0).unwrap();
24394 assert_eq!(
24395 count, 1,
24396 "transition should not re-run on already-transitioned DB"
24397 );
24398 }
24399
24400 #[test]
24401 fn franken_transition_fresh_db_is_noop() {
24402 let dir = TempDir::new().unwrap();
24403 let db_path = dir.path().join("test_fresh_noop.db");
24404
24405 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24407 transition_from_meta_version(&conn).unwrap();
24408
24409 let res = conn.query("SELECT * FROM \"_schema_migrations\";");
24411 assert!(
24412 res.is_err(),
24413 "transition should not create _schema_migrations on fresh DB"
24414 );
24415 }
24416
24417 #[test]
24418 fn franken_transition_with_fts_virtual_table_succeeds() {
24419 let dir = TempDir::new().unwrap();
24420 let db_path = dir.path().join("test_transition_with_fts.db");
24421
24422 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
24423 conn.execute_batch(
24424 "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
24425 INSERT INTO meta(key, value) VALUES('schema_version', '13');
24426 CREATE TABLE conversations (id INTEGER PRIMARY KEY);
24427 CREATE VIRTUAL TABLE fts_messages USING fts5(
24428 content,
24429 title,
24430 agent,
24431 workspace,
24432 source_path,
24433 created_at,
24434 content='',
24435 tokenize='porter unicode61'
24436 );",
24437 )
24438 .unwrap();
24439 drop(conn);
24440
24441 let conn = FrankenConnection::open(db_path.to_string_lossy().to_string()).unwrap();
24442 transition_from_meta_version(&conn).unwrap();
24443
24444 let rows = conn
24445 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24446 .unwrap();
24447 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24448 assert_eq!(versions, (1..=13).collect::<Vec<i64>>());
24449 }
24450
24451 #[test]
24452 fn franken_storage_open_legacy_v13_with_fts_virtual_table_succeeds() {
24453 let dir = TempDir::new().unwrap();
24454 let db_path = dir.path().join("test_open_legacy_v13_with_fts.db");
24455
24456 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
24457 conn.execute_batch(
24458 "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
24459 INSERT INTO meta(key, value) VALUES('schema_version', '13');
24460 CREATE TABLE agents (
24461 id INTEGER PRIMARY KEY,
24462 slug TEXT NOT NULL
24463 );
24464 CREATE TABLE workspaces (
24465 id INTEGER PRIMARY KEY,
24466 path TEXT NOT NULL
24467 );
24468 CREATE TABLE sources (
24469 id TEXT PRIMARY KEY,
24470 kind TEXT NOT NULL,
24471 host_label TEXT,
24472 machine_id TEXT,
24473 platform TEXT,
24474 config_json TEXT,
24475 created_at INTEGER NOT NULL,
24476 updated_at INTEGER NOT NULL
24477 );
24478 CREATE TABLE conversations (
24479 id INTEGER PRIMARY KEY,
24480 agent_id INTEGER NOT NULL,
24481 workspace_id INTEGER,
24482 source_id TEXT NOT NULL DEFAULT 'local',
24483 external_id TEXT,
24484 title TEXT,
24485 source_path TEXT NOT NULL,
24486 started_at INTEGER,
24487 ended_at INTEGER
24488 );
24489 CREATE TABLE messages (
24490 id INTEGER PRIMARY KEY,
24491 conversation_id INTEGER NOT NULL,
24492 idx INTEGER NOT NULL,
24493 role TEXT NOT NULL,
24494 author TEXT,
24495 created_at INTEGER,
24496 content TEXT NOT NULL,
24497 extra_json TEXT,
24498 extra_bin BLOB
24499 );
24500 INSERT INTO agents(id, slug) VALUES (1, 'codex');
24501 INSERT INTO workspaces(id, path) VALUES (1, '/data/projects/coding_agent_session_search');
24502 INSERT INTO sources(id, kind, host_label, created_at, updated_at)
24503 VALUES ('local', 'local', NULL, 1710000000000, 1710000000000);
24504 INSERT INTO conversations(
24505 id,
24506 agent_id,
24507 workspace_id,
24508 source_id,
24509 external_id,
24510 title,
24511 source_path,
24512 started_at
24513 )
24514 VALUES (
24515 1,
24516 1,
24517 1,
24518 'local',
24519 'legacy-session',
24520 'legacy session',
24521 '/tmp/legacy.jsonl',
24522 1710000000000
24523 );
24524 INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content)
24525 VALUES (1, 1, 0, 'user', 'tester', 1710000000000, 'legacy content');
24526 CREATE VIRTUAL TABLE fts_messages USING fts5(
24527 content,
24528 title,
24529 agent,
24530 workspace,
24531 source_path,
24532 created_at,
24533 message_id,
24534 content='',
24535 tokenize='porter unicode61'
24536 );",
24537 )
24538 .unwrap();
24539 drop(conn);
24540
24541 let storage = FrankenStorage::open(&db_path).unwrap();
24542 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24543
24544 let rows = storage
24545 .raw()
24546 .query("SELECT version FROM _schema_migrations ORDER BY version;")
24547 .unwrap();
24548 let versions: Vec<i64> = rows.iter().filter_map(|r| r.get_typed(0).ok()).collect();
24549 assert_eq!(versions, (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>());
24550 }
24551
24552 #[test]
24553 fn franken_storage_open_repairs_duplicate_fts_messages_schema_rows() {
24554 let dir = TempDir::new().unwrap();
24555 let db_path = dir.path().join("test_open_repairs_duplicate_fts_schema.db");
24556
24557 let storage = FrankenStorage::open(&db_path).unwrap();
24558 let agent = Agent {
24559 id: None,
24560 slug: "codex".into(),
24561 name: "Codex".into(),
24562 version: None,
24563 kind: AgentKind::Cli,
24564 };
24565 let agent_id = storage.ensure_agent(&agent).unwrap();
24566 let conversation = Conversation {
24567 id: None,
24568 agent_slug: "codex".into(),
24569 workspace: Some(PathBuf::from("/tmp/workspace")),
24570 external_id: Some("dup-fts-schema".into()),
24571 title: Some("Duplicate FTS schema".into()),
24572 source_path: PathBuf::from("/tmp/dup-fts-schema.jsonl"),
24573 started_at: Some(1_700_000_000_000),
24574 ended_at: Some(1_700_000_000_100),
24575 approx_tokens: Some(42),
24576 metadata_json: serde_json::Value::Null,
24577 messages: vec![Message {
24578 id: None,
24579 idx: 0,
24580 role: MessageRole::User,
24581 author: Some("user".into()),
24582 created_at: Some(1_700_000_000_050),
24583 content: "message that should remain queryable".into(),
24584 extra_json: serde_json::Value::Null,
24585 snippets: Vec::new(),
24586 }],
24587 source_id: LOCAL_SOURCE_ID.into(),
24588 origin_host: None,
24589 };
24590 storage
24591 .insert_conversation_tree(agent_id, None, &conversation)
24592 .unwrap();
24593 drop(storage);
24594 materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();
24595
24596 let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
24597 let conn = rusqlite_test_fixture_conn(&db_path);
24598 conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
24599 conn.execute(
24600 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
24601 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
24602 [duplicate_legacy_fts_sql],
24603 )
24604 .unwrap();
24605 conn.execute(
24606 "DELETE FROM meta WHERE key = ?1",
24607 [FTS_FRANKEN_REBUILD_META_KEY],
24608 )
24609 .unwrap();
24610 conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
24613
24614 let duplicate_rows: i64 = conn
24615 .query_row(
24616 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
24617 [],
24618 |row| row.get(0),
24619 )
24620 .unwrap();
24621 assert_eq!(duplicate_rows, 2);
24622 drop(conn);
24623
24624 let reopened = FrankenStorage::open(&db_path).unwrap();
24625 assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24626 let generation_rows: Vec<String> = reopened
24627 .raw()
24628 .query_map_collect(
24629 "SELECT value FROM meta WHERE key = ?1",
24630 fparams![FTS_FRANKEN_REBUILD_META_KEY],
24631 |row| row.get_typed(0),
24632 )
24633 .unwrap();
24634 assert_eq!(
24635 generation_rows.len(),
24636 0,
24637 "canonical open should not eagerly rewrite FTS repair metadata"
24638 );
24639 reopened.ensure_search_fallback_fts_consistency().unwrap();
24640 let repaired = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
24641 assert_eq!(franken_fts_schema_rows(&repaired).unwrap(), 1);
24642
24643 let total_messages: i64 = reopened
24644 .raw()
24645 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
24646 row.get_typed(0)
24647 })
24648 .unwrap();
24649 let total_fts_rows: i64 = reopened
24650 .raw()
24651 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
24652 row.get_typed(0)
24653 })
24654 .unwrap();
24655 assert_eq!(total_fts_rows, total_messages);
24656 }
24657
24658 #[test]
24659 fn franken_storage_open_fresh_db_keeps_single_franken_fts_schema_row() {
24660 let dir = TempDir::new().unwrap();
24661 let db_path = dir.path().join("fresh-franken-storage-open.db");
24662
24663 let storage = FrankenStorage::open(&db_path).unwrap();
24664 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24665
24666 storage
24673 .ensure_search_fallback_fts_consistency()
24674 .expect("ensure FTS consistency after fresh open");
24675 drop(storage);
24676
24677 let c_reader = FrankenConnection::open(db_path.to_string_lossy().into_owned())
24678 .expect("open DB via frankensqlite for sqlite_master inspection");
24679 assert_eq!(
24680 franken_fts_schema_rows(&c_reader).unwrap(),
24681 1,
24682 "exactly one fts_messages schema row should exist after ensure_search_fallback_fts_consistency"
24683 );
24684 drop(c_reader);
24685
24686 let storage = FrankenStorage::open(&db_path).unwrap();
24687 assert!(
24688 storage
24689 .raw()
24690 .query("SELECT COUNT(*) FROM fts_messages")
24691 .is_ok(),
24692 "fts_messages must be queryable through frankensqlite after open"
24693 );
24694 }
24695
24696 #[test]
24697 fn franken_storage_open_repairs_missing_analytics_tables_when_version_markers_lie() {
24698 let dir = TempDir::new().unwrap();
24699 let db_path = dir.path().join("test_repair_missing_analytics.db");
24700
24701 {
24702 let storage = FrankenStorage::open(&db_path).unwrap();
24703 assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24704 }
24705
24706 {
24707 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
24708 for table in &[
24709 "usage_models_daily",
24710 "usage_daily",
24711 "usage_hourly",
24712 "message_metrics",
24713 "token_daily_stats",
24714 "token_usage",
24715 "model_pricing",
24716 "embedding_jobs",
24717 "daily_stats",
24718 ] {
24719 conn.execute(&format!("DROP TABLE IF EXISTS {table}"))
24720 .unwrap();
24721 }
24722 conn.execute_compat(
24723 "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
24724 &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
24725 )
24726 .unwrap();
24727 }
24728
24729 let repaired = FrankenStorage::open(&db_path).unwrap();
24730 assert_eq!(repaired.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
24731
24732 let analytics_count: i64 = repaired
24733 .raw()
24734 .query_row_map(
24735 "SELECT COUNT(*) FROM sqlite_master
24736 WHERE type='table'
24737 AND name IN (
24738 'daily_stats',
24739 'embedding_jobs',
24740 'token_usage',
24741 'token_daily_stats',
24742 'model_pricing',
24743 'message_metrics',
24744 'usage_hourly',
24745 'usage_daily',
24746 'usage_models_daily'
24747 )",
24748 &[],
24749 |row| row.get_typed(0),
24750 )
24751 .unwrap();
24752 assert_eq!(
24753 analytics_count, 9,
24754 "open() should recreate the missing analytics tables even when schema_version already says current"
24755 );
24756 }
24757
24758 #[test]
24759 fn current_schema_repair_batches_cover_every_required_probe() {
24760 let missing_tables: Vec<&'static str> = REQUIRED_CURRENT_SCHEMA_TABLE_PROBES
24761 .iter()
24762 .map(|(table_name, _)| *table_name)
24763 .collect();
24764
24765 let batches = current_schema_repair_batches_for_missing_tables(&missing_tables).unwrap();
24766 let covered_tables: HashSet<&'static str> = batches
24767 .iter()
24768 .flat_map(|batch| batch.tables.iter().copied())
24769 .collect();
24770
24771 for table_name in missing_tables {
24772 assert!(
24773 covered_tables.contains(table_name),
24774 "missing repair coverage for {table_name}"
24775 );
24776 }
24777 }
24778
24779 #[test]
24780 fn current_schema_repair_batches_do_not_replay_core_schema_bootstrap() {
24781 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
24782 assert!(
24783 !batch.sql.contains("CREATE TABLE IF NOT EXISTS meta"),
24784 "repair batch {} should not recreate meta",
24785 batch.name
24786 );
24787 assert!(
24788 !batch.sql.contains("CREATE TABLE IF NOT EXISTS agents"),
24789 "repair batch {} should not recreate agents",
24790 batch.name
24791 );
24792 assert!(
24793 !batch.sql.contains("CREATE TABLE IF NOT EXISTS workspaces"),
24794 "repair batch {} should not recreate workspaces",
24795 batch.name
24796 );
24797 assert!(
24798 !batch
24799 .sql
24800 .contains("CREATE TABLE IF NOT EXISTS conversations"),
24801 "repair batch {} should not recreate conversations",
24802 batch.name
24803 );
24804 assert!(
24805 !batch.sql.contains("CREATE TABLE IF NOT EXISTS messages"),
24806 "repair batch {} should not recreate messages",
24807 batch.name
24808 );
24809 assert!(
24810 !batch.sql.contains("CREATE TABLE IF NOT EXISTS snippets"),
24811 "repair batch {} should not recreate snippets",
24812 batch.name
24813 );
24814 assert!(
24815 !batch.sql.contains("CREATE VIRTUAL TABLE fts_messages"),
24816 "repair batch {} should not recreate FTS tables",
24817 batch.name
24818 );
24819 assert!(
24820 !batch.sql.contains("DROP TABLE"),
24821 "repair batch {} should never drop tables",
24822 batch.name
24823 );
24824 }
24825 }
24826
24827 #[test]
24828 fn build_cass_migrations_applies_combined_v13() {
24829 let conn = FrankenConnection::open(":memory:").unwrap();
24830 let base_result = build_cass_migrations_before_tail_cache()
24831 .run(&conn)
24832 .unwrap();
24833 assert!(apply_conversation_tail_state_cache_migration(&conn).unwrap());
24834 let post_result = build_cass_migrations_after_tail_cache().run(&conn).unwrap();
24835
24836 assert!(base_result.was_fresh);
24837 let mut applied = base_result.applied;
24838 applied.push(15);
24839 applied.extend(post_result.applied);
24840 assert_eq!(
24841 applied,
24842 (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
24843 "should apply combined V13 plus additive post-V13 migrations"
24844 );
24845 let current: i64 = conn
24846 .query("SELECT MAX(version) FROM _schema_migrations;")
24847 .unwrap()
24848 .first()
24849 .unwrap()
24850 .get_typed(0)
24851 .unwrap();
24852 assert_eq!(current, CURRENT_SCHEMA_VERSION);
24853 }
24854
24855 #[test]
24856 fn franken_insert_conversations_batched_populates_analytics_rollups() {
24857 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
24858 use frankensqlite::compat::{ConnectionExt, RowExt};
24859 use std::path::PathBuf;
24860
24861 let dir = TempDir::new().unwrap();
24862 let db_path = dir.path().join("franken-index.db");
24863 let storage = FrankenStorage::open(&db_path).unwrap();
24864
24865 let agent = Agent {
24866 id: None,
24867 slug: "claude_code".into(),
24868 name: "Claude Code".into(),
24869 version: Some("1.0".into()),
24870 kind: AgentKind::Cli,
24871 };
24872 let agent_id = storage.ensure_agent(&agent).unwrap();
24873
24874 let ts_ms = 1_770_551_400_000_i64;
24875 let usage_json = serde_json::json!({
24876 "message": {
24877 "model": "claude-opus-4-6",
24878 "usage": {
24879 "input_tokens": 100,
24880 "output_tokens": 50,
24881 "cache_read_input_tokens": 25,
24882 "cache_creation_input_tokens": 10,
24883 "service_tier": "standard"
24884 }
24885 }
24886 });
24887
24888 let conv = Conversation {
24889 id: None,
24890 agent_slug: "claude_code".into(),
24891 workspace: Some(PathBuf::from("/tmp/workspace")),
24892 external_id: Some("franken-batch-upsert".into()),
24893 title: Some("Franken batch upsert".into()),
24894 source_path: PathBuf::from("/tmp/franken.jsonl"),
24895 started_at: Some(ts_ms),
24896 ended_at: Some(ts_ms + 60_000),
24897 approx_tokens: None,
24898 metadata_json: serde_json::Value::Null,
24899 messages: vec![
24900 Message {
24901 id: None,
24902 idx: 0,
24903 role: MessageRole::User,
24904 author: None,
24905 created_at: Some(ts_ms),
24906 content: "Please make a plan.".into(),
24907 extra_json: serde_json::Value::Null,
24908 snippets: vec![],
24909 },
24910 Message {
24911 id: None,
24912 idx: 1,
24913 role: MessageRole::Agent,
24914 author: None,
24915 created_at: Some(ts_ms + 30_000),
24916 content: "## Plan\n\n1. Reproduce\n2. Patch\n3. Verify".into(),
24917 extra_json: usage_json,
24918 snippets: vec![],
24919 },
24920 ],
24921 source_id: "local".into(),
24922 origin_host: None,
24923 };
24924
24925 let outcomes = storage
24926 .insert_conversations_batched(&[(agent_id, None, &conv)])
24927 .unwrap();
24928 assert_eq!(outcomes.len(), 1);
24929 assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
24930
24931 let conn = storage.raw();
24932 let daily_stats_rows: i64 = conn
24933 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
24934 row.get_typed(0)
24935 })
24936 .unwrap();
24937 let token_daily_rows: i64 = conn
24938 .query_row_map(
24939 "SELECT COUNT(*) FROM token_daily_stats",
24940 fparams![],
24941 |row| row.get_typed(0),
24942 )
24943 .unwrap();
24944 let usage_daily_rows: i64 = conn
24945 .query_row_map("SELECT COUNT(*) FROM usage_daily", fparams![], |row| {
24946 row.get_typed(0)
24947 })
24948 .unwrap();
24949 let model_daily_rows: i64 = conn
24950 .query_row_map(
24951 "SELECT COUNT(*) FROM usage_models_daily",
24952 fparams![],
24953 |row| row.get_typed(0),
24954 )
24955 .unwrap();
24956
24957 assert!(daily_stats_rows > 0, "daily_stats should be populated");
24958 assert!(
24959 token_daily_rows > 0,
24960 "token_daily_stats should be populated"
24961 );
24962 assert!(usage_daily_rows > 0, "usage_daily should be populated");
24963 assert!(
24964 model_daily_rows > 0,
24965 "usage_models_daily should be populated"
24966 );
24967 }
24968
24969 #[test]
24974 fn connection_manager_creates_readers() {
24975 let dir = TempDir::new().unwrap();
24976 let db_path = dir.path().join("cm.db");
24977
24978 let fs = FrankenStorage::open(&db_path).unwrap();
24980 drop(fs);
24981
24982 let config = ConnectionManagerConfig {
24983 reader_count: 3,
24984 max_writers: 2,
24985 };
24986 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
24987 assert_eq!(mgr.reader_count(), 3);
24988 assert_eq!(mgr.max_writers(), 2);
24989 }
24990
24991 #[test]
24992 fn connection_manager_clamps_zero_writer_limit_to_prevent_deadlock() {
24993 let dir = TempDir::new().unwrap();
24994 let db_path = dir.path().join("cm.db");
24995
24996 let fs = FrankenStorage::open(&db_path).unwrap();
24997 drop(fs);
24998
24999 let mgr = std::sync::Arc::new(
25000 FrankenConnectionManager::new(
25001 &db_path,
25002 ConnectionManagerConfig {
25003 reader_count: 0,
25004 max_writers: 0,
25005 },
25006 )
25007 .unwrap(),
25008 );
25009 assert_eq!(mgr.reader_count(), 1);
25010 assert_eq!(mgr.max_writers(), 1);
25011
25012 let (tx, rx) = std::sync::mpsc::channel();
25013 let mgr_for_thread = std::sync::Arc::clone(&mgr);
25014 std::thread::spawn(move || {
25015 let result = mgr_for_thread.writer().map(|mut guard| {
25016 guard.mark_committed();
25017 });
25018 tx.send(result.is_ok()).expect("writer result send");
25019 });
25020
25021 assert!(
25022 rx.recv_timeout(Duration::from_secs(10)).unwrap(),
25023 "writer acquisition should not block forever when configured with zero writer slots"
25024 );
25025 }
25026
25027 #[test]
25028 fn connection_manager_reader_round_robin() {
25029 let dir = TempDir::new().unwrap();
25030 let db_path = dir.path().join("cm.db");
25031
25032 let fs = FrankenStorage::open(&db_path).unwrap();
25033 drop(fs);
25034
25035 let config = ConnectionManagerConfig {
25036 reader_count: 2,
25037 max_writers: 1,
25038 };
25039 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25040
25041 let idx_before = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
25043 let _r1 = mgr.reader();
25044 let idx_after = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
25045 assert_eq!(idx_after, idx_before + 1, "reader index should advance");
25046 }
25047
25048 #[test]
25049 fn connection_manager_writer_reads_and_writes() {
25050 use frankensqlite::compat::RowExt;
25051
25052 let dir = TempDir::new().unwrap();
25053 let db_path = dir.path().join("cm.db");
25054
25055 let fs = FrankenStorage::open(&db_path).unwrap();
25056 drop(fs);
25057
25058 let mgr = FrankenConnectionManager::new(&db_path, Default::default()).unwrap();
25059
25060 {
25062 let mut guard = mgr.writer().unwrap();
25063 guard
25064 .storage()
25065 .raw()
25066 .execute("CREATE TABLE IF NOT EXISTS cm_test (id INTEGER PRIMARY KEY, val TEXT)")
25067 .unwrap();
25068 guard
25069 .storage()
25070 .raw()
25071 .execute("INSERT INTO cm_test (val) VALUES ('hello')")
25072 .unwrap();
25073 guard.mark_committed();
25074 }
25075
25076 let reader_guard = mgr.reader();
25078 let rows = reader_guard.query("SELECT val FROM cm_test").unwrap();
25079 assert_eq!(rows.len(), 1);
25080 assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "hello");
25081 }
25082
25083 #[test]
25084 fn connection_manager_writer_guard_drops_releases_slot() {
25085 let dir = TempDir::new().unwrap();
25086 let db_path = dir.path().join("cm.db");
25087
25088 let fs = FrankenStorage::open(&db_path).unwrap();
25089 drop(fs);
25090
25091 let config = ConnectionManagerConfig {
25092 reader_count: 1,
25093 max_writers: 1,
25094 };
25095 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25096
25097 {
25099 let mut guard = mgr.writer().unwrap();
25100 guard.mark_committed();
25101 }
25102
25103 let mut guard2 = mgr.writer().unwrap();
25105 guard2.mark_committed();
25106 }
25107
#[test]
// Creates a table and inserts one row through a `concurrent_writer()` guard, then
// reads the row back through a reader obtained from the same manager.
fn connection_manager_concurrent_writer_works() {
    use frankensqlite::compat::RowExt;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("cm.db");

    // Open the storage once and release it right away before the manager is built.
    drop(FrankenStorage::open(&db_path).unwrap());

    let manager = FrankenConnectionManager::new(
        &db_path,
        ConnectionManagerConfig {
            reader_count: 1,
            max_writers: 2,
        },
    )
    .unwrap();

    {
        let mut writer = manager.concurrent_writer().unwrap();
        // Statements run in order: the table must exist before the insert.
        for statement in [
            "CREATE TABLE IF NOT EXISTS cm_conc (id INTEGER PRIMARY KEY, val TEXT)",
            "INSERT INTO cm_conc (val) VALUES ('concurrent')",
        ] {
            writer.storage().raw().execute(statement).unwrap();
        }
        writer.mark_committed();
    }

    let reader = manager.reader();
    let rows = reader.query("SELECT val FROM cm_conc").unwrap();
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "concurrent");
}
25144
#[test]
// Pins the default configuration: `reader_count` is 4 and `max_writers` is positive.
fn connection_manager_default_config() {
    let defaults = ConnectionManagerConfig::default();
    let (readers, writers) = (defaults.reader_count, defaults.max_writers);

    assert_eq!(readers, 4);
    assert!(writers > 0);
}
25151
#[test]
// Seeds one conversation for each of two agents, purges one agent, rebuilds the
// derived tables, and checks that only the other agent's data is still present.
fn purge_agent_archive_data_removes_only_target_agent_and_rebuilds_derived_tables() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Registers `agent_slug` and stores one conversation for it containing a user
    // message followed by an agent message. `marker` is woven into the external
    // id, title, source path, and message contents.
    fn seed_conversation(storage: &FrankenStorage, agent_slug: &str, marker: &str) {
        let agent_id = storage
            .ensure_agent(&Agent {
                id: None,
                slug: agent_slug.into(),
                name: agent_slug.into(),
                version: None,
                kind: AgentKind::Cli,
            })
            .unwrap();

        let user_turn = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_010),
            content: format!("{agent_slug} {marker} user"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let agent_turn = Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_020),
            content: format!("{agent_slug} {marker} assistant"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };

        let conversation = Conversation {
            id: None,
            agent_slug: agent_slug.into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("{agent_slug}-{marker}")),
            title: Some(format!("{agent_slug} {marker}")),
            source_path: PathBuf::from(format!("/tmp/{agent_slug}-{marker}.jsonl")),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![user_turn, agent_turn],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };

        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    // Runs a query that yields a single integer and returns it.
    let scalar = |sql: &str| -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    seed_conversation(&storage, "openclaw", "purge-target");
    seed_conversation(&storage, "codex", "keep-target");

    // The purge report must cover exactly the one seeded "openclaw" conversation
    // and its two messages.
    let outcome = storage.purge_agent_archive_data("openclaw").unwrap();
    assert_eq!(outcome.conversations_deleted, 1);
    assert_eq!(outcome.messages_deleted, 2);

    // Rebuild every derived table before inspecting them.
    storage.rebuild_fts().unwrap();
    storage.rebuild_analytics().unwrap();
    storage.rebuild_daily_stats().unwrap();
    storage.rebuild_token_daily_stats().unwrap();

    // Only the "codex" agent and its single two-message conversation remain.
    let remaining_agents = storage.list_agents().unwrap();
    assert_eq!(remaining_agents.len(), 1);
    assert_eq!(remaining_agents[0].slug, "codex");
    assert_eq!(storage.total_conversation_count().unwrap(), 1);
    assert_eq!(storage.total_message_count().unwrap(), 2);

    let fts_rows = scalar("SELECT COUNT(*) FROM fts_messages");
    assert_eq!(fts_rows, 2);

    let total_daily_sessions = scalar(
        "SELECT COALESCE(SUM(session_count), 0)
         FROM daily_stats
         WHERE agent_slug = 'all' AND source_id = 'all'",
    );
    assert_eq!(total_daily_sessions, 1);

    let openclaw_token_rows =
        scalar("SELECT COUNT(*) FROM token_daily_stats WHERE agent_slug = 'openclaw'");
    assert_eq!(openclaw_token_rows, 0);
}
25259
#[test]
// Seeds one valid agent -> conversation -> message chain plus several rows whose
// parent ids do not exist, then checks that `cleanup_orphan_fk_rows` removes only
// the latter and that a second run reports nothing.
fn cleanup_orphan_fk_rows_removes_orphans_and_is_noop_on_clean_db() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("orphan_fk_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();

    // Runs a query that yields a single integer and returns it.
    let scalar = |sql: &str| -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    // Foreign-key enforcement is off while seeding so that rows referencing
    // nonexistent parents can be inserted.
    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();

    // Valid chain: agent 1 -> conversation 1 -> message 1.
    for seed_sql in [
        "INSERT INTO agents(id, slug, name, kind, created_at, updated_at) \
         VALUES(1, 'test-agent', 'Test Agent', 'cli', 0, 0)",
        "INSERT INTO conversations(id, agent_id, source_id, source_path, started_at) \
         VALUES(1, 1, 'local', '/tmp/real.jsonl', 0)",
        "INSERT INTO messages(id, conversation_id, idx, role, content) \
         VALUES(1, 1, 0, 'user', 'real message')",
    ] {
        storage.raw().execute_compat(seed_sql, fparams![]).unwrap();
    }

    // Orphan messages as (message id, conversation id, idx); neither conversation
    // 99_999 nor conversation 0 was inserted above.
    let orphan_messages: [(i64, i64, i64); 3] = [(101, 99_999, 0), (102, 0, 0), (103, 0, 1)];
    for (message_id, conversation_id, position) in orphan_messages {
        storage
            .raw()
            .execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, content) \
                 VALUES(?1, ?2, ?3, 'user', 'orphan message')",
                fparams![message_id, conversation_id, position],
            )
            .unwrap();
    }

    // Metric and token rows for the real message (1) and two of the orphan
    // messages (101, 102).
    let metered_message_ids: [i64; 3] = [1, 101, 102];
    for message_id in metered_message_ids {
        storage
            .raw()
            .execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 13, 2)",
                fparams![message_id],
            )
            .unwrap();
        storage
            .raw()
            .execute_compat(
                "INSERT INTO token_usage(
                    message_id, conversation_id, agent_id, timestamp_ms, day_id,
                    role, content_chars
                 ) VALUES(?1, 1, 1, 0, 0, 'user', 13)",
                fparams![message_id],
            )
            .unwrap();
    }

    // A snippet pointing at message 99999, which was never inserted.
    storage
        .raw()
        .execute_compat(
            "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text) \
             VALUES(99999, '/tmp/orphan-snippet.rs', 1, 2, 'rust', 'fn main() {}')",
            fparams![],
        )
        .unwrap();

    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    // Baseline: everything seeded above is present.
    let messages_before = scalar("SELECT COUNT(*) FROM messages");
    assert_eq!(messages_before, 4);
    let snippets_before = scalar("SELECT COUNT(*) FROM snippets");
    assert_eq!(snippets_before, 1);
    let metrics_before = scalar("SELECT COUNT(*) FROM message_metrics");
    assert_eq!(metrics_before, 3);
    let token_usage_before = scalar("SELECT COUNT(*) FROM token_usage");
    assert_eq!(token_usage_before, 3);

    let report = storage.cleanup_orphan_fk_rows().unwrap();

    // Only the rows hanging off message 1 may remain.
    let messages_after = scalar("SELECT COUNT(*) FROM messages");
    assert_eq!(messages_after, 1, "real message must be preserved");
    let snippets_after = scalar("SELECT COUNT(*) FROM snippets");
    assert_eq!(snippets_after, 0);
    let metrics_after = scalar("SELECT COUNT(*) FROM message_metrics");
    assert_eq!(metrics_after, 1, "real message metric must be preserved");
    let token_usage_after = scalar("SELECT COUNT(*) FROM token_usage");
    assert_eq!(token_usage_after, 1, "real token row must be preserved");

    // The expected total of 4 matches the 3 messages plus 1 snippet asserted
    // below. NOTE(review): the metric/token rows of messages 101 and 102 also
    // disappear but are evidently not part of the total — presumably they are
    // removed by cascade; confirm against the schema.
    assert_eq!(report.total, 4, "report total: {:?}", report);

    // Looks up the reported count for one table, if the report has an entry.
    let reported = |table: &str| -> Option<i64> {
        report
            .per_table
            .iter()
            .find(|(name, _)| *name == table)
            .map(|(_, count)| *count)
    };
    let messages_count = reported("messages");
    assert_eq!(messages_count, Some(3));
    let snippets_count = reported("snippets");
    assert_eq!(snippets_count, Some(1));

    // A second pass over the now-clean database reports nothing.
    let second = storage.cleanup_orphan_fk_rows().unwrap();
    assert_eq!(second.total, 0);
    assert!(second.per_table.is_empty());
}
25443
#[test]
// Inserts `ORPHAN_FK_ID_CHUNK_SIZE + 3` orphan messages, each with a metrics row,
// and checks that a single `cleanup_orphan_fk_rows` call removes all of them.
fn cleanup_orphan_fk_rows_handles_more_than_one_delete_chunk() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("orphan_fk_chunked_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();
    let orphan_count = ORPHAN_FK_ID_CHUNK_SIZE + 3;

    // Runs a query that yields a single integer and returns it.
    let scalar = |sql: &str| -> i64 {
        storage
            .raw()
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    // Foreign-key enforcement is off while seeding so that messages referencing
    // nonexistent conversations can be inserted.
    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
    {
        let mut tx = storage.raw().transaction().unwrap();
        for offset in (0..orphan_count).map(|i| i64::try_from(i).unwrap()) {
            // Each message gets its own conversation id; no conversations are inserted.
            let message_id = 10_000_i64 + offset;
            let conversation_id = 20_000_i64 + offset;
            tx.execute_compat(
                "INSERT INTO messages(id, conversation_id, idx, role, content) \
                 VALUES(?1, ?2, 0, 'user', 'orphan message')",
                fparams![message_id, conversation_id],
            )
            .unwrap();
            tx.execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 14, 2)",
                fparams![message_id],
            )
            .unwrap();
        }
        tx.commit().unwrap();
    }
    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    let report = storage.cleanup_orphan_fk_rows().unwrap();
    let expected = i64::try_from(orphan_count).unwrap();

    // The total and the `messages` entry both equal the number of seeded messages.
    assert_eq!(report.total, expected);
    let messages_count = report
        .per_table
        .iter()
        .find(|(name, _)| *name == "messages")
        .map(|(_, deleted)| *deleted);
    assert_eq!(messages_count, Some(expected));

    // Both tables must be empty afterwards.
    let messages_after = scalar("SELECT COUNT(*) FROM messages");
    assert_eq!(messages_after, 0);
    let metrics_after = scalar("SELECT COUNT(*) FROM message_metrics");
    assert_eq!(metrics_after, 0);
}
25500
#[test]
// Inserts `ORPHAN_FK_ID_CHUNK_SIZE * 2 + 5` `message_metrics` rows whose messages
// were never inserted, and checks that cleanup removes every one of them while
// reporting a single `message_metrics` entry.
fn cleanup_orphan_fk_rows_pages_direct_child_orphans() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("direct_orphan_fk_paged_self_heal.db");
    let storage = FrankenStorage::open(&db_path).unwrap();
    let orphan_count = (ORPHAN_FK_ID_CHUNK_SIZE * 2) + 5;

    // Foreign-key enforcement is off while seeding so that metrics rows without
    // a matching message can be inserted.
    storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
    {
        let mut tx = storage.raw().transaction().unwrap();
        for offset in (0..orphan_count).map(|i| i64::try_from(i).unwrap()) {
            let message_id = 50_000_i64 + offset;
            tx.execute_compat(
                "INSERT INTO message_metrics(
                    message_id, created_at_ms, hour_id, day_id, agent_slug,
                    role, content_chars, content_tokens_est
                 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 21, 3)",
                fparams![message_id],
            )
            .unwrap();
        }
        tx.commit().unwrap();
    }
    storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();

    let report = storage.cleanup_orphan_fk_rows().unwrap();
    let expected = i64::try_from(orphan_count).unwrap();

    assert_eq!(report.total, expected);

    // Gather every `message_metrics` entry once; both the sum and the number of
    // entries are checked.
    let metric_entries: Vec<i64> = report
        .per_table
        .iter()
        .filter(|(name, _)| *name == "message_metrics")
        .map(|(_, deleted)| *deleted)
        .collect();
    let metrics_count = metric_entries.iter().sum::<i64>();
    assert_eq!(metrics_count, expected);
    assert_eq!(
        metric_entries.len(),
        1,
        "paged cleanup should aggregate report entries by table: {report:?}"
    );

    let metrics_after: i64 = storage
        .raw()
        .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(metrics_after, 0);
}
25553}