1use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole, Snippet};
4use crate::sources::provenance::{LOCAL_SOURCE_ID, Source, SourceKind};
5use anyhow::{Context, Result, anyhow, bail};
6use frankensqlite::{
7 Connection as FrankenConnection, Row as FrankenRow, SqliteValue,
8 compat::{
9 ConnectionExt as FrankenConnectionExt, OpenFlags as FrankenOpenFlags,
10 OptionalExtension as FrankenOptionalExtension, ParamValue, RowExt as FrankenRowExt,
11 Transaction as FrankenTransaction, TransactionExt as FrankenTransactionExt,
12 open_with_flags as open_franken_with_flags, param_slice_to_values, params_from_iter,
13 },
14 migrate::MigrationRunner,
15};
16use serde::{Deserialize, Serialize};
17use smallvec::SmallVec;
18use std::borrow::Cow;
19use std::collections::{HashMap, HashSet};
20use std::fs;
21use std::io::{BufRead, BufReader, Write};
22use std::process::{Command, Stdio};
23use std::sync::{
24 Arc,
25 atomic::{AtomicBool, AtomicI8, AtomicI64, AtomicU64, AtomicUsize, Ordering},
26};
27
/// Builds a `&[ParamValue]` slice from a comma-separated list of expressions.
///
/// Each expression is converted with `ParamValue::from`. Invoking the macro with
/// no arguments yields an empty slice; a trailing comma is accepted.
macro_rules! fparams {
    // No arguments: an empty, explicitly typed parameter slice.
    () => {
        &[] as &[ParamValue]
    };
    // One or more expressions, each wrapped via `ParamValue::from`.
    ($($val:expr),+ $(,)?) => {
        &[$(ParamValue::from($val)),+] as &[ParamValue]
    };
}
37use std::path::{Path, PathBuf};
38use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
39use thiserror::Error;
40use tracing::info;
41
/// Timeout that `LazyFrankenDb::get` passes to `acquire_doctor_mutation_db_open_guard`.
const DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT: Duration = Duration::from_secs(30);
/// Upper bound (64 KiB) on how many bytes of the doctor lock file are read when
/// looking for its `pid=` entry.
const DOCTOR_MUTATION_LOCK_MAX_METADATA_READ: u64 = 64 * 1024;
44
/// Errors returned by [`LazyFrankenDb::get`] and [`LazyFrankenDb::get_with_timeout`].
#[derive(Debug, Error)]
pub enum LazyDbError {
    /// The database path did not exist when the lazy open was attempted.
    #[error("Database not found at {0}")]
    NotFound(PathBuf),
    /// The open failed. This also covers failure to acquire the doctor mutation
    /// lock and open timeouts, both of which are reported as
    /// `FrankenError::Internal` in `source`.
    #[error("Failed to open FrankenSQLite database at {path}: {source}")]
    FrankenOpenFailed {
        /// Path of the database that could not be opened.
        path: PathBuf,
        /// Underlying error.
        source: frankensqlite::FrankenError,
    },
}
63
/// A `FrankenConnection` bundled with two index-writer settings.
///
/// Field 0 is the connection, field 1 the index-writer checkpoint page count and
/// field 2 the index-writer busy timeout in milliseconds (names taken from
/// `new_with_index_writer_state`). `new` fills fields 1 and 2 with the
/// `UNSET_INDEX_WRITER_*` sentinels.
pub struct SendFrankenConnection(FrankenConnection, i64, u64);

// SAFETY: NOTE(review) — the invariant that makes it sound to move a
// `FrankenConnection` across threads is not visible in this part of the file.
// In the code shown, instances are only reached through a `parking_lot::Mutex`
// or handed over once through a channel; confirm that this is the intended
// justification.
unsafe impl Send for SendFrankenConnection {}
79
80impl SendFrankenConnection {
81 pub(crate) fn new(conn: FrankenConnection) -> Self {
82 Self(
83 conn,
84 UNSET_INDEX_WRITER_CHECKPOINT_PAGES,
85 UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS,
86 )
87 }
88
89 pub(crate) fn new_with_index_writer_state(
90 conn: FrankenConnection,
91 checkpoint_pages: i64,
92 busy_timeout_ms: u64,
93 ) -> Self {
94 Self(conn, checkpoint_pages, busy_timeout_ms)
95 }
96
97 pub(crate) fn into_parts(self) -> (FrankenConnection, i64, u64) {
98 (self.0, self.1, self.2)
99 }
100}
101
102impl std::ops::Deref for SendFrankenConnection {
103 type Target = FrankenConnection;
104 fn deref(&self) -> &FrankenConnection {
105 &self.0
106 }
107}
108
/// A database handle that opens its connection on first use.
pub struct LazyFrankenDb {
    /// Path of the database file.
    path: PathBuf,
    /// The connection, `None` until a `get`/`get_with_timeout` call opens it.
    conn: parking_lot::Mutex<Option<SendFrankenConnection>>,
}

/// Holds the `LazyFrankenDb` mutex for as long as the connection is borrowed.
///
/// Dereferences to the underlying `FrankenConnection`.
pub struct LazyFrankenDbGuard<'a>(parking_lot::MutexGuard<'a, Option<SendFrankenConnection>>);
121
122impl std::fmt::Debug for LazyFrankenDbGuard<'_> {
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 f.debug_tuple("LazyFrankenDbGuard")
125 .field(&self.0.is_some())
126 .finish()
127 }
128}
129
130impl std::ops::Deref for LazyFrankenDbGuard<'_> {
131 type Target = FrankenConnection;
132 fn deref(&self) -> &FrankenConnection {
133 self.0
134 .as_ref()
135 .expect("LazyFrankenDb connection must be initialized before access")
136 }
137}
138
139impl LazyFrankenDb {
140 pub fn new(path: PathBuf) -> Self {
142 Self {
143 path,
144 conn: parking_lot::Mutex::new(None),
145 }
146 }
147
148 pub fn from_overrides(data_dir: &Option<PathBuf>, db_override: Option<PathBuf>) -> Self {
152 let data_dir = data_dir.clone().unwrap_or_else(crate::default_data_dir);
153 let path = db_override.unwrap_or_else(|| data_dir.join("agent_search.db"));
154 Self::new(path)
155 }
156
157 pub fn get(&self, reason: &str) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
162 let mut guard = self.conn.lock();
163 if guard.is_none() {
164 if !self.path.exists() {
165 return Err(LazyDbError::NotFound(self.path.clone()));
166 }
167 let start = Instant::now();
168 let _doctor_guard = acquire_doctor_mutation_db_open_guard(
169 &self.path,
170 DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT,
171 )
172 .map_err(|err| LazyDbError::FrankenOpenFailed {
173 path: self.path.clone(),
174 source: frankensqlite::FrankenError::Internal(err.to_string()),
175 })?;
176 let conn =
177 FrankenConnection::open(self.path.to_string_lossy().into_owned()).map_err(|e| {
178 LazyDbError::FrankenOpenFailed {
179 path: self.path.clone(),
180 source: e,
181 }
182 })?;
183 let elapsed_ms = start.elapsed().as_millis();
184 info!(
185 path = %self.path.display(),
186 elapsed_ms = elapsed_ms,
187 reason = reason,
188 "lazily opened FrankenSQLite database"
189 );
190 *guard = Some(SendFrankenConnection::new(conn));
191 }
192 Ok(LazyFrankenDbGuard(guard))
193 }
194
195 pub fn get_with_timeout(
201 &self,
202 reason: &str,
203 timeout: Duration,
204 ) -> std::result::Result<LazyFrankenDbGuard<'_>, LazyDbError> {
205 let mut guard = self.conn.lock();
206 if guard.is_none() {
207 if !self.path.exists() {
208 return Err(LazyDbError::NotFound(self.path.clone()));
209 }
210 let start = Instant::now();
211 let path_owned = self.path.to_string_lossy().into_owned();
212 let path_for_guard = self.path.clone();
213 let (tx, rx) = std::sync::mpsc::channel();
214 std::thread::spawn(move || {
215 let _doctor_guard =
216 match acquire_doctor_mutation_db_open_guard(&path_for_guard, timeout) {
217 Ok(guard) => guard,
218 Err(err) => {
219 let _ = tx
220 .send(Err(frankensqlite::FrankenError::Internal(err.to_string())));
221 return;
222 }
223 };
224 let _ =
225 tx.send(FrankenConnection::open(path_owned).map(SendFrankenConnection::new));
226 });
227 let conn = rx
228 .recv_timeout(timeout)
229 .map_err(|_| LazyDbError::FrankenOpenFailed {
230 path: self.path.clone(),
231 source: frankensqlite::FrankenError::Internal(format!(
232 "database open timed out after {}s (possible corruption or lock contention)",
233 timeout.as_secs()
234 )),
235 })?
236 .map_err(|e| LazyDbError::FrankenOpenFailed {
237 path: self.path.clone(),
238 source: e,
239 })?;
240 let elapsed_ms = start.elapsed().as_millis();
241 info!(
242 path = %self.path.display(),
243 elapsed_ms = elapsed_ms,
244 reason = reason,
245 "lazily opened FrankenSQLite database (with timeout)"
246 );
247 *guard = Some(conn);
248 }
249 Ok(LazyFrankenDbGuard(guard))
250 }
251
252 pub fn path(&self) -> &Path {
254 &self.path
255 }
256
257 pub fn is_open(&self) -> bool {
259 self.conn.lock().is_some()
260 }
261}
262
/// Counter advanced by `next_franken_retry_jitter_ms` on every call to derive jitter values.
static FRANKEN_RETRY_JITTER_STATE: AtomicU64 = AtomicU64::new(0x9e37_79b9_7f4a_7c15);
/// Number of live `DoctorMutationDbOpenBypassGuard`s; while non-zero,
/// `acquire_doctor_mutation_db_open_guard` skips the lock.
static DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH: AtomicUsize = AtomicUsize::new(0);
/// Gates the `record_message_lookup_*` helpers; counters only move while this is `true`.
static MESSAGE_LOOKUP_TRACE_ENABLED: AtomicBool = AtomicBool::new(false);
/// Incremented by one per `record_message_lookup_exact_idx_probe` call.
static MESSAGE_LOOKUP_EXACT_IDX_PROBES: AtomicU64 = AtomicU64::new(0);
/// Accumulates the `query_count` passed to `record_message_lookup_bounded_queries`.
static MESSAGE_LOOKUP_BOUNDED_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Incremented by one per `record_message_lookup_full_scan_query` call.
static MESSAGE_LOOKUP_FULL_SCAN_QUERIES: AtomicU64 = AtomicU64::new(0);
/// Accumulates the `rows` reported by the bounded-query and full-scan recorders.
static MESSAGE_LOOKUP_ROWS_MATERIALIZED: AtomicU64 = AtomicU64::new(0);
270
/// Point-in-time copy of the `MESSAGE_LOOKUP_*` trace counters, as produced by
/// `message_lookup_trace_snapshot`.
#[derive(Debug, Clone, Copy, Default, Serialize)]
pub(crate) struct MessageLookupTraceCounters {
    /// Value of `MESSAGE_LOOKUP_EXACT_IDX_PROBES`.
    pub exact_idx_probes: u64,
    /// Value of `MESSAGE_LOOKUP_BOUNDED_QUERIES`.
    pub bounded_lookup_queries: u64,
    /// Value of `MESSAGE_LOOKUP_FULL_SCAN_QUERIES`.
    pub full_scan_queries: u64,
    /// Value of `MESSAGE_LOOKUP_ROWS_MATERIALIZED`.
    pub rows_materialized: u64,
}
278
279impl MessageLookupTraceCounters {
280 pub(crate) fn saturating_sub(self, before: Self) -> Self {
281 Self {
282 exact_idx_probes: self
283 .exact_idx_probes
284 .saturating_sub(before.exact_idx_probes),
285 bounded_lookup_queries: self
286 .bounded_lookup_queries
287 .saturating_sub(before.bounded_lookup_queries),
288 full_scan_queries: self
289 .full_scan_queries
290 .saturating_sub(before.full_scan_queries),
291 rows_materialized: self
292 .rows_materialized
293 .saturating_sub(before.rows_materialized),
294 }
295 }
296
297 pub(crate) fn lookups_against_global(self) -> u64 {
298 self.exact_idx_probes.saturating_add(self.rows_materialized)
299 }
300}
301
302pub(crate) fn set_message_lookup_trace_enabled(enabled: bool) -> bool {
303 MESSAGE_LOOKUP_TRACE_ENABLED.swap(enabled, Ordering::Relaxed)
304}
305
306pub(crate) fn message_lookup_trace_snapshot() -> MessageLookupTraceCounters {
307 MessageLookupTraceCounters {
308 exact_idx_probes: MESSAGE_LOOKUP_EXACT_IDX_PROBES.load(Ordering::Relaxed),
309 bounded_lookup_queries: MESSAGE_LOOKUP_BOUNDED_QUERIES.load(Ordering::Relaxed),
310 full_scan_queries: MESSAGE_LOOKUP_FULL_SCAN_QUERIES.load(Ordering::Relaxed),
311 rows_materialized: MESSAGE_LOOKUP_ROWS_MATERIALIZED.load(Ordering::Relaxed),
312 }
313}
314
315fn record_message_lookup_exact_idx_probe() {
316 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
317 MESSAGE_LOOKUP_EXACT_IDX_PROBES.fetch_add(1, Ordering::Relaxed);
318 }
319}
320
321fn record_message_lookup_bounded_queries(query_count: u64, rows: usize) {
322 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
323 MESSAGE_LOOKUP_BOUNDED_QUERIES.fetch_add(query_count, Ordering::Relaxed);
324 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
325 }
326}
327
328fn record_message_lookup_full_scan_query(rows: usize) {
329 if MESSAGE_LOOKUP_TRACE_ENABLED.load(Ordering::Relaxed) {
330 MESSAGE_LOOKUP_FULL_SCAN_QUERIES.fetch_add(1, Ordering::Relaxed);
331 MESSAGE_LOOKUP_ROWS_MATERIALIZED.fetch_add(rows as u64, Ordering::Relaxed);
332 }
333}
334
/// Token returned by `enter_doctor_mutation_db_open_bypass`.
///
/// While at least one of these is alive, `acquire_doctor_mutation_db_open_guard`
/// returns without taking the doctor lock. The depth counter is a process-wide
/// static, so the bypass is not limited to the thread that created the guard.
pub(crate) struct DoctorMutationDbOpenBypassGuard;

impl Drop for DoctorMutationDbOpenBypassGuard {
    /// Decrements the bypass depth that `enter_doctor_mutation_db_open_bypass` incremented.
    fn drop(&mut self) {
        DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_sub(1, Ordering::SeqCst);
    }
}
342
343pub(crate) fn enter_doctor_mutation_db_open_bypass() -> DoctorMutationDbOpenBypassGuard {
344 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.fetch_add(1, Ordering::SeqCst);
345 DoctorMutationDbOpenBypassGuard
346}
347
348fn doctor_mutation_db_open_bypass_active() -> bool {
349 DOCTOR_MUTATION_DB_OPEN_BYPASS_DEPTH.load(Ordering::SeqCst) > 0
350}
351
352fn next_franken_retry_jitter_ms(max_inclusive: u64) -> u64 {
353 let mut value = FRANKEN_RETRY_JITTER_STATE.fetch_add(0x9e37_79b9_7f4a_7c15, Ordering::Relaxed);
354 value ^= value >> 30;
355 value = value.wrapping_mul(0xbf58_476d_1ce4_e5b9);
356 value ^= value >> 27;
357 value = value.wrapping_mul(0x94d0_49bb_1331_11eb);
358 value ^= value >> 31;
359 value % max_inclusive.saturating_add(1)
360}
361
362pub(crate) fn sleep_with_franken_retry_backoff(
365 backoff: &mut Duration,
366 remaining: Duration,
367 max_backoff: Duration,
368) {
369 let capped = (*backoff).min(remaining);
370 let extra_budget = remaining.saturating_sub(capped).min(capped);
371 let extra_ms = extra_budget.as_millis().min(u128::from(u64::MAX)) as u64;
372 let sleep_for = if extra_ms == 0 {
373 capped
374 } else {
375 capped
376 .saturating_add(Duration::from_millis(next_franken_retry_jitter_ms(
377 extra_ms,
378 )))
379 .min(remaining)
380 };
381 std::thread::sleep(sleep_for);
382 *backoff = backoff.saturating_mul(2).min(max_backoff);
383}
384
385struct DoctorMutationDbOpenGuard(Option<fs::File>);
386
387impl Drop for DoctorMutationDbOpenGuard {
388 fn drop(&mut self) {
389 if let Some(file) = self.0.as_ref() {
390 let _ = fs2::FileExt::unlock(file);
391 }
392 }
393}
394
/// Returns the doctor repair lock path that belongs to `db_path`.
///
/// Only a file named exactly `agent_search.db` has a lock; it lives at
/// `<parent>/doctor/locks/doctor-repair.lock`. Any other file name, or a path
/// without a parent, yields `None`.
fn doctor_mutation_lock_path_for_db_open(db_path: &Path) -> Option<PathBuf> {
    let is_primary_db = matches!(
        db_path.file_name().and_then(|name| name.to_str()),
        Some("agent_search.db")
    );
    if !is_primary_db {
        return None;
    }

    let data_dir = db_path.parent()?;
    let mut lock_path = data_dir.join("doctor");
    lock_path.push("locks");
    lock_path.push("doctor-repair.lock");
    Some(lock_path)
}
408
/// Returns `true` if any `key=value` line in `raw` has key `pid` and a value
/// equal to this process's id.
///
/// Keys and values are trimmed. Lines without `=` and values that do not parse
/// as `u32` are ignored.
fn doctor_lock_metadata_pid_is_current_process(raw: &str) -> bool {
    let own_pid = std::process::id();
    raw.lines()
        .filter_map(|line| line.split_once('='))
        .filter(|(key, _)| key.trim() == "pid")
        .any(|(_, value)| matches!(value.trim().parse::<u32>(), Ok(pid) if pid == own_pid))
}
421
422fn doctor_lock_file_pid_is_current_process(file: &fs::File) -> bool {
423 use std::io::Read as _;
424
425 let Ok(mut file) = file.try_clone() else {
426 return false;
427 };
428 let mut raw = String::new();
429 let _ = std::io::Read::take(&mut file, DOCTOR_MUTATION_LOCK_MAX_METADATA_READ)
430 .read_to_string(&mut raw);
431 doctor_lock_metadata_pid_is_current_process(&raw)
432}
433
/// Decides whether a failed lock attempt means "someone else holds the lock".
///
/// That is the case for `ErrorKind::WouldBlock` on every platform, and
/// additionally for raw OS error 33 on Windows.
/// NOTE(review): 33 is presumably `ERROR_LOCK_VIOLATION` — confirm.
fn doctor_mutation_lock_error_is_active(err: &std::io::Error) -> bool {
    let would_block = err.kind() == std::io::ErrorKind::WouldBlock;
    let windows_lock_violation = cfg!(windows) && err.raw_os_error() == Some(33);
    would_block || windows_lock_violation
}
448
/// Takes a shared lock on the doctor repair lock file before `db_path` is opened.
///
/// Returns a guard that holds no lock when:
/// - `doctor_mutation_lock_path_for_db_open` yields no lock path for `db_path`,
/// - a `DoctorMutationDbOpenBypassGuard` is currently alive, or
/// - the lock file's `pid=` entry names the current process.
///
/// Otherwise the lock directory and lock file are created if missing and a
/// non-blocking shared lock is attempted repeatedly, sleeping with jittered
/// backoff (starting at 4 ms, capped at 128 ms) until `timeout` has elapsed.
///
/// # Errors
///
/// Fails if the lock directory or file cannot be created/opened, if the lock is
/// still held by someone else once `timeout` has passed, or if locking fails
/// for any reason other than contention.
fn acquire_doctor_mutation_db_open_guard(
    db_path: &Path,
    timeout: Duration,
) -> Result<DoctorMutationDbOpenGuard> {
    let Some(lock_path) = doctor_mutation_lock_path_for_db_open(db_path) else {
        return Ok(DoctorMutationDbOpenGuard(None));
    };
    if doctor_mutation_db_open_bypass_active() {
        return Ok(DoctorMutationDbOpenGuard(None));
    }

    if let Some(parent) = lock_path.parent() {
        fs::create_dir_all(parent).with_context(|| {
            format!(
                "creating doctor mutation lock directory {} before opening {}",
                parent.display(),
                db_path.display()
            )
        })?;
    }

    let deadline = Instant::now() + timeout;
    let mut backoff = Duration::from_millis(4);
    loop {
        // The file is reopened on every attempt; `truncate(false)` leaves any
        // metadata already written to it untouched.
        let file = fs::OpenOptions::new()
            .create(true)
            .truncate(false)
            .read(true)
            .write(true)
            .open(&lock_path)
            .with_context(|| {
                format!(
                    "opening doctor mutation lock {} before opening {}",
                    lock_path.display(),
                    db_path.display()
                )
            })?;

        // A lock file that names this process is not waited on or locked.
        if doctor_lock_file_pid_is_current_process(&file) {
            return Ok(DoctorMutationDbOpenGuard(None));
        }

        match fs2::FileExt::try_lock_shared(&file) {
            Ok(()) => return Ok(DoctorMutationDbOpenGuard(Some(file))),
            // Contention: retry until the deadline, then give up.
            Err(err) if doctor_mutation_lock_error_is_active(&err) => {
                let now = Instant::now();
                if now >= deadline {
                    return Err(anyhow!(
                        "doctor mutation lock {} is active while opening {}; refusing to open during repair after waiting {}ms",
                        lock_path.display(),
                        db_path.display(),
                        timeout.as_millis()
                    ));
                }
                let remaining = deadline.saturating_duration_since(now);
                sleep_with_franken_retry_backoff(
                    &mut backoff,
                    remaining,
                    Duration::from_millis(128),
                );
            }
            // Any other locking failure is reported immediately.
            Err(err) => {
                return Err(anyhow!(
                    "failed to acquire shared doctor mutation lock {} before opening {}: {}",
                    lock_path.display(),
                    db_path.display(),
                    err
                ));
            }
        }
    }
}
521
522pub(crate) fn open_franken_storage_with_timeout(
523 path: &Path,
524 timeout: Duration,
525) -> Result<FrankenStorage> {
526 if !path.exists() {
527 return Err(anyhow!("Database not found at {}", path.display()));
528 }
529
530 let deadline = Instant::now() + timeout;
531 let mut backoff = Duration::from_millis(4);
532 loop {
533 match FrankenStorage::open(path) {
534 Ok(storage) => return Ok(storage),
535 Err(err) if retryable_franken_anyhow(&err) => {
536 let now = Instant::now();
537 if now >= deadline {
538 return Err(err);
539 }
540 let remaining = deadline.saturating_duration_since(now);
541 sleep_with_franken_retry_backoff(
542 &mut backoff,
543 remaining,
544 Duration::from_millis(128),
545 );
546 }
547 Err(err) => return Err(err),
548 }
549 }
550}
551
/// Opens the database at `path` only if it is already at `CURRENT_SCHEMA_VERSION`.
///
/// Returns `Ok(None)` when `path` does not exist, or when the `schema_version`
/// entry in `meta` is missing, unreadable, unparsable or different from
/// `CURRENT_SCHEMA_VERSION`; in the latter cases the connection is closed again
/// before returning. On a version match, `transition_from_meta_version`,
/// `repair_missing_current_schema_objects` and `apply_config` are run before
/// the storage is returned.
///
/// # Errors
///
/// Propagates failures from opening the raw connection and from the three
/// post-open steps listed above.
pub(crate) fn open_current_schema_storage_with_timeout(
    path: &Path,
    timeout: Duration,
) -> Result<Option<FrankenStorage>> {
    if !path.exists() {
        return Ok(None);
    }

    let mut storage = FrankenStorage::new(
        open_franken_raw_connection_with_timeout(path, timeout)?,
        path.to_path_buf(),
    );
    storage.apply_open_stage_busy_timeout();

    // Every failure along this chain collapses to `None`, which is treated
    // below the same as a version mismatch.
    let version = storage
        .raw()
        .query("SELECT value FROM meta WHERE key = 'schema_version';")
        .ok()
        .and_then(|rows| rows.first().cloned())
        .and_then(|row| row.get_typed::<String>(0).ok())
        .and_then(|raw| raw.parse::<i64>().ok());

    if version != Some(CURRENT_SCHEMA_VERSION) {
        // Prefer closing without a checkpoint; fall back to a best-effort close
        // if that fails.
        if let Err(close_err) = storage.close_without_checkpoint_in_place() {
            tracing::debug!(
                error = %close_err,
                db_path = %path.display(),
                "open_current_schema_storage_with_timeout: close_without_checkpoint_in_place failed; falling back to best-effort close"
            );
            storage.close_best_effort_in_place();
        }
        return Ok(None);
    }

    transition_from_meta_version(&storage.conn)?;
    storage.repair_missing_current_schema_objects()?;
    storage.apply_config()?;
    Ok(Some(storage))
}
591
592pub(crate) fn open_franken_readonly_storage_with_timeout(
593 path: &Path,
594 timeout: Duration,
595) -> Result<FrankenStorage> {
596 if !path.exists() {
597 return Err(anyhow!("Database not found at {}", path.display()));
598 }
599
600 let deadline = Instant::now() + timeout;
601 let mut backoff = Duration::from_millis(4);
602 loop {
603 match FrankenStorage::open_readonly(path) {
604 Ok(storage) => return Ok(storage),
605 Err(err) if retryable_franken_anyhow(&err) => {
606 let now = Instant::now();
607 if now >= deadline {
608 return Err(err);
609 }
610 let remaining = deadline.saturating_duration_since(now);
611 sleep_with_franken_retry_backoff(
612 &mut backoff,
613 remaining,
614 Duration::from_millis(128),
615 );
616 }
617 Err(err) => return Err(err),
618 }
619 }
620}
621
622pub(crate) fn open_franken_raw_connection_with_timeout(
623 path: &Path,
624 timeout: Duration,
625) -> Result<FrankenConnection> {
626 if !path.exists() {
627 return Err(anyhow!("Database not found at {}", path.display()));
628 }
629
630 let path_str = path.to_string_lossy().to_string();
631 let deadline = Instant::now() + timeout;
632 let mut backoff = Duration::from_millis(4);
633 loop {
634 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
635 match FrankenConnection::open(&path_str)
636 .with_context(|| format!("opening raw frankensqlite db at {}", path.display()))
637 {
638 Ok(conn) => return Ok(conn),
639 Err(err) if retryable_franken_anyhow(&err) => {
640 let now = Instant::now();
641 if now >= deadline {
642 return Err(err);
643 }
644 let remaining = deadline.saturating_duration_since(now);
645 sleep_with_franken_retry_backoff(
646 &mut backoff,
647 remaining,
648 Duration::from_millis(128),
649 );
650 }
651 Err(err) => return Err(err),
652 }
653 }
654}
655
656pub(crate) fn open_franken_raw_readonly_connection_with_timeout(
657 path: &Path,
658 timeout: Duration,
659) -> Result<FrankenConnection> {
660 if !path.exists() {
661 return Err(anyhow!("Database not found at {}", path.display()));
662 }
663
664 let path_str = path.to_string_lossy().to_string();
665 let deadline = Instant::now() + timeout;
666 let mut backoff = Duration::from_millis(4);
667 loop {
668 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
669 match open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
670 .with_context(|| {
671 format!(
672 "opening raw frankensqlite db readonly at {}",
673 path.display()
674 )
675 }) {
676 Ok(conn) => return Ok(conn),
677 Err(err) if retryable_franken_anyhow(&err) => {
678 let now = Instant::now();
679 if now >= deadline {
680 return Err(err);
681 }
682 let remaining = deadline.saturating_duration_since(now);
683 sleep_with_franken_retry_backoff(
684 &mut backoff,
685 remaining,
686 Duration::from_millis(128),
687 );
688 }
689 Err(err) => return Err(err),
690 }
691 }
692}
693
694pub(crate) fn retryable_franken_error(err: &frankensqlite::FrankenError) -> bool {
695 matches!(
696 err,
697 frankensqlite::FrankenError::Busy
698 | frankensqlite::FrankenError::BusyRecovery
699 | frankensqlite::FrankenError::BusySnapshot { .. }
700 | frankensqlite::FrankenError::DatabaseLocked { .. }
701 | frankensqlite::FrankenError::LockFailed { .. }
702 | frankensqlite::FrankenError::WriteConflict { .. }
703 | frankensqlite::FrankenError::SerializationFailure { .. }
704 ) || retryable_storage_error_message(&err.to_string())
705}
706
/// Reports whether an error message looks like transient lock contention.
///
/// The check is a case-insensitive (ASCII) substring search for a fixed set of
/// markers.
pub(crate) fn retryable_storage_error_message(message: &str) -> bool {
    const RETRYABLE_MARKERS: [&str; 6] = [
        "busy",
        "locked",
        "locking",
        "contention",
        "temporarily unavailable",
        "would block",
    ];
    let haystack = message.to_ascii_lowercase();
    RETRYABLE_MARKERS
        .iter()
        .any(|marker| haystack.contains(*marker))
}
716
717pub(crate) fn retryable_franken_anyhow(err: &anyhow::Error) -> bool {
718 err.chain().any(|cause| {
719 cause
720 .downcast_ref::<frankensqlite::FrankenError>()
721 .is_some_and(retryable_franken_error)
722 || retryable_storage_error_message(&cause.to_string())
723 })
724}
725
726impl Drop for LazyFrankenDb {
727 fn drop(&mut self) {
728 let Some(mut conn) = self.conn.get_mut().take() else {
729 return;
730 };
731 conn.0.close_best_effort_in_place();
732 }
733}
734
/// Sizing options for [`FrankenConnectionManager`].
#[derive(Debug, Clone)]
pub struct ConnectionManagerConfig {
    /// Number of reader connections to open (the manager uses at least 1).
    pub reader_count: usize,
    /// Number of writers allowed at the same time (the manager uses at least 1).
    pub max_writers: usize,
}

impl Default for ConnectionManagerConfig {
    /// Four readers, and one writer slot per unit of available parallelism
    /// (falling back to 4 when that cannot be determined).
    fn default() -> Self {
        let max_writers = match std::thread::available_parallelism() {
            Ok(parallelism) => parallelism.get(),
            Err(_) => 4,
        };
        Self {
            reader_count: 4,
            max_writers,
        }
    }
}
762
/// Hands out pooled reader connections and a bounded number of writer connections
/// for one database file.
pub struct FrankenConnectionManager {
    /// Path of the database file every connection is opened against.
    db_path: PathBuf,
    /// Reader connections opened up front, each behind its own mutex.
    readers: Vec<parking_lot::Mutex<SendFrankenConnection>>,
    /// Counter used to pick the next reader in round-robin order.
    reader_idx: std::sync::atomic::AtomicUsize,
    /// Bounded channel pre-filled with one unit token per allowed writer.
    /// Taking a token admits a writer; `WriterGuard` sends it back on drop.
    writer_tokens: (
        crossbeam_channel::Sender<()>,
        crossbeam_channel::Receiver<()>,
    ),
    /// Effective configuration, after clamping both counts to at least 1.
    config: ConnectionManagerConfig,
}

// SAFETY: NOTE(review) — the justification for `Send`/`Sync` is not visible in
// this part of the file. In the code shown, reader connections are only reached
// through their `parking_lot::Mutex` and writer connections are opened per call;
// confirm that this is what makes sharing the manager across threads sound.
unsafe impl Send for FrankenConnectionManager {}
unsafe impl Sync for FrankenConnectionManager {}
791
792impl FrankenConnectionManager {
793 pub fn new(db_path: impl Into<PathBuf>, config: ConnectionManagerConfig) -> Result<Self> {
798 let db_path = db_path.into();
799 let path_str = db_path.to_string_lossy().to_string();
800
801 let reader_count = config.reader_count.max(1);
802 let mut readers = Vec::with_capacity(reader_count);
803 for _ in 0..reader_count {
804 let conn = FrankenConnection::open(&path_str)
805 .with_context(|| format!("opening reader connection at {}", db_path.display()))?;
806 let _ = conn.execute("PRAGMA busy_timeout = 5000;"); let _ = conn.execute("PRAGMA cache_size = -16384;"); readers.push(parking_lot::Mutex::new(SendFrankenConnection::new(conn)));
810 }
811
812 let max_writers = config.max_writers.max(1);
813
814 let (tx, rx) = crossbeam_channel::bounded(max_writers);
818 for _ in 0..max_writers {
819 tx.send(())
820 .map_err(|_| anyhow!("writer token channel closed during initialization"))?;
821 }
822
823 Ok(Self {
824 db_path,
825 readers,
826 reader_idx: std::sync::atomic::AtomicUsize::new(0),
827 writer_tokens: (tx, rx),
828 config: ConnectionManagerConfig {
829 reader_count,
830 max_writers,
831 },
832 })
833 }
834
835 pub fn reader(&self) -> parking_lot::MutexGuard<'_, SendFrankenConnection> {
840 let idx = self
841 .reader_idx
842 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
843 self.readers[idx % self.readers.len()].lock()
844 }
845
846 pub fn writer(&self) -> Result<WriterGuard<'_>> {
852 self.writer_tokens
853 .1
854 .recv()
855 .map_err(|_| anyhow!("writer token channel closed"))?;
856 let path_str = self.db_path.to_string_lossy().to_string();
857 let conn = match FrankenConnection::open(&path_str) {
858 Ok(c) => c,
859 Err(e) => {
860 let _ = self.writer_tokens.0.send(());
861 return Err(anyhow::Error::from(e).context(format!(
862 "opening writer connection at {}",
863 self.db_path.display()
864 )));
865 }
866 };
867 let storage = FrankenStorage::new(conn, self.db_path.clone());
868 if let Err(e) = storage.apply_config() {
869 let _ = self.writer_tokens.0.send(());
870 return Err(e);
871 }
872 Ok(WriterGuard {
873 storage,
874 mgr: self,
875 committed: false,
876 })
877 }
878
879 pub fn concurrent_writer(&self) -> Result<WriterGuard<'_>> {
884 self.writer_tokens
885 .1
886 .recv()
887 .map_err(|_| anyhow!("writer token channel closed"))?;
888 let path_str = self.db_path.to_string_lossy().to_string();
889 let conn = match FrankenConnection::open(&path_str) {
890 Ok(c) => c,
891 Err(e) => {
892 let _ = self.writer_tokens.0.send(());
893 return Err(anyhow::Error::from(e).context(format!(
894 "opening concurrent writer at {}",
895 self.db_path.display()
896 )));
897 }
898 };
899 let storage = FrankenStorage::new(conn, self.db_path.clone());
900 if let Err(e) = storage.apply_config() {
901 let _ = self.writer_tokens.0.send(());
902 return Err(e);
903 }
904 let _ = storage.raw().execute("PRAGMA cache_size = -4096;");
906 Ok(WriterGuard {
907 storage,
908 mgr: self,
909 committed: false,
910 })
911 }
912
913 pub fn db_path(&self) -> &Path {
915 &self.db_path
916 }
917
918 pub fn reader_count(&self) -> usize {
920 self.readers.len()
921 }
922
923 pub fn max_writers(&self) -> usize {
925 self.config.max_writers
926 }
927}
928
929impl Drop for FrankenConnectionManager {
930 fn drop(&mut self) {
931 for reader in &mut self.readers {
932 reader.get_mut().0.close_best_effort_in_place();
933 }
934 }
935}
936
937pub struct WriterGuard<'a> {
942 storage: FrankenStorage,
943 mgr: &'a FrankenConnectionManager,
944 committed: bool,
945}
946
947impl<'a> WriterGuard<'a> {
948 pub fn storage(&self) -> &FrankenStorage {
950 &self.storage
951 }
952
953 pub fn mark_committed(&mut self) {
958 self.committed = true;
959 }
960}
961
962impl Drop for WriterGuard<'_> {
963 fn drop(&mut self) {
964 if !self.committed {
965 let _ = self.storage.raw().execute("ROLLBACK;");
967 }
968 self.storage.close_best_effort_in_place();
969 let _ = self.mgr.writer_tokens.0.send(());
971 }
972}
973
974fn serialize_json_to_msgpack(value: &serde_json::Value) -> Option<Vec<u8>> {
983 if value.is_null() || value.as_object().is_some_and(|o| o.is_empty()) {
984 return None;
985 }
986 rmp_serde::to_vec(value).ok()
987}
988
989fn deserialize_msgpack_to_json(bytes: &[u8]) -> serde_json::Value {
992 if bytes.is_empty() {
993 return serde_json::Value::Object(serde_json::Map::new());
994 }
995 rmp_serde::from_slice(bytes).unwrap_or_else(|e| {
996 tracing::debug!(
997 error = %e,
998 bytes_len = bytes.len(),
999 "Failed to deserialize metadata - returning empty object"
1000 );
1001 serde_json::Value::Object(serde_json::Map::new())
1002 })
1003}
1004
1005fn franken_read_metadata_compat(
1007 row: &FrankenRow,
1008 json_idx: usize,
1009 bin_idx: usize,
1010) -> serde_json::Value {
1011 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1013 && !bytes.is_empty()
1014 {
1015 return deserialize_msgpack_to_json(&bytes);
1016 }
1017
1018 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1020 return serde_json::from_str(&json_str)
1021 .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
1022 }
1023
1024 serde_json::Value::Object(serde_json::Map::new())
1025}
1026
1027fn franken_read_message_extra_compat(
1028 row: &FrankenRow,
1029 json_idx: usize,
1030 bin_idx: usize,
1031) -> serde_json::Value {
1032 if let Ok(Some(bytes)) = row.get_typed::<Option<Vec<u8>>>(bin_idx)
1033 && !bytes.is_empty()
1034 {
1035 return deserialize_msgpack_to_json(&bytes);
1036 }
1037
1038 if let Ok(Some(json_str)) = row.get_typed::<Option<String>>(json_idx) {
1039 return serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null);
1040 }
1041
1042 serde_json::Value::Null
1043}
1044
/// Errors produced by schema migration.
#[derive(Debug, Error)]
pub enum MigrationError {
    /// The database has to be rebuilt rather than migrated in place.
    #[error("Rebuild required: {reason}")]
    RebuildRequired {
        /// Human-readable explanation, included in the error message.
        reason: String,
        /// NOTE(review): presumably where the previous database was backed up,
        /// if a backup was made — confirm against the code that constructs this.
        backup_path: Option<std::path::PathBuf>,
    },

    /// A `frankensqlite` error.
    #[error("Database error: {0}")]
    Database(#[from] frankensqlite::FrankenError),

    /// A filesystem or other I/O error.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Any other failure, carried as its message text.
    #[error("{0}")]
    Other(String),
}
1071
1072impl From<anyhow::Error> for MigrationError {
1073 fn from(e: anyhow::Error) -> Self {
1074 MigrationError::Other(e.to_string())
1075 }
1076}
1077
/// NOTE(review): presumably the number of database backups retained — the code
/// that uses this is not visible here; confirm.
const MAX_BACKUPS: usize = 3;
/// PRAGMA statement that sets `busy_timeout` to 30000 ms.
/// NOTE(review): the name suggests it is applied before a backup `VACUUM`; confirm at the use site.
const BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA: &str = "PRAGMA busy_timeout = 30000;";
1081
/// File names that `is_user_data_file` treats as user data.
const USER_DATA_FILES: &[&str] = &["bookmarks.db", "tui_state.json", "sources.toml", ".env"];

/// Returns `true` if the final component of `path` is one of `USER_DATA_FILES`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, yield `false`.
pub fn is_user_data_file(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(file_name) => USER_DATA_FILES.contains(&file_name),
        None => false,
    }
}
1092
/// SQL that creates the `fts_messages` FTS5 virtual table if it does not exist.
///
/// The table is declared with `content=''` and the `porter` tokenizer;
/// `created_at` is stored `UNINDEXED`.
pub const FTS5_REGISTER_SQL: &str = "\
    CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(\
    content, title, agent, workspace, source_path, \
    created_at UNINDEXED, \
    content='', tokenize='porter'\
    )";

// NOTE(review): the six constants below are key names and generation numbers
// whose readers/writers are outside the visible part of this file; presumably
// they are stored in the `meta` table — confirm at the use sites.
const FTS_FRANKEN_REBUILD_META_KEY: &str = "fts_frankensqlite_rebuild_generation";
const FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY: &str = "fts_frankensqlite_archive_fingerprint";
const FTS_FRANKEN_REBUILD_GENERATION: i64 = 1;
const DAILY_STATS_HEALTH_META_KEY: &str = "daily_stats_archive_fingerprint";
const DAILY_STATS_HEALTH_GENERATION_META_KEY: &str = "daily_stats_health_generation";
const DAILY_STATS_HEALTH_GENERATION: i64 = 1;

/// SQL that issues the FTS5 `delete-all` command against `fts_messages`.
pub const FTS5_DELETE_ALL_SQL: &str =
    "INSERT INTO fts_messages(fts_messages) VALUES('delete-all');";

/// Shadow tables that `validate_fts_messages_integrity_for_connection` looks for
/// in `sqlite_master`.
pub const FTS_MESSAGES_REQUIRED_SHADOW_TABLES: [&str; 5] = [
    "fts_messages_config",
    "fts_messages_content",
    "fts_messages_data",
    "fts_messages_docsize",
    "fts_messages_idx",
];

/// Zero-row query run against `fts_messages` to check that the table can be read.
pub const FTS_MESSAGES_INTEGRITY_PROBE_SQL: &str = "SELECT * FROM fts_messages LIMIT 0";

/// Recovery advice appended to the display text of `FtsMessagesIntegrityError`.
pub const FTS_MESSAGES_CORRUPTION_RECOVERY_HINT: &str = "Stop all cass index/watch processes, back up the current database, then run \
     'cass doctor check --json' for a read-only diagnosis before using a supported \
     repair/rebuild path.";
1132
1133#[derive(Debug, Clone, PartialEq, Eq)]
1134pub struct FtsMessagesIntegrityError {
1135 missing_shadow_tables: Vec<&'static str>,
1136 failed_sql: Option<&'static str>,
1137 source_error: Option<String>,
1138}
1139
1140impl FtsMessagesIntegrityError {
1141 fn new(
1142 missing_shadow_tables: Vec<&'static str>,
1143 failed_sql: Option<&'static str>,
1144 source_error: Option<String>,
1145 ) -> Self {
1146 Self {
1147 missing_shadow_tables,
1148 failed_sql,
1149 source_error,
1150 }
1151 }
1152
1153 pub fn missing_shadow_tables(&self) -> &[&'static str] {
1154 &self.missing_shadow_tables
1155 }
1156}
1157
1158impl std::fmt::Display for FtsMessagesIntegrityError {
1159 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1160 write!(
1161 f,
1162 "CASS database FTS5 index is corrupt: fts_messages exists, but required FTS5 shadow tables are missing or unreadable"
1163 )?;
1164 if !self.missing_shadow_tables.is_empty() {
1165 write!(
1166 f,
1167 "; missing shadow tables: {}",
1168 self.missing_shadow_tables.join(", ")
1169 )?;
1170 }
1171 if let Some(sql) = self.failed_sql {
1172 write!(f, "; failed SQL: {sql}")?;
1173 }
1174 if let Some(source_error) = &self.source_error {
1175 write!(f, "; error: {source_error}")?;
1176 }
1177 write!(
1178 f,
1179 ". Suggested recovery: {FTS_MESSAGES_CORRUPTION_RECOVERY_HINT}"
1180 )
1181 }
1182}
1183
1184impl std::error::Error for FtsMessagesIntegrityError {}
1185
1186pub fn fts_messages_integrity_error_from_message(
1187 source_error: impl Into<String>,
1188) -> Option<FtsMessagesIntegrityError> {
1189 let source_error = source_error.into();
1190 let lower = source_error.to_ascii_lowercase();
1191 if !lower.contains("fts_messages") {
1192 return None;
1193 }
1194
1195 let mentions_structural_fts_failure = lower.contains("shadow table")
1196 || lower.contains("vtable constructor failed")
1197 || lower.contains("sqlite_corrupt")
1198 || lower.contains("databasecorrupt")
1199 || lower.contains("database corrupt")
1200 || lower.contains("missing required");
1201 if !mentions_structural_fts_failure {
1202 return None;
1203 }
1204
1205 let missing_shadow_tables = FTS_MESSAGES_REQUIRED_SHADOW_TABLES
1206 .iter()
1207 .copied()
1208 .filter(|table| lower.contains(&table.to_ascii_lowercase()))
1209 .collect::<Vec<_>>();
1210
1211 Some(FtsMessagesIntegrityError::new(
1212 missing_shadow_tables,
1213 Some(FTS_MESSAGES_INTEGRITY_PROBE_SQL),
1214 Some(source_error),
1215 ))
1216}
1217
/// Checks whether a `CREATE VIRTUAL TABLE` statement declares an FTS5 table with
/// `content=''` and no `message_id` column.
///
/// The comparison ignores all whitespace and ASCII case.
fn fts_schema_tolerates_missing_shadow_metadata(sql: &str) -> bool {
    let mut normalized = String::with_capacity(sql.len());
    for ch in sql.chars() {
        if !ch.is_whitespace() {
            normalized.push(ch.to_ascii_lowercase());
        }
    }

    let is_fts5 = normalized.contains("usingfts5(");
    let is_contentless = normalized.contains("content=''");
    let has_message_id = normalized.contains("message_id");
    is_fts5 && is_contentless && !has_message_id
}
1228
1229pub fn validate_fts_messages_integrity_for_connection(conn: &FrankenConnection) -> Result<()> {
1230 let fts_schema_sql: Vec<String> = conn
1231 .query_map_collect(
1232 "SELECT sql FROM sqlite_master WHERE type = 'table' AND name = 'fts_messages'",
1233 fparams![],
1234 |row: &FrankenRow| row.get_typed::<String>(0),
1235 )
1236 .with_context(|| "checking for fts_messages in sqlite_master")?;
1237 if fts_schema_sql.is_empty() {
1238 return Ok(());
1239 }
1240
1241 let probe_error = conn.query(FTS_MESSAGES_INTEGRITY_PROBE_SQL).err();
1242 if probe_error.is_none()
1243 && fts_schema_sql
1244 .iter()
1245 .all(|sql| fts_schema_tolerates_missing_shadow_metadata(sql))
1246 {
1247 return Ok(());
1248 }
1249
1250 let present_shadow_tables: HashSet<String> = conn
1251 .query_map_collect(
1252 "SELECT name FROM sqlite_master
1253 WHERE type = 'table'
1254 AND name IN (
1255 'fts_messages_config',
1256 'fts_messages_content',
1257 'fts_messages_data',
1258 'fts_messages_docsize',
1259 'fts_messages_idx'
1260 )",
1261 fparams![],
1262 |row: &FrankenRow| row.get_typed::<String>(0),
1263 )
1264 .map(|rows| rows.into_iter().collect())
1265 .map_err(|err| {
1266 FtsMessagesIntegrityError::new(
1267 Vec::new(),
1268 Some(
1269 "SELECT name FROM sqlite_master WHERE name IN \
1270 ('fts_messages_config','fts_messages_content','fts_messages_data','fts_messages_docsize','fts_messages_idx')",
1271 ),
1272 Some(err.to_string()),
1273 )
1274 })?;
1275 let missing_shadow_tables = FTS_MESSAGES_REQUIRED_SHADOW_TABLES
1276 .iter()
1277 .copied()
1278 .filter(|table| !present_shadow_tables.contains(*table))
1279 .collect::<Vec<_>>();
1280
1281 if missing_shadow_tables.is_empty() {
1290 return Ok(());
1291 }
1292
1293 Err(FtsMessagesIntegrityError::new(
1294 missing_shadow_tables,
1295 probe_error
1296 .as_ref()
1297 .map(|_| FTS_MESSAGES_INTEGRITY_PROBE_SQL),
1298 probe_error.map(|err| err.to_string()),
1299 )
1300 .into())
1301}
1302
/// Test helper: opens the database through `FrankenStorage` and rebuilds its
/// FTS index, discarding the inserted-row count.
///
/// Despite the `_via_rusqlite` suffix, the body only goes through
/// `FrankenStorage`.
#[cfg(test)]
pub(crate) fn materialize_fresh_fts_schema_via_rusqlite(db_path: &Path) -> Result<()> {
    let opened = FrankenStorage::open(db_path);
    let storage = opened.with_context(|| {
        format!(
            "opening frankensqlite db at {} for FTS materialization",
            db_path.display()
        )
    })?;
    let _inserted_rows = storage.rebuild_fts_via_frankensqlite()?;
    Ok(())
}
1318
/// Test helper: rebuilds the FTS index through `FrankenStorage`, records the
/// rebuild generation, and returns how many rows the rebuild inserted.
#[cfg(test)]
pub(crate) fn rebuild_fts_via_rusqlite(db_path: &Path) -> Result<usize> {
    let describe_open_failure = || {
        format!(
            "opening frankensqlite db at {} for FTS rebuild",
            db_path.display()
        )
    };
    let storage = FrankenStorage::open(db_path).with_context(describe_open_failure)?;

    let inserted_rows = storage.rebuild_fts_via_frankensqlite()?;
    // The generation marker is written only after the rebuild itself succeeded.
    storage.record_fts_franken_rebuild_generation()?;
    Ok(inserted_rows)
}
1331
1332pub(crate) fn ensure_fts_consistency_via_rusqlite(db_path: &Path) -> Result<FtsConsistencyRepair> {
1333 let storage = FrankenStorage::open(db_path).with_context(|| {
1337 format!(
1338 "opening frankensqlite db at {} for FTS consistency check",
1339 db_path.display()
1340 )
1341 })?;
1342 storage.ensure_search_fallback_fts_consistency()
1343}
1344
1345pub fn create_backup(db_path: &Path) -> Result<Option<std::path::PathBuf>, MigrationError> {
1349 if !bundle_path_exists(db_path)? {
1350 return Ok(None);
1351 }
1352
1353 if !copyable_bundle_file_exists(db_path)? {
1354 return Ok(None);
1355 }
1356 let _ = copyable_bundle_sidecar_sources(db_path)?;
1357
1358 let backup_path = unique_backup_path(db_path);
1359 let vacuum_stage_path = vacuum_stage_backup_path(&backup_path);
1360
1361 match vacuum_into_backup_stage(db_path, &vacuum_stage_path) {
1364 Ok(()) => {
1365 fs::rename(&vacuum_stage_path, &backup_path)?;
1366 }
1367 Err(err) if backup_vacuum_error_requires_consistent_retry(&err) => {
1368 tracing::warn!(
1369 db_path = %db_path.display(),
1370 error = %err,
1371 "create_backup: VACUUM INTO hit transient contention; refusing raw WAL bundle copy"
1372 );
1373 return Err(MigrationError::Database(err));
1374 }
1375 Err(err) => {
1376 tracing::warn!(
1377 db_path = %db_path.display(),
1378 error = %err,
1379 "create_backup: VACUUM INTO failed; falling back to raw evidence copy"
1380 );
1381 }
1382 }
1383
1384 if backup_path.exists() {
1385 sync_file_if_exists(&backup_path)?;
1386 if let Some(parent) = backup_path.parent() {
1387 sync_parent_directory(parent)?;
1388 }
1389 return Ok(Some(backup_path));
1390 }
1391
1392 copy_database_bundle(db_path, &backup_path)?;
1397
1398 Ok(Some(backup_path))
1399}
1400
1401fn vacuum_into_backup_stage(
1402 db_path: &Path,
1403 stage_path: &Path,
1404) -> std::result::Result<(), frankensqlite::FrankenError> {
1405 let mut conn = open_franken_with_flags(
1406 &db_path.to_string_lossy(),
1407 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
1408 )?;
1409 let result = (|| {
1410 conn.execute(BACKUP_VACUUM_BUSY_TIMEOUT_PRAGMA)?;
1411 let path_str = stage_path.to_string_lossy();
1412 conn.execute_compat("VACUUM INTO ?", fparams![path_str.as_ref()])?;
1413 Ok(())
1414 })();
1415 if let Err(close_err) = conn.close_in_place() {
1416 tracing::warn!(
1417 error = %close_err,
1418 db_path = %db_path.display(),
1419 "create_backup: close_in_place failed after VACUUM INTO; falling back to best-effort close"
1420 );
1421 conn.close_best_effort_in_place();
1422 }
1423 result
1424}
1425
/// Decides whether a failed `VACUUM INTO` must abort the backup instead of
/// falling back to a raw file copy. This is exactly the set of errors that
/// `retryable_franken_error` accepts.
fn backup_vacuum_error_requires_consistent_retry(err: &frankensqlite::FrankenError) -> bool {
    retryable_franken_error(err)
}
1429
/// Records which files of a database bundle were moved by
/// `move_database_bundle`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DatabaseBundleMoveResult {
    /// The main database file was moved.
    pub database: bool,
    /// The `-wal` sidecar was moved.
    pub wal: bool,
    /// The `-shm` sidecar was moved.
    pub shm: bool,
}

impl DatabaseBundleMoveResult {
    /// Returns `true` when at least one of the three bundle files was moved.
    pub fn moved_any(&self) -> bool {
        [self.database, self.wal, self.shm].contains(&true)
    }
}
1442
/// Builds the path of a database sidecar file by appending `suffix` (for
/// example `-wal` or `-shm`) directly to the full database path.
///
/// The suffix is appended to the raw OS string, so paths that are not valid
/// UTF-8 keep their original bytes instead of being rewritten with
/// replacement characters.
fn database_sidecar_path(path: &Path, suffix: &str) -> PathBuf {
    let mut raw = path.as_os_str().to_os_string();
    raw.push(suffix);
    PathBuf::from(raw)
}
1446
1447pub(crate) fn move_database_bundle(
1454 source_root: &Path,
1455 destination_root: &Path,
1456) -> std::io::Result<DatabaseBundleMoveResult> {
1457 let mut moved = DatabaseBundleMoveResult::default();
1458 if let Some(parent) = destination_root.parent() {
1459 fs::create_dir_all(parent)?;
1460 sync_parent_directory(parent)?;
1461 }
1462
1463 if bundle_path_exists(source_root)? {
1464 fs::rename(source_root, destination_root)?;
1465 moved.database = true;
1466 }
1467
1468 let wal_source = database_sidecar_path(source_root, "-wal");
1469 if bundle_path_exists(&wal_source)? {
1470 fs::rename(&wal_source, database_sidecar_path(destination_root, "-wal"))?;
1471 moved.wal = true;
1472 }
1473
1474 let shm_source = database_sidecar_path(source_root, "-shm");
1475 if bundle_path_exists(&shm_source)? {
1476 fs::rename(&shm_source, database_sidecar_path(destination_root, "-shm"))?;
1477 moved.shm = true;
1478 }
1479
1480 if moved.moved_any() {
1481 if let Some(parent) = source_root.parent() {
1482 sync_parent_directory(parent)?;
1483 }
1484 if let Some(parent) = destination_root.parent() {
1485 sync_parent_directory(parent)?;
1486 }
1487 }
1488
1489 Ok(moved)
1490}
1491
/// Reports whether anything exists at `path`, without following symlinks.
///
/// A "not found" error maps to `Ok(false)`; every other metadata error is
/// returned to the caller.
fn bundle_path_exists(path: &Path) -> std::io::Result<bool> {
    let Err(err) = fs::symlink_metadata(path) else {
        return Ok(true);
    };
    if err.kind() == std::io::ErrorKind::NotFound {
        Ok(false)
    } else {
        Err(err)
    }
}
1499
1500fn copy_database_bundle(source_root: &Path, destination_root: &Path) -> Result<()> {
1501 if let Some(parent) = destination_root.parent() {
1502 fs::create_dir_all(parent).with_context(|| {
1503 format!(
1504 "creating destination directory for database bundle copy: {}",
1505 parent.display()
1506 )
1507 })?;
1508 sync_parent_directory(parent)
1509 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1510 }
1511
1512 if !copyable_bundle_file_exists(source_root)? {
1513 bail!(
1514 "database bundle root is missing before copy: {}",
1515 source_root.display()
1516 );
1517 }
1518
1519 let sidecars = copyable_bundle_sidecar_sources(source_root)?;
1520
1521 fs::copy(source_root, destination_root).with_context(|| {
1522 format!(
1523 "copying database bundle {} -> {}",
1524 source_root.display(),
1525 destination_root.display()
1526 )
1527 })?;
1528 sync_file_if_exists(destination_root).with_context(|| {
1529 format!(
1530 "syncing copied database bundle {}",
1531 destination_root.display()
1532 )
1533 })?;
1534
1535 for (source_sidecar, suffix) in sidecars {
1536 let destination_sidecar = database_sidecar_path(destination_root, suffix);
1537 fs::copy(&source_sidecar, &destination_sidecar).with_context(|| {
1538 format!(
1539 "copying database bundle sidecar {} -> {}",
1540 source_sidecar.display(),
1541 destination_sidecar.display()
1542 )
1543 })?;
1544 sync_file_if_exists(&destination_sidecar).with_context(|| {
1545 format!(
1546 "syncing copied database bundle sidecar {}",
1547 destination_sidecar.display()
1548 )
1549 })?;
1550 }
1551
1552 if let Some(parent) = destination_root.parent() {
1553 sync_parent_directory(parent)
1554 .with_context(|| format!("syncing destination directory {}", parent.display()))?;
1555 }
1556
1557 Ok(())
1558}
1559
1560fn copyable_bundle_sidecar_sources(source_root: &Path) -> Result<Vec<(PathBuf, &'static str)>> {
1561 let mut sidecars = Vec::new();
1562 for suffix in ["-wal", "-shm"] {
1563 let source_sidecar = database_sidecar_path(source_root, suffix);
1564 if copyable_bundle_file_exists(&source_sidecar)? {
1565 sidecars.push((source_sidecar, suffix));
1566 }
1567 }
1568 Ok(sidecars)
1569}
1570
1571fn copyable_bundle_file_exists(path: &Path) -> Result<bool> {
1572 match fs::symlink_metadata(path) {
1573 Ok(metadata) => {
1574 let file_type = metadata.file_type();
1575 if file_type.is_symlink() {
1576 bail!(
1577 "refusing to copy database bundle symlink: {}",
1578 path.display()
1579 );
1580 }
1581 if !file_type.is_file() {
1582 bail!(
1583 "refusing to copy non-file database bundle path: {}",
1584 path.display()
1585 );
1586 }
1587 Ok(true)
1588 }
1589 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1590 Err(err) => Err(err).with_context(|| {
1591 format!(
1592 "checking database bundle path before copy: {}",
1593 path.display()
1594 )
1595 }),
1596 }
1597}
1598
1599pub(crate) fn remove_database_files(path: &Path) -> std::io::Result<()> {
1601 let mut removed_any = false;
1602
1603 match fs::remove_file(path) {
1604 Ok(()) => removed_any = true,
1605 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1606 Err(err) => return Err(err),
1607 }
1608
1609 for suffix in ["-wal", "-shm"] {
1611 match fs::remove_file(database_sidecar_path(path, suffix)) {
1612 Ok(()) => removed_any = true,
1613 Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
1614 Err(err) => return Err(err),
1615 }
1616 }
1617
1618 if removed_any && let Some(parent) = path.parent() {
1619 sync_parent_directory(parent)?;
1620 }
1621
1622 Ok(())
1623}
1624
/// Opens the directory at `path` and calls `sync_all` on the handle
/// (non-Windows builds).
#[cfg(not(windows))]
fn sync_parent_directory(path: &Path) -> std::io::Result<()> {
    let directory = fs::File::open(path)?;
    directory.sync_all()
}
1629
/// Windows build of the directory sync: does nothing and reports success.
#[cfg(windows)]
fn sync_parent_directory(_path: &Path) -> std::io::Result<()> {
    Ok(())
}
1634
/// Calls `sync_all` on the file at `path` when it exists; a missing path is
/// not an error (non-Windows builds).
#[cfg(not(windows))]
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    if !path.exists() {
        return Ok(());
    }
    let file = fs::File::open(path)?;
    file.sync_all()
}
1642
/// Calls `sync_all` on the file at `path` when it exists; a missing path is
/// not an error (Windows builds). The file is opened with both read and
/// write access before syncing.
#[cfg(windows)]
fn sync_file_if_exists(path: &Path) -> std::io::Result<()> {
    if !path.exists() {
        return Ok(());
    }
    let mut options = fs::OpenOptions::new();
    options.read(true).write(true);
    let file = options.open(path)?;
    file.sync_all()
}
1654
1655pub fn cleanup_old_backups(db_path: &Path, keep_count: usize) -> Result<(), std::io::Error> {
1657 let parent = match db_path.parent() {
1658 Some(p) => p,
1659 None => return Ok(()),
1660 };
1661
1662 let db_name = db_path.file_name().and_then(|n| n.to_str()).unwrap_or("db");
1663
1664 let prefix = format!("{}.backup.", db_name);
1665
1666 let mut backups: Vec<(std::path::PathBuf, SystemTime)> = Vec::new();
1668
1669 if let Ok(entries) = fs::read_dir(parent) {
1670 for entry in entries.flatten() {
1671 let path = entry.path();
1672 if let Some(name) = path.file_name().and_then(|n| n.to_str())
1673 && is_backup_root_name(name, &prefix)
1674 && let Ok(meta) = fs::metadata(&path)
1675 && meta.is_file()
1676 && let Ok(mtime) = meta.modified()
1677 {
1678 backups.push((path, mtime));
1679 }
1680 }
1681 }
1682
1683 backups.sort_by_key(|entry| std::cmp::Reverse(entry.1));
1685
1686 for (path, _) in backups.into_iter().skip(keep_count) {
1688 let _ = fs::remove_file(&path);
1689
1690 let _ = fs::remove_file(database_sidecar_path(&path, "-wal"));
1692 let _ = fs::remove_file(database_sidecar_path(&path, "-shm"));
1693 }
1694
1695 Ok(())
1696}
1697
/// A historical copy of the database (backup, corrupt copy, snapshot, ...)
/// discovered next to the canonical database, with the facts used to rank it.
#[derive(Debug, Clone)]
pub(crate) struct HistoricalDatabaseBundle {
    /// Path of the bundle's main database file.
    root_path: PathBuf,
    /// Combined size in bytes of the main file and its `-wal`/`-shm` sidecars.
    total_bytes: u64,
    /// Modification time of the main file in milliseconds since the Unix
    /// epoch; 0 when it could not be determined.
    modified_at_ms: i64,
    /// Whether a read-only open succeeded and the `conversations` and
    /// `messages` tables could be read.
    supports_direct_readonly: bool,
    /// Metadata collected by `probe_historical_bundle`.
    probe: HistoricalBundleProbe,
}
1706
/// Lightweight facts read from a historical bundle; the default value means
/// nothing could be learned.
#[derive(Debug, Clone, Copy, Default)]
struct HistoricalBundleProbe {
    /// Schema version read from the bundle's metadata, when available.
    schema_version: Option<i64>,
    /// Number of `sqlite_master` rows named `fts_messages`; `None` when the
    /// count could not be obtained.
    fts_schema_rows: Option<i64>,
    /// Whether `SELECT COUNT(*) FROM fts_messages` succeeded.
    fts_queryable: bool,
    /// Largest `messages.id`, or 0 when unknown or the table is empty.
    max_message_id: i64,
}
1714
/// Test-only summary of a database's health.
// NOTE(review): the producer of this struct is not visible in this part of the
// file; the field notes below are inferred from names and types only.
#[cfg(test)]
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct SqliteDatabaseHealthProbe {
    /// Presumably the stored schema version, if one was found.
    pub schema_version: Option<i64>,
    /// Presumably whether a quick integrity check passed.
    pub quick_check_ok: bool,
    /// Presumably the number of schema rows for `fts_messages`.
    pub fts_schema_rows: i64,
    /// Presumably whether the FTS table could be queried.
    pub fts_queryable: bool,
    /// Presumably the number of rows in `messages`.
    pub message_count: i64,
    /// Presumably the largest `messages.id`.
    pub max_message_id: i64,
}
1726
/// Outcome reported by the FTS consistency check (see
/// `ensure_fts_consistency_via_rusqlite`).
// NOTE(review): variant meanings below are inferred from their names; the code
// that constructs them is outside this part of the file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum FtsConsistencyRepair {
    /// Presumably: no repair was needed; `rows` is the FTS row count.
    AlreadyHealthy {
        rows: usize,
    },
    /// Presumably: only the missing rows were added.
    IncrementalCatchUp {
        inserted_rows: usize,
        total_rows: usize,
    },
    /// Presumably: the FTS index was rebuilt from scratch.
    Rebuilt {
        inserted_rows: usize,
    },
}
1740
/// Running totals for a historical salvage pass.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct HistoricalSalvageOutcome {
    /// Bundles that were looked at.
    pub bundles_considered: usize,
    /// Bundles that were imported.
    pub bundles_imported: usize,
    /// Conversations imported across all bundles.
    pub conversations_imported: usize,
    /// Messages imported across all bundles.
    pub messages_imported: usize,
}

impl HistoricalSalvageOutcome {
    /// Adds every counter of `other` onto the matching counter of `self`.
    pub(crate) fn accumulate(&mut self, other: Self) {
        let Self {
            bundles_considered,
            bundles_imported,
            conversations_imported,
            messages_imported,
        } = other;

        self.bundles_considered += bundles_considered;
        self.bundles_imported += bundles_imported;
        self.conversations_imported += conversations_imported;
        self.messages_imported += messages_imported;
    }
}
1757
/// An open connection to a historical bundle, plus how it was obtained.
#[derive(Debug)]
struct HistoricalReadConnection {
    /// Connection used to read the bundle's rows.
    conn: FrankenConnection,
    /// How the bundle was opened: `"direct-readonly"` or `"sqlite3-recover"`.
    method: &'static str,
    /// Path of the database actually opened (the bundle itself, or the
    /// recovered copy).
    root_path: PathBuf,
    /// Owns the temporary directory holding a recovered copy, when one was
    /// created, so it lives as long as this value.
    _tempdir: Option<tempfile::TempDir>,
}
1765
/// Schema applied to the scratch database that receives the rows replayed
/// from `sqlite3 .recover`. It declares only the core tables (`sources`,
/// `agents`, `workspaces`, `conversations`, `messages`, `snippets`) with plain
/// columns and primary keys.
const HISTORICAL_RECOVERY_CORE_SCHEMA: &str = r"
CREATE TABLE sources (
    id TEXT PRIMARY KEY,
    kind TEXT,
    host_label TEXT,
    machine_id TEXT,
    platform TEXT,
    config_json TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE agents (
    id INTEGER PRIMARY KEY,
    slug TEXT,
    name TEXT,
    version TEXT,
    kind TEXT,
    created_at INTEGER,
    updated_at INTEGER
);
CREATE TABLE workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT,
    display_name TEXT
);
CREATE TABLE conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER,
    workspace_id INTEGER,
    source_id TEXT,
    external_id TEXT,
    title TEXT,
    source_path TEXT,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    metadata_bin BLOB,
    total_input_tokens INTEGER,
    total_output_tokens INTEGER,
    total_cache_read_tokens INTEGER,
    total_cache_creation_tokens INTEGER,
    grand_total_tokens INTEGER,
    estimated_cost_usd REAL,
    primary_model TEXT,
    api_call_count INTEGER,
    tool_call_count INTEGER,
    user_message_count INTEGER,
    assistant_message_count INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
CREATE TABLE messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER,
    idx INTEGER,
    role TEXT,
    author TEXT,
    created_at INTEGER,
    content TEXT,
    extra_json TEXT,
    extra_bin BLOB
);
CREATE TABLE snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);
";
/// Version written as `salvage_version` into the per-bundle import record.
const HISTORICAL_SALVAGE_LEDGER_VERSION: u32 = 2;
/// Version of the salvage progress record.
// NOTE(review): presumably matches `HistoricalBundleProgress::progress_version`; confirm at the use site.
const HISTORICAL_SALVAGE_PROGRESS_VERSION: u32 = 1;
/// Five minutes, expressed in milliseconds.
// NOTE(review): presumably the allowed start-time difference when merging conversations by source path; confirm at the use site.
const SOURCE_PATH_MERGE_START_TOLERANCE_MS: i64 = 5 * 60 * 1000;
1843
/// Serializable checkpoint describing how far the import of one historical
/// bundle has progressed.
// NOTE(review): readers and writers of this record are outside this part of
// the file; the field notes below are inferred from names and types.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct HistoricalBundleProgress {
    /// Format version of this record.
    progress_version: u32,
    /// Path of the bundle being imported.
    path: String,
    /// Presumably the bundle's total size in bytes when the record was written.
    bytes: u64,
    /// Presumably the bundle's modification time in milliseconds since the Unix epoch.
    modified_at_ms: i64,
    /// Presumably how the bundle was opened (compare `HistoricalReadConnection::method`).
    method: String,
    /// Presumably the last source row id that was fully imported.
    last_completed_source_row_id: i64,
    /// Conversations imported so far.
    conversations_imported: usize,
    /// Messages imported so far.
    messages_imported: usize,
    /// Presumably when this record was last updated, in milliseconds since the Unix epoch.
    updated_at_ms: i64,
}
1856
/// One conversation queued for import from a historical bundle.
// NOTE(review): field meanings are inferred from names; the batching code is
// outside this part of the file.
#[derive(Debug, Clone)]
struct HistoricalBatchEntry {
    /// Presumably the conversation's row id in the source bundle.
    source_row_id: i64,
    /// Presumably the agent id to attach the conversation to.
    agent_id: i64,
    /// Presumably the workspace id, when the conversation has one.
    workspace_id: Option<i64>,
    /// The conversation payload itself.
    conversation: Conversation,
}
1864
/// Counters for one imported batch of historical rows.
// NOTE(review): exact counting rules live in the import code, which is outside
// this part of the file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct HistoricalBatchImportTotals {
    /// Presumably the number of source rows inserted by the batch.
    inserted_source_rows: usize,
    /// Presumably the number of messages inserted by the batch.
    inserted_messages: usize,
}
1870
1871fn historical_bundle_root_paths(db_path: &Path) -> Vec<PathBuf> {
1872 let mut roots = Vec::new();
1873 let Some(parent) = db_path.parent() else {
1874 return roots;
1875 };
1876 let db_name = db_path
1877 .file_name()
1878 .and_then(|n| n.to_str())
1879 .unwrap_or("agent_search.db");
1880 let db_stem = db_path
1881 .file_stem()
1882 .and_then(|n| n.to_str())
1883 .unwrap_or("agent_search");
1884
1885 let mut push_root = |path: PathBuf| {
1886 if path == db_path {
1887 return;
1888 }
1889 if !roots.iter().any(|existing| existing == &path) {
1890 roots.push(path);
1891 }
1892 };
1893
1894 if let Ok(entries) = fs::read_dir(parent) {
1895 for entry in entries.flatten() {
1896 let path = entry.path();
1897 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1898 continue;
1899 };
1900 if has_db_sidecar_suffix(name) {
1901 continue;
1902 }
1903 if name.starts_with(&format!("{db_name}.backup."))
1904 || name.starts_with(&format!("{db_stem}.corrupt."))
1905 {
1906 push_root(path);
1907 }
1908 }
1909 }
1910
1911 let backups_dir = parent.join("backups");
1912 if let Ok(entries) = fs::read_dir(backups_dir) {
1913 for entry in entries.flatten() {
1914 let path = entry.path();
1915 let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
1916 continue;
1917 };
1918 if has_db_sidecar_suffix(name) {
1919 continue;
1920 }
1921 if name.starts_with(&format!("{db_name}.")) && name.ends_with(".bak") {
1922 push_root(path);
1923 }
1924 }
1925 }
1926
1927 push_named_database_children(&mut roots, db_path, &parent.join("repair-lab"), db_name);
1928 push_named_database_children(&mut roots, db_path, &parent.join("snapshots"), db_name);
1929
1930 roots
1931}
1932
/// For every entry of `dir`, considers the path `<entry>/<db_name>` and
/// appends it to `roots` when it exists, is not `canonical_db_path`, and is
/// not already listed. An unreadable `dir` leaves `roots` untouched.
fn push_named_database_children(
    roots: &mut Vec<PathBuf>,
    canonical_db_path: &Path,
    dir: &Path,
    db_name: &str,
) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };

    for candidate in entries.flatten().map(|entry| entry.path().join(db_name)) {
        let is_canonical = candidate == canonical_db_path;
        if is_canonical || !candidate.exists() || roots.contains(&candidate) {
            continue;
        }
        roots.push(candidate);
    }
}
1951
/// Returns the modification time of `path` in milliseconds since the Unix
/// epoch.
///
/// Returns 0 when the metadata or modification time cannot be read, or when
/// the timestamp lies before the epoch. A millisecond count too large for
/// `i64` saturates at `i64::MAX` instead of being truncated.
fn file_mtime_ms(path: &Path) -> i64 {
    let Ok(modified) = fs::metadata(path).and_then(|meta| meta.modified()) else {
        return 0;
    };
    let Ok(since_epoch) = modified.duration_since(UNIX_EPOCH) else {
        return 0;
    };
    i64::try_from(since_epoch.as_millis()).unwrap_or(i64::MAX)
}
1960
1961fn bundle_total_bytes(root_path: &Path) -> u64 {
1962 let mut total = fs::metadata(root_path).map(|meta| meta.len()).unwrap_or(0);
1963 for suffix in ["-wal", "-shm"] {
1964 let sidecar = database_sidecar_path(root_path, suffix);
1965 total = total.saturating_add(fs::metadata(sidecar).map(|meta| meta.len()).unwrap_or(0));
1966 }
1967 total
1968}
1969
1970pub(crate) fn discover_historical_database_bundles(
1971 db_path: &Path,
1972) -> Vec<HistoricalDatabaseBundle> {
1973 let mut bundles: Vec<_> = historical_bundle_root_paths(db_path)
1974 .into_iter()
1975 .filter(|root| root.exists())
1976 .map(|root_path| {
1977 let modified_at_ms = file_mtime_ms(&root_path);
1978 let total_bytes = bundle_total_bytes(&root_path);
1979 let supports_direct_readonly = historical_bundle_supports_direct_readonly(&root_path);
1980 let probe = probe_historical_bundle(&root_path);
1981 HistoricalDatabaseBundle {
1982 modified_at_ms,
1983 total_bytes,
1984 supports_direct_readonly,
1985 root_path,
1986 probe,
1987 }
1988 })
1989 .filter(|bundle| bundle.total_bytes > 0)
1990 .collect();
1991
1992 fn bundle_priority(path: &Path) -> i32 {
1993 let path_str = path.to_string_lossy();
1994 if path_str.contains("/repair-lab/replay-") {
1995 return 5;
1996 }
1997 if path_str.contains("/repair-lab/") {
1998 return 4;
1999 }
2000 if path_str.contains("/snapshots/") {
2001 return 3;
2002 }
2003 if path_str.contains(".corrupt.") || path_str.contains("failed-baseline-seed") {
2004 return 0;
2005 }
2006 1
2007 }
2008
2009 fn bundle_health_rank(bundle: &HistoricalDatabaseBundle) -> i32 {
2010 let fts_clean = match bundle.probe.fts_schema_rows {
2033 Some(1) => bundle.probe.fts_queryable,
2034 Some(0) => bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION),
2035 _ => false,
2036 };
2037
2038 let clean_schema14_fts =
2039 bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION) && fts_clean;
2040 if clean_schema14_fts {
2041 return 5;
2042 }
2043
2044 if fts_clean {
2045 return 4;
2046 }
2047
2048 if bundle.probe.schema_version == Some(CURRENT_SCHEMA_VERSION)
2049 && bundle.supports_direct_readonly
2050 {
2051 return 3;
2052 }
2053
2054 if bundle.supports_direct_readonly {
2055 return 2;
2056 }
2057
2058 1
2059 }
2060
2061 bundles.sort_by(|left, right| {
2062 bundle_health_rank(right)
2063 .cmp(&bundle_health_rank(left))
2064 .then_with(|| right.probe.max_message_id.cmp(&left.probe.max_message_id))
2065 .then_with(|| bundle_priority(&right.root_path).cmp(&bundle_priority(&left.root_path)))
2066 .then_with(|| {
2067 right
2068 .supports_direct_readonly
2069 .cmp(&left.supports_direct_readonly)
2070 })
2071 .then_with(|| right.total_bytes.cmp(&left.total_bytes))
2072 .then_with(|| right.modified_at_ms.cmp(&left.modified_at_ms))
2073 .then_with(|| right.root_path.cmp(&left.root_path))
2074 });
2075 bundles
2076}
2077
2078fn probe_historical_bundle(root_path: &Path) -> HistoricalBundleProbe {
2079 let Ok(conn) = open_historical_bundle_readonly(root_path) else {
2080 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or_default();
2081 };
2082
2083 let schema_version = read_meta_schema_version(&conn).ok().flatten();
2084 let fts_schema_rows: Option<i64> = conn
2085 .query_row_map(
2086 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
2087 fparams![],
2088 |row| row.get_typed(0),
2089 )
2090 .ok();
2091 let fts_queryable =
2092 historical_bundle_fts_queryable_via_frankensqlite(root_path, fts_schema_rows);
2093 let max_message_id: i64 = conn
2094 .query_row_map(
2095 "SELECT COALESCE(MAX(id), 0) FROM messages",
2096 fparams![],
2097 |row| row.get_typed(0),
2098 )
2099 .unwrap_or(0);
2100
2101 let probe = HistoricalBundleProbe {
2102 schema_version,
2103 fts_schema_rows,
2104 fts_queryable,
2105 max_message_id,
2106 };
2107
2108 if probe.schema_version.is_none()
2109 && probe.fts_schema_rows.is_none()
2110 && probe.max_message_id == 0
2111 {
2112 return probe_historical_bundle_via_sqlite3_metadata(root_path).unwrap_or(probe);
2113 }
2114
2115 probe
2116}
2117
2118fn probe_historical_bundle_via_sqlite3_metadata(root_path: &Path) -> Option<HistoricalBundleProbe> {
2119 let bundle_uri = format!("file:{}?immutable=1", root_path.to_string_lossy());
2120 let output = Command::new("sqlite3")
2121 .arg("-batch")
2122 .arg("-noheader")
2123 .arg(&bundle_uri)
2124 .arg(
2125 "PRAGMA writable_schema=ON;
2126 SELECT COALESCE((SELECT value FROM meta WHERE key = 'schema_version'), '');
2127 SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages';
2128 SELECT COALESCE(MAX(id), 0) FROM messages;",
2129 )
2130 .output()
2131 .ok()?;
2132 if !output.status.success() {
2133 return None;
2134 }
2135
2136 let stdout = String::from_utf8(output.stdout).ok()?;
2137 let mut lines = stdout.lines();
2138 let schema_version = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
2139 let fts_schema_rows = lines.next().and_then(|raw| raw.trim().parse::<i64>().ok());
2140 let max_message_id = lines
2141 .next()
2142 .and_then(|raw| raw.trim().parse::<i64>().ok())
2143 .unwrap_or(0);
2144
2145 Some(HistoricalBundleProbe {
2146 schema_version,
2147 fts_schema_rows,
2148 fts_queryable: false,
2149 max_message_id,
2150 })
2151}
2152
2153fn historical_bundle_fts_queryable_via_frankensqlite(
2154 root_path: &Path,
2155 fts_schema_rows: Option<i64>,
2156) -> bool {
2157 matches!(fts_schema_rows, Some(1))
2158 && FrankenStorage::open_readonly(root_path)
2159 .map(|storage| {
2160 storage
2161 .raw()
2162 .query("SELECT COUNT(*) FROM fts_messages")
2163 .is_ok()
2164 })
2165 .unwrap_or(false)
2166}
2167
2168fn historical_bundle_supports_direct_readonly(root_path: &Path) -> bool {
2169 open_historical_bundle_readonly(root_path)
2170 .and_then(|conn| historical_bundle_has_queryable_core_tables(&conn))
2171 .is_ok()
2172}
2173
2174fn historical_table_exists(conn: &FrankenConnection, table: &str) -> Result<bool> {
2175 let found: Option<i64> = conn
2176 .query_row_map(
2177 "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1 LIMIT 1",
2178 fparams![table],
2179 |row| row.get_typed(0),
2180 )
2181 .optional()
2182 .with_context(|| format!("checking for historical table {table}"))?;
2183 Ok(found.is_some())
2184}
2185
2186fn probe_historical_table_reads(conn: &FrankenConnection, table: &str) -> Result<()> {
2187 if !historical_table_exists(conn, table)? {
2188 return Err(anyhow!(
2189 "historical database missing required table {table}"
2190 ));
2191 }
2192
2193 let sql = format!("SELECT rowid FROM {table} LIMIT 1");
2194 let _: Option<i64> = conn
2195 .query_row_map(&sql, fparams![], |row| row.get_typed(0))
2196 .optional()
2197 .with_context(|| format!("probing rows from historical table {table}"))?;
2198 Ok(())
2199}
2200
2201fn historical_bundle_has_queryable_core_tables(conn: &FrankenConnection) -> Result<()> {
2202 probe_historical_table_reads(conn, "conversations")?;
2203 probe_historical_table_reads(conn, "messages")?;
2204 Ok(())
2205}
2206
2207fn open_historical_bundle_readonly(root_path: &Path) -> Result<FrankenConnection> {
2208 let path_str = root_path.to_string_lossy();
2209 let flags = FrankenOpenFlags::SQLITE_OPEN_READ_ONLY;
2210 let conn = open_franken_with_flags(&path_str, flags)
2211 .with_context(|| format!("opening historical database {}", root_path.display()))?;
2212 Ok(conn)
2213}
2214
/// Reports whether `line` is an INSERT statement into one of the core tables
/// that recovery replays (`sources`, `agents`, `workspaces`, `conversations`,
/// `messages`, `snippets`).
///
/// Accepted shapes, matched exactly from the start of the line:
/// `INSERT INTO 'table'...`, `INSERT OR IGNORE INTO 'table'...`, and the same
/// two forms with the table name in double quotes. The opening and closing
/// quote must be the same character.
///
/// This runs once per line of `sqlite3 .recover` output, so it matches by
/// stripping prefixes instead of building candidate strings for every call.
fn is_recoverable_insert_line(line: &str) -> bool {
    const RECOVERABLE_TABLES: [&str; 6] = [
        "sources",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
    ];

    let Some(rest) = line.strip_prefix("INSERT ") else {
        return false;
    };
    // The conflict clause is optional.
    let rest = rest.strip_prefix("OR IGNORE ").unwrap_or(rest);
    let Some(rest) = rest.strip_prefix("INTO ") else {
        return false;
    };

    let (quote, after_quote) = if let Some(tail) = rest.strip_prefix('\'') {
        ('\'', tail)
    } else if let Some(tail) = rest.strip_prefix('"') {
        ('"', tail)
    } else {
        return false;
    };

    // The table name must be followed immediately by the matching quote, so
    // names that merely start with a core table name are rejected.
    RECOVERABLE_TABLES.iter().any(|table| {
        after_quote
            .strip_prefix(*table)
            .map_or(false, |tail| tail.starts_with(quote))
    })
}
2232
2233fn recover_historical_bundle_via_sqlite3(
2234 bundle: &HistoricalDatabaseBundle,
2235) -> Result<HistoricalReadConnection> {
2236 let tempdir = tempfile::TempDir::new().context("creating temporary salvage directory")?;
2237 let recovered_db = tempdir.path().join("historical-recovered.db");
2238 let temp_conn = FrankenConnection::open(recovered_db.to_string_lossy().as_ref())
2239 .with_context(|| format!("creating recovered database {}", recovered_db.display()))?;
2240 temp_conn
2241 .execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA)
2242 .with_context(|| format!("initializing recovered schema {}", recovered_db.display()))?;
2243 drop(temp_conn);
2244
2245 let bundle_uri = format!("file:{}?immutable=1", bundle.root_path.to_string_lossy());
2246 let mut recover = Command::new("sqlite3")
2247 .arg(&bundle_uri)
2248 .arg(".recover")
2249 .stdout(Stdio::piped())
2250 .spawn()
2251 .with_context(|| {
2252 format!(
2253 "launching sqlite3 .recover for historical bundle {}",
2254 bundle.root_path.display()
2255 )
2256 })?;
2257 let recover_stdout = recover
2258 .stdout
2259 .take()
2260 .context("capturing sqlite3 .recover stdout")?;
2261
2262 let mut importer = Command::new("sqlite3")
2263 .arg(&recovered_db)
2264 .stdin(Stdio::piped())
2265 .spawn()
2266 .with_context(|| {
2267 format!(
2268 "launching sqlite3 importer for recovered bundle {}",
2269 recovered_db.display()
2270 )
2271 })?;
2272
2273 {
2274 let importer_stdin = importer
2275 .stdin
2276 .as_mut()
2277 .context("opening sqlite3 importer stdin")?;
2278 importer_stdin
2279 .write_all(b"BEGIN;\n")
2280 .context("starting recovery import transaction")?;
2281
2282 let reader = BufReader::new(recover_stdout);
2283 for line in reader.lines() {
2284 let line = line.context("reading sqlite3 .recover output")?;
2285 if is_recoverable_insert_line(&line) {
2286 importer_stdin
2287 .write_all(line.as_bytes())
2288 .context("writing recovered INSERT")?;
2289 importer_stdin
2290 .write_all(b"\n")
2291 .context("writing recovered INSERT newline")?;
2292 }
2293 }
2294
2295 importer_stdin
2296 .write_all(b"COMMIT;\n")
2297 .context("committing recovery import transaction")?;
2298 }
2299
2300 let importer_status = importer
2301 .wait()
2302 .context("waiting for sqlite3 recovery importer")?;
2303 let recover_status = recover
2304 .wait()
2305 .context("waiting for sqlite3 .recover process")?;
2306 if !importer_status.success() {
2307 anyhow::bail!(
2308 "sqlite3 recovery importer exited with status {} for {} after sqlite3 .recover exited with status {}",
2309 importer_status,
2310 recovered_db.display(),
2311 recover_status
2312 );
2313 }
2314
2315 let conn = open_historical_bundle_readonly(&recovered_db)?;
2316 historical_bundle_has_queryable_core_tables(&conn)?;
2317 if !recover_status.success() {
2318 let (conversations, messages) = historical_bundle_counts(&conn)?;
2319 if conversations == 0 && messages == 0 {
2320 anyhow::bail!(
2321 "sqlite3 .recover exited with status {} for {} and recovered no core rows",
2322 recover_status,
2323 bundle.root_path.display()
2324 );
2325 }
2326 tracing::warn!(
2327 path = %bundle.root_path.display(),
2328 status = %recover_status,
2329 conversations,
2330 messages,
2331 "sqlite3 .recover exited nonzero after emitting recoverable core rows; continuing with recovered subset"
2332 );
2333 }
2334 Ok(HistoricalReadConnection {
2335 conn,
2336 method: "sqlite3-recover",
2337 root_path: recovered_db,
2338 _tempdir: Some(tempdir),
2339 })
2340}
2341
2342fn open_historical_bundle_for_salvage(
2343 bundle: &HistoricalDatabaseBundle,
2344) -> Result<HistoricalReadConnection> {
2345 match open_historical_bundle_readonly(&bundle.root_path) {
2346 Ok(conn) => {
2347 if historical_bundle_has_queryable_core_tables(&conn).is_ok() {
2348 return Ok(HistoricalReadConnection {
2349 conn,
2350 method: "direct-readonly",
2351 root_path: bundle.root_path.clone(),
2352 _tempdir: None,
2353 });
2354 }
2355 }
2356 Err(err) => {
2357 tracing::warn!(
2358 path = %bundle.root_path.display(),
2359 error = %err,
2360 "historical bundle direct open failed; falling back to sqlite3 .recover"
2361 );
2362 }
2363 }
2364
2365 recover_historical_bundle_via_sqlite3(bundle)
2366}
2367
2368fn historical_bundle_counts(conn: &FrankenConnection) -> Result<(usize, usize)> {
2369 let conversations: i64 =
2370 conn.query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
2371 row.get_typed(0)
2372 })?;
2373 let messages: i64 = conn.query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
2374 row.get_typed(0)
2375 })?;
2376 Ok((
2377 usize::try_from(conversations.max(0)).unwrap_or(usize::MAX),
2378 usize::try_from(messages.max(0)).unwrap_or(usize::MAX),
2379 ))
2380}
2381
2382fn clear_seeded_runtime_meta(conn: &FrankenConnection) -> Result<()> {
2383 conn.execute(
2384 "DELETE FROM meta
2385 WHERE key LIKE 'historical_bundle_salvaged:%'
2386 OR key IN ('last_scan_ts', 'last_indexed_at', 'last_embedded_message_id')",
2387 )?;
2388 Ok(())
2389}
2390
2391fn record_historical_bundle_import(
2392 conn: &FrankenConnection,
2393 bundle: &HistoricalDatabaseBundle,
2394 method: &str,
2395 conversations_imported: usize,
2396 messages_imported: usize,
2397) -> Result<()> {
2398 let key = FrankenStorage::historical_bundle_meta_key(bundle);
2399 let value = serde_json::json!({
2400 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
2401 "path": bundle.root_path.display().to_string(),
2402 "bytes": bundle.total_bytes,
2403 "modified_at_ms": bundle.modified_at_ms,
2404 "method": method,
2405 "conversations_imported": conversations_imported,
2406 "messages_imported": messages_imported,
2407 "recorded_at_ms": FrankenStorage::now_millis(),
2408 });
2409 let value_str = serde_json::to_string(&value)?;
2410 conn.execute_compat(
2411 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
2412 fparams![key, value_str],
2413 )?;
2414 Ok(())
2415}
2416
2417fn scrub_staged_derived_fts_metadata_via_sqlite3(staged_db_path: &Path) -> Result<()> {
2418 let scrub_sql = "PRAGMA writable_schema = ON;
2419 DELETE FROM sqlite_master
2420 WHERE name = 'fts_messages'
2421 OR tbl_name = 'fts_messages'
2422 OR name IN (
2423 'fts_messages_config',
2424 'fts_messages_content',
2425 'fts_messages_data',
2426 'fts_messages_docsize',
2427 'fts_messages_idx'
2428 )
2429 OR tbl_name IN (
2430 'fts_messages_config',
2431 'fts_messages_content',
2432 'fts_messages_data',
2433 'fts_messages_docsize',
2434 'fts_messages_idx'
2435 );
2436 PRAGMA writable_schema = OFF;";
2437
2438 let run_scrub = |disable_defensive: bool| -> Result<std::process::Output> {
2439 let mut command = Command::new("sqlite3");
2440 command.arg("-batch").arg(staged_db_path);
2441 if disable_defensive {
2442 command.arg(".dbconfig defensive off");
2443 }
2444 command.arg(scrub_sql).output().with_context(|| {
2445 format!(
2446 "running sqlite3 staged FTS metadata scrub for {}",
2447 staged_db_path.display()
2448 )
2449 })
2450 };
2451 let render_output = |output: &std::process::Output| -> String {
2452 format!(
2453 "status {}; stdout: {}; stderr: {}",
2454 output.status,
2455 String::from_utf8_lossy(&output.stdout).trim(),
2456 String::from_utf8_lossy(&output.stderr).trim()
2457 )
2458 };
2459
2460 let defensive_off_output = run_scrub(true)?;
2461 if defensive_off_output.status.success() {
2462 return Ok(());
2463 }
2464
2465 let fallback_output = run_scrub(false)?;
2466 if !fallback_output.status.success() {
2467 anyhow::bail!(
2468 "sqlite3 staged FTS metadata scrub failed for {}; defensive-off attempt {}; fallback without .dbconfig {}",
2469 staged_db_path.display(),
2470 render_output(&defensive_off_output),
2471 render_output(&fallback_output)
2472 );
2473 }
2474 Ok(())
2475}
2476
2477fn ensure_seeded_canonical_fts_consistency(staged_db_path: &Path) -> Result<FtsConsistencyRepair> {
2478 match ensure_fts_consistency_via_rusqlite(staged_db_path) {
2479 Ok(repair) => Ok(repair),
2480 Err(err) => {
2481 if fts_messages_integrity_error_from_message(format!("{err:#}")).is_none() {
2482 return Err(err).with_context(|| {
2483 format!(
2484 "repairing staged canonical FTS consistency before finalization: {}",
2485 staged_db_path.display()
2486 )
2487 });
2488 }
2489
2490 tracing::warn!(
2491 path = %staged_db_path.display(),
2492 error = %err,
2493 "staged historical seed has malformed derived FTS metadata; scrubbing and rebuilding FTS on staged copy"
2494 );
2495 scrub_staged_derived_fts_metadata_via_sqlite3(staged_db_path).with_context(|| {
2496 format!(
2497 "scrubbing malformed staged FTS metadata before finalization: {}",
2498 staged_db_path.display()
2499 )
2500 })?;
2501 ensure_fts_consistency_via_rusqlite(staged_db_path).with_context(|| {
2502 format!(
2503 "repairing staged canonical FTS consistency after metadata scrub: {}",
2504 staged_db_path.display()
2505 )
2506 })
2507 }
2508 }
2509}
2510
2511fn finalize_seeded_canonical_bundle_via_rusqlite(
2512 canonical_db_path: &Path,
2513 bundle: &HistoricalDatabaseBundle,
2514) -> Result<(usize, usize)> {
2515 let _fts_repair = ensure_seeded_canonical_fts_consistency(canonical_db_path)?;
2516
2517 let path_str = canonical_db_path.to_string_lossy();
2518 let conn = FrankenConnection::open(path_str.as_ref()).with_context(|| {
2519 format!(
2520 "opening seeded canonical database for post-seed finalization: {}",
2521 canonical_db_path.display()
2522 )
2523 })?;
2524 conn.execute("PRAGMA busy_timeout = 30000;")
2525 .with_context(|| {
2526 format!(
2527 "configuring busy timeout for seeded canonical database {}",
2528 canonical_db_path.display()
2529 )
2530 })?;
2531 let schema_version = read_meta_schema_version(&conn)?;
2532
2533 if let Some(version) = schema_version
2534 && version < CURRENT_SCHEMA_VERSION
2535 && version != 13
2536 {
2537 anyhow::bail!(
2538 "seeded canonical bundle schema_version {version} is too old for baseline import and cannot be finalized automatically"
2539 );
2540 }
2541
2542 clear_seeded_runtime_meta(&conn)?;
2543 let (conversations_imported, messages_imported) = historical_bundle_counts(&conn)?;
2544
2545 conn.execute_compat(
2546 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1)",
2547 fparams![CURRENT_SCHEMA_VERSION.to_string()],
2548 )?;
2549
2550 conn.execute_compat(
2551 "INSERT OR IGNORE INTO _schema_migrations(version, name) VALUES(?1, 'fts_contentless')",
2552 fparams![CURRENT_SCHEMA_VERSION],
2553 )?;
2554 record_historical_bundle_import(
2555 &conn,
2556 bundle,
2557 "baseline-bulk-sql-copy",
2558 conversations_imported,
2559 messages_imported,
2560 )?;
2561 Ok((conversations_imported, messages_imported))
2562}
2563
2564fn read_meta_schema_version(conn: &FrankenConnection) -> Result<Option<i64>> {
2565 let version: Option<String> = conn
2566 .query_row_map(
2567 "SELECT value FROM meta WHERE key = 'schema_version'",
2568 fparams![],
2569 |row| row.get_typed(0),
2570 )
2571 .optional()?;
2572 Ok(version.and_then(|raw| raw.parse::<i64>().ok()))
2573}
2574
/// Test helper: number of `sqlite_master` rows named `fts_messages`.
#[cfg(test)]
fn franken_fts_schema_rows(conn: &FrankenConnection) -> Result<i64> {
    let matching_rows = conn.query_row_map(
        "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
        fparams![],
        |row| row.get_typed(0),
    );
    matching_rows.context("counting sqlite_master rows for fts_messages via frankensqlite")
}
2584
/// Test helper: reports whether a `COUNT(*)` query against `fts_messages` succeeds.
#[cfg(test)]
fn franken_fts_limit_probe(conn: &FrankenConnection) -> bool {
    let probe = conn.query("SELECT COUNT(*) FROM fts_messages");
    matches!(probe, Ok(_))
}
2589
/// Test helper: collects a health snapshot of the database at `db_path`.
///
/// Opens the file, sets a 30 s busy timeout, then gathers the stored schema
/// version, the `PRAGMA quick_check(1)` verdict and the state of
/// `fts_messages`. The message count and highest message id are only queried
/// when the quick check reported `ok`; otherwise both are reported as `0`.
#[cfg(test)]
#[allow(dead_code)]
pub(crate) fn probe_database_health_via_frankensqlite(
    db_path: &Path,
) -> Result<SqliteDatabaseHealthProbe> {
    let location = db_path.to_string_lossy();
    let conn = FrankenConnection::open(location.as_ref()).with_context(|| {
        format!(
            "opening frankensqlite db at {} for database health probe",
            db_path.display()
        )
    })?;
    conn.execute_batch("PRAGMA busy_timeout = 30000;")
        .with_context(|| {
            format!(
                "configuring busy timeout for database health probe at {}",
                db_path.display()
            )
        })?;

    let schema_version = read_meta_schema_version(&conn)?;
    let quick_check_status: String = conn
        .query_row_map("PRAGMA quick_check(1)", fparams![], |row| row.get_typed(0))
        .with_context(|| format!("running PRAGMA quick_check(1) for {}", db_path.display()))?;
    let quick_check_ok = quick_check_status.trim().eq_ignore_ascii_case("ok");
    let fts_schema_rows = franken_fts_schema_rows(&conn)?;
    // Queryable means exactly one schema row and a working COUNT(*) probe.
    let fts_queryable = fts_schema_rows == 1 && franken_fts_limit_probe(&conn);

    let (message_count, max_message_id): (i64, i64) = if quick_check_ok {
        let total: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
                row.get_typed(0)
            })
            .context("counting messages during frankensqlite database health probe")?;
        let highest: i64 = conn
            .query_row_map(
                "SELECT COALESCE(MAX(id), 0) FROM messages",
                fparams![],
                |row| row.get_typed(0),
            )
            .context("reading max message id during frankensqlite database health probe")?;
        (total, highest)
    } else {
        // The message table is not read when the quick check did not pass.
        (0, 0)
    };

    Ok(SqliteDatabaseHealthProbe {
        schema_version,
        quick_check_ok,
        fts_schema_rows,
        fts_queryable,
        message_count,
        max_message_id,
    })
}
2651
/// A historical bundle that has been copied into a scratch directory so it can
/// be finalized and then promoted to the canonical database path.
struct StagedHistoricalSeed {
    /// Scratch directory holding the staged copy. `stage_historical_bundle_for_seed`
    /// creates it inside the canonical database's parent directory, and
    /// `promote_staged_historical_seed` also places the pre-seed backup in it.
    tempdir: tempfile::TempDir,
    /// Path of the staged database file (`baseline-seed-output.db`) inside `tempdir`.
    db_path: PathBuf,
}
2656
2657fn stage_historical_bundle_for_seed(
2658 canonical_db_path: &Path,
2659 source_root_path: &Path,
2660) -> Result<StagedHistoricalSeed> {
2661 let canonical_parent = canonical_db_path.parent().unwrap_or_else(|| Path::new("."));
2662 fs::create_dir_all(canonical_parent).with_context(|| {
2663 format!(
2664 "creating canonical database directory before bulk historical seed import: {}",
2665 canonical_parent.display()
2666 )
2667 })?;
2668 let tempdir = tempfile::TempDir::new_in(canonical_parent)
2669 .context("creating temporary baseline seed directory")?;
2670 let staged_seed_db = tempdir.path().join("baseline-seed-output.db");
2671 copy_database_bundle(source_root_path, &staged_seed_db)?;
2672
2673 Ok(StagedHistoricalSeed {
2674 tempdir,
2675 db_path: staged_seed_db,
2676 })
2677}
2678
2679fn stage_and_finalize_historical_seed(
2680 canonical_db_path: &Path,
2681 bundle: &HistoricalDatabaseBundle,
2682 source_root_path: &Path,
2683) -> Result<(StagedHistoricalSeed, usize, usize)> {
2684 let staged_seed = stage_historical_bundle_for_seed(canonical_db_path, source_root_path)?;
2685 let (conversations_imported, messages_imported) =
2686 finalize_seeded_canonical_bundle_via_rusqlite(&staged_seed.db_path, bundle)?;
2687 Ok((staged_seed, conversations_imported, messages_imported))
2688}
2689
2690fn promote_staged_historical_seed(
2691 canonical_db_path: &Path,
2692 staged_seed: &StagedHistoricalSeed,
2693) -> Result<()> {
2694 let canonical_backup = staged_seed
2695 .tempdir
2696 .path()
2697 .join("pre-seed-canonical-backup.db");
2698 let had_canonical = canonical_db_path.exists()
2699 || database_sidecar_path(canonical_db_path, "-wal").exists()
2700 || database_sidecar_path(canonical_db_path, "-shm").exists();
2701
2702 if had_canonical {
2703 move_database_bundle(canonical_db_path, &canonical_backup).with_context(|| {
2704 format!(
2705 "backing up canonical database before promoting staged historical seed import: {}",
2706 canonical_db_path.display()
2707 )
2708 })?;
2709 }
2710
2711 if let Err(err) =
2712 move_database_bundle(&staged_seed.db_path, canonical_db_path).with_context(|| {
2713 format!(
2714 "promoting staged historical seed database bundle {} into canonical path {}",
2715 staged_seed.db_path.display(),
2716 canonical_db_path.display()
2717 )
2718 })
2719 {
2720 if had_canonical {
2721 let _ = move_database_bundle(&canonical_backup, canonical_db_path);
2722 }
2723 return Err(err);
2724 }
2725
2726 Ok(())
2727}
2728
2729pub(crate) fn seed_canonical_from_best_historical_bundle(
2730 canonical_db_path: &Path,
2731) -> Result<Option<HistoricalSalvageOutcome>> {
2732 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
2733 let mut last_seed_error: Option<anyhow::Error> = None;
2734 for bundle in ordered_bundles {
2735 if let Some(version) = bundle.probe.schema_version
2736 && version < 13
2737 {
2738 let err = anyhow!(
2739 "historical bundle {} schema_version {version} is too old for baseline import",
2740 bundle.root_path.display()
2741 );
2742 tracing::warn!(
2743 path = %bundle.root_path.display(),
2744 schema_version = version,
2745 "historical bundle is too old for baseline seed import"
2746 );
2747 last_seed_error = Some(err);
2748 continue;
2749 }
2750
2751 let (staged_seed, conversations_imported, messages_imported) =
2752 match stage_and_finalize_historical_seed(canonical_db_path, &bundle, &bundle.root_path)
2753 {
2754 Ok(result) => result,
2755 Err(primary_err) => {
2756 tracing::warn!(
2757 path = %bundle.root_path.display(),
2758 error = %primary_err,
2759 "direct bulk baseline seed from historical bundle failed; trying sqlite3 salvage copy"
2760 );
2761 let source = match open_historical_bundle_for_salvage(&bundle).with_context(
2762 || {
2763 format!(
2764 "opening historical seed bundle {} for baseline import",
2765 bundle.root_path.display()
2766 )
2767 },
2768 ) {
2769 Ok(source) => source,
2770 Err(salvage_err) => {
2771 last_seed_error = Some(anyhow!(
2772 "direct baseline seed from {} failed: {primary_err:#}; sqlite3 salvage open also failed: {salvage_err:#}",
2773 bundle.root_path.display()
2774 ));
2775 continue;
2776 }
2777 };
2778 match stage_and_finalize_historical_seed(
2779 canonical_db_path,
2780 &bundle,
2781 &source.root_path,
2782 ) {
2783 Ok(result) => result,
2784 Err(err) => {
2785 tracing::warn!(
2786 path = %bundle.root_path.display(),
2787 source_path = %source.root_path.display(),
2788 error = %err,
2789 "bulk baseline seed staging from sqlite3-salvaged historical bundle failed; trying next candidate"
2790 );
2791 last_seed_error = Some(err);
2792 continue;
2793 }
2794 }
2795 }
2796 };
2797
2798 if conversations_imported == 0 && messages_imported == 0 {
2799 let err = anyhow!(
2800 "historical bundle {} has no core rows for baseline import",
2801 bundle.root_path.display()
2802 );
2803 tracing::warn!(
2804 path = %bundle.root_path.display(),
2805 "historical bundle has no core rows for baseline seed import"
2806 );
2807 last_seed_error = Some(err);
2808 continue;
2809 }
2810
2811 if let Err(err) = promote_staged_historical_seed(canonical_db_path, &staged_seed) {
2812 tracing::warn!(
2813 path = %bundle.root_path.display(),
2814 error = %err,
2815 "promoting staged historical seed import failed; trying next candidate"
2816 );
2817 last_seed_error = Some(err);
2818 continue;
2819 }
2820
2821 tracing::info!(
2822 path = %bundle.root_path.display(),
2823 conversations_imported,
2824 messages_imported,
2825 "seeded empty canonical database from largest healthy historical bundle"
2826 );
2827
2828 return Ok(Some(HistoricalSalvageOutcome {
2829 bundles_considered: 0,
2830 bundles_imported: 1,
2831 conversations_imported,
2832 messages_imported,
2833 }));
2834 }
2835 if let Some(err) = last_seed_error {
2836 return Err(err);
2837 }
2838 Ok(None)
2839}
2840
2841fn parse_json_column(value: Option<String>) -> serde_json::Value {
2842 value
2843 .and_then(|raw| serde_json::from_str(&raw).ok())
2844 .unwrap_or(serde_json::Value::Null)
2845}
2846
/// Key of the single-entry JSON object that `wrap_historical_raw_json` builds
/// around unparsed JSON text; `historical_raw_json` looks for this key to
/// recover the raw text.
const HISTORICAL_RAW_JSON_SENTINEL_KEY: &str = "__cass_historical_raw_json__";
2848
2849fn wrap_historical_raw_json(raw: String) -> serde_json::Value {
2850 serde_json::json!({ HISTORICAL_RAW_JSON_SENTINEL_KEY: raw })
2851}
2852
2853fn historical_raw_json(value: &serde_json::Value) -> Option<&str> {
2854 match value {
2855 serde_json::Value::Object(map) if map.len() == 1 => map
2856 .get(HISTORICAL_RAW_JSON_SENTINEL_KEY)
2857 .and_then(serde_json::Value::as_str),
2858 _ => None,
2859 }
2860}
2861
2862fn parse_historical_json_column(value: Option<String>) -> serde_json::Value {
2863 match value {
2864 Some(raw) if raw.trim().is_empty() => serde_json::Value::Null,
2865 Some(raw) => wrap_historical_raw_json(raw),
2866 None => serde_json::Value::Null,
2867 }
2868}
2869
/// Reports whether the `CASS_DEBUG_HISTORICAL_SALVAGE` environment variable is
/// set. Only presence matters; the value (even an empty one) is ignored.
fn historical_salvage_debug_enabled() -> bool {
    matches!(
        std::env::var_os("CASS_DEBUG_HISTORICAL_SALVAGE"),
        Some(_)
    )
}
2873
/// Batch-size limits for historical imports, as produced by
/// `historical_import_batch_limits`.
///
/// NOTE(review): how the importer applies each limit is not visible from this
/// definition — confirm against the code that consumes it.
#[derive(Debug, Clone, Copy)]
struct HistoricalImportBatchLimits {
    /// Conversation limit; defaults to 32, or 128 when at least 32 CPUs are
    /// reported. Overridable via `CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS`.
    conversations: usize,
    /// Message limit; defaults to 4 096, or 16 384 when at least 32 CPUs are
    /// reported. Overridable via `CASS_HISTORICAL_IMPORT_BATCH_MESSAGES`.
    messages: usize,
    /// Payload size limit; defaults to 3 000 000, or 12 000 000 when at least
    /// 32 CPUs are reported. Overridable via `CASS_HISTORICAL_IMPORT_BATCH_CHARS`.
    payload_chars: usize,
}
2880
2881fn env_positive_usize(key: &str) -> Option<usize> {
2882 dotenvy::var(key)
2883 .ok()
2884 .and_then(|value| value.parse::<usize>().ok())
2885 .filter(|value| *value > 0)
2886}
2887
2888fn historical_import_batch_limits() -> HistoricalImportBatchLimits {
2889 let cpu_count = std::thread::available_parallelism()
2890 .map(std::num::NonZeroUsize::get)
2891 .unwrap_or(1);
2892
2893 let default_limits = if cpu_count >= 32 {
2894 HistoricalImportBatchLimits {
2895 conversations: 128,
2896 messages: 16_384,
2897 payload_chars: 12_000_000,
2898 }
2899 } else {
2900 HistoricalImportBatchLimits {
2901 conversations: 32,
2902 messages: 4_096,
2903 payload_chars: 3_000_000,
2904 }
2905 };
2906
2907 HistoricalImportBatchLimits {
2908 conversations: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CONVERSATIONS")
2909 .unwrap_or(default_limits.conversations),
2910 messages: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_MESSAGES")
2911 .unwrap_or(default_limits.messages),
2912 payload_chars: env_positive_usize("CASS_HISTORICAL_IMPORT_BATCH_CHARS")
2913 .unwrap_or(default_limits.payload_chars),
2914 }
2915}
2916
2917fn json_value_size_hint(value: &serde_json::Value) -> usize {
2918 if let Some(raw) = historical_raw_json(value) {
2919 return raw.len();
2920 }
2921 match value {
2922 serde_json::Value::Null => 0,
2923 other => serde_json::to_string(other)
2924 .map(|raw| raw.len())
2925 .unwrap_or(0),
2926 }
2927}
2928
2929fn message_payload_size_hint(message: &Message) -> usize {
2930 message
2931 .content
2932 .len()
2933 .saturating_add(json_value_size_hint(&message.extra_json))
2934}
2935
/// Reports whether `name` is a backup root file: it starts with `prefix` and
/// does not end in `-wal` or `-shm`.
fn is_backup_root_name(name: &str, prefix: &str) -> bool {
    if !name.starts_with(prefix) {
        return false;
    }
    !["-wal", "-shm"].iter().any(|suffix| name.ends_with(suffix))
}
2939
/// Reports whether `name` ends with one of the database sidecar suffixes:
/// `-wal`, `-shm`, `-lock-shared`, `-lock-reserved` or `-lock-pending`.
fn has_db_sidecar_suffix(name: &str) -> bool {
    for suffix in [
        "-wal",
        "-shm",
        "-lock-shared",
        "-lock-reserved",
        "-lock-pending",
    ] {
        if name.ends_with(suffix) {
            return true;
        }
    }
    false
}
2956
/// Schema version this build supports: a database storing exactly this value
/// is reported as compatible, and seeded databases are stamped with it.
pub const CURRENT_SCHEMA_VERSION: i64 = 20;
/// Oldest stored schema version that `check_schema_compatibility` reports as
/// `NeedsMigration`; older positive versions are reported as `NeedsRebuild`.
const MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION: i64 = 13;
2960
/// Verdict produced by `check_schema_compatibility` for an on-disk database.
#[derive(Debug, Clone)]
pub enum SchemaCheck {
    /// The stored `schema_version` equals the supported version.
    Compatible,
    /// The database has no tables at all, or its version lies between
    /// `MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION` (inclusive) and the supported
    /// version (exclusive).
    NeedsMigration,
    /// The database cannot be used or migrated in place; the string states why.
    NeedsRebuild(String),
}
2971
2972fn schema_check_error_requires_rebuild(err: &frankensqlite::FrankenError) -> bool {
2973 matches!(
2977 err,
2978 frankensqlite::FrankenError::DatabaseCorrupt { .. }
2979 | frankensqlite::FrankenError::WalCorrupt { .. }
2980 | frankensqlite::FrankenError::NotADatabase { .. }
2981 | frankensqlite::FrankenError::ShortRead { .. }
2982 )
2983}
2984
/// Builds a sibling path of the form
/// `<file name>.backup.<pid>.<nanoseconds since epoch>.<counter>`.
///
/// The counter is a process-wide sequence number, so repeated calls within one
/// process yield distinct names. A file name that is missing or not valid
/// UTF-8 is replaced by `db`; a clock before the Unix epoch contributes `0`.
fn unique_backup_path(path: &Path) -> PathBuf {
    static NEXT_NONCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let pid = std::process::id();
    let nanos_since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let sequence = NEXT_NONCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let base_name = match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => "db",
    };

    path.with_file_name(format!(
        "{base_name}.backup.{pid}.{nanos_since_epoch}.{sequence}"
    ))
}
3002
/// Derives the hidden in-progress path for a backup: the same directory, with
/// the file name rewritten to `.<file name>.vacuum-in-progress`.
///
/// A file name that is missing or not valid UTF-8 is replaced by `db.backup`.
fn vacuum_stage_backup_path(backup_path: &Path) -> PathBuf {
    let leaf = match backup_path.file_name() {
        Some(os_name) => os_name.to_str().unwrap_or("db.backup"),
        None => "db.backup",
    };
    let staged_name = format!(".{leaf}.vacuum-in-progress");
    backup_path.with_file_name(staged_name)
}
3010
3011fn check_schema_compatibility(
3015 path: &Path,
3016) -> std::result::Result<SchemaCheck, frankensqlite::FrankenError> {
3017 let mut conn = open_franken_with_flags(
3018 &path.to_string_lossy(),
3019 FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
3020 )?;
3021
3022 let result = (|| {
3023 let meta_exists: i32 = conn.query_row_map(
3025 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='meta'",
3026 fparams![],
3027 |row| row.get_typed(0),
3028 )?;
3029
3030 if meta_exists == 0 {
3031 let table_count: i32 = conn.query_row_map(
3034 "SELECT COUNT(*) FROM sqlite_master WHERE type='table'",
3035 fparams![],
3036 |row| row.get_typed(0),
3037 )?;
3038
3039 if table_count == 0 {
3040 return Ok(SchemaCheck::NeedsMigration);
3042 }
3043
3044 return Ok(SchemaCheck::NeedsRebuild(
3046 "Database missing schema version metadata".to_string(),
3047 ));
3048 }
3049
3050 let version: Option<i64> = conn
3052 .query_row_map(
3053 "SELECT value FROM meta WHERE key = 'schema_version'",
3054 fparams![],
3055 |row| Ok(row.get_typed::<String>(0)?.parse().ok()),
3056 )
3057 .ok()
3058 .flatten();
3059
3060 match version {
3061 Some(v) if v == SCHEMA_VERSION => Ok(SchemaCheck::Compatible),
3062 Some(v) if (MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION..SCHEMA_VERSION).contains(&v) => {
3063 Ok(SchemaCheck::NeedsMigration)
3064 }
3065 Some(v) if v > 0 && v < MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION => {
3066 Ok(SchemaCheck::NeedsRebuild(format!(
3067 "Schema version {} is too old for in-place migration; supported upgrade path starts at version {}",
3068 v, MIN_IN_PLACE_MIGRATION_SCHEMA_VERSION
3069 )))
3070 }
3071 Some(v) => {
3072 Ok(SchemaCheck::NeedsRebuild(format!(
3074 "Schema version {} is newer than supported version {}",
3075 v, SCHEMA_VERSION
3076 )))
3077 }
3078 None => Ok(SchemaCheck::NeedsRebuild(
3079 "Schema version not found or invalid".to_string(),
3080 )),
3081 }
3082 })();
3083
3084 if let Err(close_err) = conn.close_in_place() {
3085 tracing::warn!(
3086 error = %close_err,
3087 db_path = %path.display(),
3088 "check_schema_compatibility: close_in_place failed; falling back to best-effort close"
3089 );
3090 conn.close_best_effort_in_place();
3091 }
3092
3093 result
3094}
3095
/// Module-private alias of `CURRENT_SCHEMA_VERSION`.
const SCHEMA_VERSION: i64 = CURRENT_SCHEMA_VERSION;
3097
/// Test-only V1 schema SQL: enables foreign keys and creates the `meta`,
/// `agents`, `workspaces`, `conversations`, `messages`, `snippets`, `tags` and
/// `conversation_tags` tables plus two lookup indexes.
#[cfg(test)]
const MIGRATION_V1: &str = r"
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS agents (
    id INTEGER PRIMARY KEY,
    slug TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    version TEXT,
    kind TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

CREATE TABLE IF NOT EXISTS workspaces (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,
    display_name TEXT
);

CREATE TABLE IF NOT EXISTS conversations (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    UNIQUE(agent_id, external_id)
);

CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY,
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    idx INTEGER NOT NULL,
    role TEXT NOT NULL,
    author TEXT,
    created_at INTEGER,
    content TEXT NOT NULL,
    extra_json TEXT,
    UNIQUE(conversation_id, idx)
);

CREATE TABLE IF NOT EXISTS snippets (
    id INTEGER PRIMARY KEY,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    file_path TEXT,
    start_line INTEGER,
    end_line INTEGER,
    language TEXT,
    snippet_text TEXT
);

CREATE TABLE IF NOT EXISTS tags (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);

CREATE TABLE IF NOT EXISTS conversation_tags (
    conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
    PRIMARY KEY (conversation_id, tag_id)
);

CREATE INDEX IF NOT EXISTS idx_conversations_agent_started
    ON conversations(agent_id, started_at DESC);

CREATE INDEX IF NOT EXISTS idx_messages_conv_idx
    ON messages(conversation_id, idx);

";
3177
/// Test-only V2 SQL: creates the `fts_messages` FTS5 table (porter tokenizer)
/// if it is absent and fills it from `messages` joined with `conversations`,
/// `agents` and (optionally) `workspaces`.
#[cfg(test)]
const MIGRATION_V2: &str = r"
CREATE VIRTUAL TABLE IF NOT EXISTS fts_messages USING fts5(
    content,
    title,
    agent,
    workspace,
    source_path,
    created_at UNINDEXED,
    message_id UNINDEXED,
    tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
    m.content,
    c.title,
    a.slug,
    w.path,
    c.source_path,
    m.created_at,
    m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
3204
/// Test-only V3 SQL: drops `fts_messages`, recreates it with the same FTS5
/// definition as V2, and refills it from the core tables.
#[cfg(test)]
#[allow(dead_code)]
const MIGRATION_V3: &str = r"
DROP TABLE IF EXISTS fts_messages;
CREATE VIRTUAL TABLE fts_messages USING fts5(
    content,
    title,
    agent,
    workspace,
    source_path,
    created_at UNINDEXED,
    message_id UNINDEXED,
    tokenize='porter'
);
INSERT INTO fts_messages(content, title, agent, workspace, source_path, created_at, message_id)
SELECT
    m.content,
    c.title,
    a.slug,
    w.path,
    c.source_path,
    m.created_at,
    m.id
FROM messages m
JOIN conversations c ON m.conversation_id = c.id
JOIN agents a ON c.agent_id = a.id
LEFT JOIN workspaces w ON c.workspace_id = w.id;
";
3233
/// Test-only V4 SQL: creates the `sources` table and inserts the default
/// `'local'` source row if it is not already present.
#[cfg(test)]
const MIGRATION_V4: &str = r"
-- Sources table for tracking where conversations come from
CREATE TABLE IF NOT EXISTS sources (
    id TEXT PRIMARY KEY, -- source_id (e.g., 'local', 'work-laptop')
    kind TEXT NOT NULL, -- 'local', 'ssh', etc.
    host_label TEXT, -- display label
    machine_id TEXT, -- optional stable machine id
    platform TEXT, -- 'macos', 'linux', 'windows'
    config_json TEXT, -- JSON blob for extra config (SSH params, path rewrites)
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL
);

-- Bootstrap: Insert the default 'local' source
INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
";
3252
/// Test-only V5 SQL: rebuilds `conversations` with `source_id` and
/// `origin_host` columns and a `(source_id, agent_id, external_id)` unique
/// constraint, copying existing rows with `source_id = 'local'`, then
/// recreates the indexes.
#[cfg(test)]
const MIGRATION_V5: &str = r"
-- Add provenance columns to conversations table
-- SQLite cannot alter unique constraints, so we need to recreate the table

-- Create new table with provenance columns and updated unique constraint
CREATE TABLE conversations_new (
    id INTEGER PRIMARY KEY,
    agent_id INTEGER NOT NULL REFERENCES agents(id),
    workspace_id INTEGER REFERENCES workspaces(id),
    source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
    external_id TEXT,
    title TEXT,
    source_path TEXT NOT NULL,
    started_at INTEGER,
    ended_at INTEGER,
    approx_tokens INTEGER,
    metadata_json TEXT,
    origin_host TEXT,
    UNIQUE(source_id, agent_id, external_id)
);

-- Copy data from old table (all existing conversations get source_id='local')
INSERT INTO conversations_new (id, agent_id, workspace_id, source_id, external_id, title,
    source_path, started_at, ended_at, approx_tokens, metadata_json, origin_host)
SELECT id, agent_id, workspace_id, 'local', external_id, title,
    source_path, started_at, ended_at, approx_tokens, metadata_json, NULL
FROM conversations;

-- Drop old table and rename new
DROP TABLE conversations;
ALTER TABLE conversations_new RENAME TO conversations;

-- Recreate indexes
CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
";
3290
/// Test-only V6 SQL: adds an index on `conversations(source_path)`.
#[cfg(test)]
const MIGRATION_V6: &str = r"
-- Optimize lookup by source_path (used by TUI detail view)
CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
";
3296
/// Test-only V7 SQL: adds the `metadata_bin` BLOB column to `conversations`
/// and the `extra_bin` BLOB column to `messages`.
#[cfg(test)]
const MIGRATION_V7: &str = r"
-- Add binary columns for MessagePack serialization (Opt 3.1)
-- Binary format is 50-70% smaller than JSON and faster to parse
ALTER TABLE conversations ADD COLUMN metadata_bin BLOB;
ALTER TABLE messages ADD COLUMN extra_bin BLOB;
";
3304
/// Test-only V8 SQL: creates the `daily_stats` rollup table, keyed by
/// `(day_id, agent_slug, source_id)`, and two indexes on it.
#[cfg(test)]
const MIGRATION_V8: &str = r"
-- Opt 3.2: Daily stats materialized table for O(1) time-range histograms
-- Provides fast aggregated queries for stats/dashboard without full table scans

CREATE TABLE IF NOT EXISTS daily_stats (
    day_id INTEGER NOT NULL, -- Days since 2020-01-01 (Unix epoch + offset)
    agent_slug TEXT NOT NULL, -- 'all' for totals, or specific agent slug
    source_id TEXT NOT NULL DEFAULT 'all', -- 'all' for totals, or specific source
    session_count INTEGER NOT NULL DEFAULT 0,
    message_count INTEGER NOT NULL DEFAULT 0,
    total_chars INTEGER NOT NULL DEFAULT 0,
    last_updated INTEGER NOT NULL,
    PRIMARY KEY (day_id, agent_slug, source_id)
);

CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
";
3324
/// Test-only V9 SQL: creates the `embedding_jobs` table and a partial unique
/// index allowing only one `pending`/`running` job per `(db_path, model_id)`.
#[cfg(test)]
const MIGRATION_V9: &str = r"
-- Background embedding jobs tracking table
CREATE TABLE IF NOT EXISTS embedding_jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    db_path TEXT NOT NULL,
    model_id TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'pending',
    total_docs INTEGER NOT NULL DEFAULT 0,
    completed_docs INTEGER NOT NULL DEFAULT 0,
    error_message TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT
);

-- Only one pending or running job per (db_path, model_id) at a time.
-- Multiple completed/failed/cancelled jobs are allowed for history.
CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
ON embedding_jobs(db_path, model_id)
WHERE status IN ('pending', 'running');
";
3347
/// Schema migration V10 (compiled for test builds only): token analytics.
///
/// The script creates the `token_usage` per-message ledger, the
/// `token_daily_stats` rollup table and the `model_pricing` lookup table
/// (seeded with ten pricing rows), then adds token-summary columns to
/// `conversations`.
///
/// The `CREATE ... IF NOT EXISTS` and `INSERT OR IGNORE` statements can be
/// re-run safely, but the trailing `ALTER TABLE ... ADD COLUMN` statements
/// carry no existence guard and will fail if the columns are already present.
#[cfg(test)]
const MIGRATION_V10: &str = r"
-- Token analytics: per-message token usage ledger
CREATE TABLE IF NOT EXISTS token_usage (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
    conversation_id INTEGER NOT NULL,
    agent_id INTEGER NOT NULL,
    workspace_id INTEGER,
    source_id TEXT NOT NULL DEFAULT 'local',

    -- Timing
    timestamp_ms INTEGER NOT NULL,
    day_id INTEGER NOT NULL,

    -- Model identification
    model_name TEXT,
    model_family TEXT,
    model_tier TEXT,
    service_tier TEXT,
    provider TEXT,

    -- Token counts (nullable — not all agents provide all fields)
    input_tokens INTEGER,
    output_tokens INTEGER,
    cache_read_tokens INTEGER,
    cache_creation_tokens INTEGER,
    thinking_tokens INTEGER,
    total_tokens INTEGER,

    -- Cost estimation
    estimated_cost_usd REAL,

    -- Message context
    role TEXT NOT NULL,
    content_chars INTEGER NOT NULL,
    has_tool_calls INTEGER NOT NULL DEFAULT 0,
    tool_call_count INTEGER NOT NULL DEFAULT 0,

    -- Data quality
    data_source TEXT NOT NULL DEFAULT 'api',

    UNIQUE(message_id)
);

CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);

-- Token analytics: pre-aggregated daily rollups
CREATE TABLE IF NOT EXISTS token_daily_stats (
    day_id INTEGER NOT NULL,
    agent_slug TEXT NOT NULL,
    source_id TEXT NOT NULL DEFAULT 'all',
    model_family TEXT NOT NULL DEFAULT 'all',

    api_call_count INTEGER NOT NULL DEFAULT 0,
    user_message_count INTEGER NOT NULL DEFAULT 0,
    assistant_message_count INTEGER NOT NULL DEFAULT 0,
    tool_message_count INTEGER NOT NULL DEFAULT 0,

    total_input_tokens INTEGER NOT NULL DEFAULT 0,
    total_output_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
    total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
    total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
    grand_total_tokens INTEGER NOT NULL DEFAULT 0,

    total_content_chars INTEGER NOT NULL DEFAULT 0,
    total_tool_calls INTEGER NOT NULL DEFAULT 0,

    estimated_cost_usd REAL NOT NULL DEFAULT 0.0,

    session_count INTEGER NOT NULL DEFAULT 0,

    last_updated INTEGER NOT NULL,

    PRIMARY KEY (day_id, agent_slug, source_id, model_family)
);

CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);

-- Model pricing lookup table
CREATE TABLE IF NOT EXISTS model_pricing (
    model_pattern TEXT NOT NULL,
    provider TEXT NOT NULL,
    input_cost_per_mtok REAL NOT NULL,
    output_cost_per_mtok REAL NOT NULL,
    cache_read_cost_per_mtok REAL,
    cache_creation_cost_per_mtok REAL,
    effective_date TEXT NOT NULL,
    PRIMARY KEY (model_pattern, effective_date)
);

-- Seed with current pricing (as of 2026-02)
INSERT OR IGNORE INTO model_pricing VALUES
    ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
    ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
    ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
    ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
    ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
    ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
    ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
    ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
    ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');

-- Extend conversations table with token summary columns
ALTER TABLE conversations ADD COLUMN total_input_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_output_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_read_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN total_cache_creation_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN grand_total_tokens INTEGER;
ALTER TABLE conversations ADD COLUMN estimated_cost_usd REAL;
ALTER TABLE conversations ADD COLUMN primary_model TEXT;
ALTER TABLE conversations ADD COLUMN api_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN tool_call_count INTEGER;
ALTER TABLE conversations ADD COLUMN user_message_count INTEGER;
ALTER TABLE conversations ADD COLUMN assistant_message_count INTEGER;
";
3471
/// Schema migration V14: drops the `fts_messages` table.
///
/// The script only performs the drop; per its inline SQL comment the
/// replacement contentless table is recreated lazily after open rather than
/// by this migration.
const MIGRATION_V14: &str = r"
-- Switch FTS5 from internal-content to contentless mode (CASS #163).
-- Drop the old V13 internal-content fts_messages first so that
-- sqlite_schema does not contain two conflicting CREATE VIRTUAL TABLE
-- entries, which makes the database completely unreadable.
-- The current contentless table is recreated lazily after open() only when the
-- frankensqlite FTS consistency check finds it missing or malformed.
DROP TABLE IF EXISTS fts_messages;
";
3481
/// DDL that creates the `conversation_tail_state` table (idempotent via
/// `IF NOT EXISTS`).
///
/// The table is keyed by `conversation_id` and intentionally declares no
/// foreign key (see the inline SQL comment for the rationale).
const MIGRATION_V15_TAIL_STATE_TABLE: &str = r"
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
    -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);
";
3492
/// Schema migration V16: drops the `idx_messages_conv_idx` index, which the
/// inline SQL comment describes as redundant with the autoindex created by
/// `UNIQUE(conversation_id, idx)`.
const MIGRATION_V16: &str = r"
-- UNIQUE(conversation_id, idx) already creates sqlite_autoindex_messages_1,
-- which covers the same lookup/order key as idx_messages_conv_idx. Keeping both
-- doubles message insert index maintenance on the hot indexing path.
DROP INDEX IF EXISTS idx_messages_conv_idx;
";
3499
/// Schema migration V17: drops the `idx_messages_created` index so message
/// inserts no longer maintain it (rationale in the inline SQL comment).
const MIGRATION_V17: &str = r"
-- Drop the global messages(created_at) secondary index from the ingest hot
-- path. Search/time filters are served by the derived search layer and
-- conversation/analytics indexes, while this index is maintained on every
-- message insert.
DROP INDEX IF EXISTS idx_messages_created;
";
3507
/// Schema migration V18: creates `conversation_tail_state` (if missing) and
/// backfills it from `conversations`.
///
/// Only conversations with at least one non-NULL tail column (`ended_at`,
/// `last_message_idx`, `last_message_created_at`) are copied. The backfill
/// uses `INSERT OR REPLACE`, so re-running overwrites existing tail rows with
/// the values currently stored on `conversations`.
const MIGRATION_V18: &str = r"
-- Move append-tail state out of the wide, indexed conversations row. The hot
-- append path updates this cache for every appended conversation; keeping it in
-- a tiny rowid table avoids rewriting the large conversation record.
CREATE TABLE IF NOT EXISTS conversation_tail_state (
    -- Deliberately no FOREIGN KEY: this hot row is maintained by insert/append
    -- paths, and FK metadata keeps frankensqlite off the direct rowid update path.
    conversation_id INTEGER PRIMARY KEY,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_tail_state (
    conversation_id, ended_at, last_message_idx, last_message_created_at
)
SELECT id, ended_at, last_message_idx, last_message_created_at
FROM conversations
WHERE ended_at IS NOT NULL
   OR last_message_idx IS NOT NULL
   OR last_message_created_at IS NOT NULL;
";
3530
/// Schema migration V19: creates `conversation_external_lookup` and backfills
/// it for every conversation that has an `external_id`.
///
/// The lookup key is the length-prefixed concatenation
/// `len(source_id):source_id:agent_id:len(external_id):external_id`.
///
/// NOTE(review): SQL `||` yields NULL when any operand is NULL, so a NULL
/// `source_id` or `agent_id` would produce a NULL key — confirm those columns
/// are NOT NULL on `conversations`.
/// NOTE(review): SQLite's `length()` counts characters for TEXT values; the
/// Rust-side key builder presumably has to use the same unit — confirm.
const MIGRATION_V19: &str = r"
-- Materialize external conversation provenance into one compact lookup key.
-- This keeps the hot append/new-conversation probe on a single primary-key
-- lookup instead of a composite conversations-table predicate.
CREATE TABLE IF NOT EXISTS conversation_external_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL
);

INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
SELECT
    CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
    CAST(agent_id AS TEXT) || ':' ||
    CAST(length(external_id) AS TEXT) || ':' || external_id,
    id
FROM conversations
WHERE external_id IS NOT NULL;
";
3549
/// Schema migration V20: creates `conversation_external_tail_lookup` and
/// backfills it for every conversation that has an `external_id`.
///
/// Each row pairs the same length-prefixed lookup key built in V19 with the
/// conversation id and the three tail columns, which are read through
/// correlated subqueries against `conversation_tail_state` (NULL when no tail
/// row exists for the conversation).
const MIGRATION_V20: &str = r"
-- Fuse external conversation lookup with append-tail state. Append-heavy
-- workloads can resolve both the conversation id and tail plan from one
-- primary-key probe.
CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
    lookup_key TEXT PRIMARY KEY,
    conversation_id INTEGER NOT NULL,
    ended_at INTEGER,
    last_message_idx INTEGER,
    last_message_created_at INTEGER
);

INSERT OR REPLACE INTO conversation_external_tail_lookup (
    lookup_key,
    conversation_id,
    ended_at,
    last_message_idx,
    last_message_created_at
)
SELECT
    CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
    CAST(c.agent_id AS TEXT) || ':' ||
    CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
    c.id,
    (SELECT ts.ended_at
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id),
    (SELECT ts.last_message_idx
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id),
    (SELECT ts.last_message_created_at
     FROM conversation_tail_state ts
     WHERE ts.conversation_id = c.id)
FROM conversations c
WHERE c.external_id IS NOT NULL;
";
3586
/// Plain-data view of one embedding job.
///
/// NOTE(review): presumably mirrors a row of the `embedding_jobs` table
/// declared in the migrations above — confirm against the code that builds it.
#[derive(Debug, Clone)]
pub struct EmbeddingJobRow {
    /// Job identifier.
    pub id: i64,
    /// Database path the job targets.
    pub db_path: String,
    /// Identifier of the embedding model the job uses.
    pub model_id: String,
    /// Job status as free-form text (the schema comments mention pending,
    /// running, completed, failed and cancelled).
    pub status: String,
    /// Total number of documents in the job.
    pub total_docs: i64,
    /// Number of documents completed so far.
    pub completed_docs: i64,
    /// Error text, when one was recorded.
    pub error_message: Option<String>,
    /// Creation timestamp, stored as text.
    pub created_at: String,
    /// Start timestamp, stored as text, if set.
    pub started_at: Option<String>,
    /// Completion timestamp, stored as text, if set.
    pub completed_at: Option<String>,
}
3601
/// Conversation-level metadata carried through a lexical rebuild.
///
/// NOTE(review): field meanings below follow the column names; confirm units
/// of the timestamps against the query that populates this row.
#[derive(Debug, Clone)]
pub struct LexicalRebuildConversationRow {
    /// Conversation id, when known.
    pub id: Option<i64>,
    /// Slug of the agent that produced the conversation.
    pub agent_slug: String,
    /// Workspace path, if the conversation has one.
    pub workspace: Option<PathBuf>,
    /// External identifier, if any.
    pub external_id: Option<String>,
    /// Conversation title, if any.
    pub title: Option<String>,
    /// Path of the source the conversation was read from.
    pub source_path: PathBuf,
    /// Start timestamp, if recorded.
    pub started_at: Option<i64>,
    /// End timestamp, if recorded.
    pub ended_at: Option<i64>,
    /// Identifier of the provenance source.
    pub source_id: String,
    /// Host the conversation originated from, if recorded.
    pub origin_host: Option<String>,
}
3621
/// Size summary of one conversation: how many messages it holds and how many
/// bytes they account for (the byte figure may be an estimate derived from the
/// message count).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LexicalRebuildConversationFootprintRow {
    /// Conversation the footprint describes.
    pub conversation_id: i64,
    /// Number of messages in the conversation.
    pub message_count: usize,
    /// Message payload size in bytes.
    pub message_bytes: usize,
}
3630
/// Assumed size of a single message (4 KiB) when a footprint is estimated
/// from a message count alone.
pub(crate) const LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE: usize = 4 * 1024;
/// Maximum number of conversations that may lack tail metadata while the
/// coverage is still considered sufficient.
const LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT: usize = 64;

/// Reports whether tail metadata covers enough conversations.
///
/// Coverage is sufficient when there are no conversations at all, or when the
/// number of uncovered conversations does not exceed
/// `LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT`. A covered count
/// larger than the total is clamped to the total.
fn lexical_rebuild_tail_metadata_coverage_is_sufficient(
    total_conversations: usize,
    covered_conversations: usize,
) -> bool {
    if total_conversations == 0 {
        return true;
    }
    // Clamping first guarantees the subtraction below cannot underflow.
    let covered = covered_conversations.min(total_conversations);
    let uncovered = total_conversations - covered;
    uncovered <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT
}
3642
/// Converts a zero-based last-message index into a message count
/// (`index + 1`).
///
/// Returns `None` when the index is absent, negative, or when the resulting
/// count overflows `u64` or does not fit in `usize`.
fn lexical_rebuild_message_count_from_tail_idx(last_message_idx: Option<i64>) -> Option<usize> {
    last_message_idx
        .and_then(|idx| u64::try_from(idx).ok())
        .and_then(|idx| idx.checked_add(1))
        .and_then(|count| usize::try_from(count).ok())
}
3648
3649fn lexical_rebuild_conversation_footprint_from_count(
3650 conversation_id: i64,
3651 message_count: usize,
3652) -> LexicalRebuildConversationFootprintRow {
3653 LexicalRebuildConversationFootprintRow {
3654 conversation_id,
3655 message_count,
3656 message_bytes: message_count
3657 .saturating_mul(LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE),
3658 }
3659}
3660
/// One message as read for a lexical rebuild, including the id of the
/// conversation it belongs to.
#[derive(Debug, Clone)]
pub struct LexicalRebuildMessageRow {
    /// Owning conversation id.
    pub conversation_id: i64,
    /// Message id.
    pub id: i64,
    /// Position of the message within its conversation.
    pub idx: i64,
    /// Message role as text.
    pub role: String,
    /// Author, if recorded.
    pub author: Option<String>,
    /// Creation timestamp, if recorded.
    pub created_at: Option<i64>,
    /// Message text.
    pub content: String,
}
3672
/// Compact message representation for rebuilds that process messages grouped
/// by conversation: it carries no conversation or message id and reduces the
/// role to a single tool/non-tool flag.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LexicalRebuildGroupedMessageRow {
    /// Position of the message within its conversation.
    pub idx: i64,
    /// Whether the message has the tool role.
    pub is_tool_role: bool,
    /// Creation timestamp, if recorded.
    pub created_at: Option<i64>,
    /// Message text.
    pub content: String,
}
3683
/// Grouped message rows; up to 32 rows are stored inline before the
/// `SmallVec` spills to the heap.
pub type LexicalRebuildGroupedMessageRows = SmallVec<[LexicalRebuildGroupedMessageRow; 32]>;
3685
/// Alias that lets callers refer to [`FrankenStorage`] as `SqliteStorage`.
pub type SqliteStorage = FrankenStorage;
3688
/// Storage handle wrapping a frankensqlite connection together with
/// per-handle state and caches that can be shared between handles.
pub struct FrankenStorage {
    /// Underlying database connection.
    conn: FrankenConnection,
    /// Path the connection was opened from; reused to open additional writers.
    db_path: PathBuf,
    /// Starts as `false`.
    /// NOTE(review): presumably set once a preflight check for ephemeral
    /// writers has passed — the setter is outside this view.
    ephemeral_writer_preflight_verified: AtomicBool,
    /// Last WAL auto-checkpoint page count recorded for this connection;
    /// `UNSET_INDEX_WRITER_CHECKPOINT_PAGES` until configured.
    index_writer_checkpoint_pages: AtomicI64,
    /// Busy timeout recorded for the index writer;
    /// `UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS` until configured.
    index_writer_busy_timeout_ms: AtomicU64,
    /// Slot holding a reusable secondary writer connection (see
    /// `CachedEphemeralWriter`).
    cached_ephemeral_writer: parking_lot::Mutex<CachedEphemeralWriter>,
    /// Agent key -> id cache, shared (via `Arc`) with writers derived from
    /// this handle.
    ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
    /// Workspace key -> id cache, shared with derived writers.
    ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
    /// Set of conversation sources already ensured, shared with derived writers.
    ensured_conversation_sources: Arc<parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>>,
    /// Set of daily-stats keys already ensured, shared with derived writers.
    ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
    /// Tri-state cache (`FTS_MESSAGES_PRESENT_*`) of whether `fts_messages`
    /// exists in the schema.
    fts_messages_present_cache: AtomicI8,
}
3703
/// Page count passed to `PRAGMA wal_autocheckpoint` by `apply_config`.
const DEFAULT_WAL_AUTOCHECKPOINT_PAGES: i64 = 4096;
/// Sentinel meaning "no checkpoint page count recorded yet".
const UNSET_INDEX_WRITER_CHECKPOINT_PAGES: i64 = i64::MIN;
/// Sentinel meaning "no busy timeout recorded yet".
const UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS: u64 = 0;
/// `fts_messages` presence has not been probed (or was invalidated).
const FTS_MESSAGES_PRESENT_UNKNOWN: i8 = 0;
/// `fts_messages` was probed and found absent (also stored when the probe fails).
const FTS_MESSAGES_PRESENT_ABSENT: i8 = 1;
/// `fts_messages` was probed and found present.
const FTS_MESSAGES_PRESENT_PRESENT: i8 = 2;
3713
/// State of the reusable ephemeral-writer slot on a `FrankenStorage`.
enum CachedEphemeralWriter {
    /// No writer has been opened for the slot (initial state, and the state
    /// after a discard or close).
    Uninitialized,
    /// A writer connection is parked in the slot and ready for reuse.
    Cached(Box<SendFrankenConnection>),
    /// The slot's writer is currently handed out to a caller.
    InUse,
}
3719
3720#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3721struct EnsuredAgentKey {
3722 slug: String,
3723 name: String,
3724 version: Option<String>,
3725 kind: String,
3726}
3727
3728impl EnsuredAgentKey {
3729 fn from_agent(agent: &Agent) -> Self {
3730 Self {
3731 slug: agent.slug.clone(),
3732 name: agent.name.clone(),
3733 version: agent.version.clone(),
3734 kind: agent_kind_str(agent.kind.clone()),
3735 }
3736 }
3737}
3738
/// Cache key identifying a workspace by path and optional display name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredWorkspaceKey {
    path: String,
    display_name: Option<String>,
}

impl EnsuredWorkspaceKey {
    /// Builds the key, taking ownership of `path` and copying the display
    /// name when one is given.
    fn new(path: String, display_name: Option<&str>) -> Self {
        let display_name = display_name.map(|name| name.to_string());
        Self { path, display_name }
    }
}
3753
3754#[derive(Debug, Clone, PartialEq, Eq, Hash)]
3755struct EnsuredConversationSourceKey {
3756 id: String,
3757 kind: SourceKind,
3758 host_label: Option<String>,
3759}
3760
3761impl EnsuredConversationSourceKey {
3762 fn from_source(source: &Source) -> Self {
3763 Self {
3764 id: source.id.clone(),
3765 kind: source.kind,
3766 host_label: source.host_label.clone(),
3767 }
3768 }
3769}
3770
/// Cache key identifying a daily-stats row by day, agent slug and source id.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct EnsuredDailyStatsKey {
    day_id: i64,
    agent_slug: String,
    source_id: String,
}

impl EnsuredDailyStatsKey {
    /// Builds the key, copying both string arguments into owned values.
    fn new(day_id: i64, agent_slug: &str, source_id: &str) -> Self {
        let agent_slug = String::from(agent_slug);
        let source_id = String::from(source_id);
        Self {
            day_id,
            agent_slug,
            source_id,
        }
    }
}
3787
/// PRAGMA spellings tried, in order, to turn off `autocommit_retain`; the
/// first one that executes successfully wins.
const AUTOCOMMIT_RETAIN_OFF_PRAGMAS: [&str; 2] = [
    "PRAGMA fsqlite.autocommit_retain = OFF;",
    "PRAGMA autocommit_retain = OFF;",
];
3792
3793fn disable_autocommit_retain<E>(
3794 mut execute: impl FnMut(&'static str) -> std::result::Result<(), E>,
3795) -> Result<&'static str>
3796where
3797 E: std::fmt::Display,
3798{
3799 let mut failures = Vec::new();
3800 for pragma in AUTOCOMMIT_RETAIN_OFF_PRAGMAS {
3801 match execute(pragma) {
3802 Ok(()) => return Ok(pragma),
3803 Err(err) => {
3804 let error = err.to_string();
3805 tracing::debug!(
3806 %pragma,
3807 error = %error,
3808 "autocommit_retain PRAGMA variant not supported"
3809 );
3810 failures.push(format!("{pragma}: {error}"));
3811 }
3812 }
3813 }
3814
3815 Err(anyhow!(
3816 "failed to disable autocommit_retain on frankensqlite connection; \
3817 refusing to keep a long-lived MVCC connection that may accumulate \
3818 unbounded write snapshots. Upgrade frankensqlite to a version that \
3819 supports one of these PRAGMAs or use a short-lived connection path. \
3820 attempts: {}",
3821 failures.join("; ")
3822 ))
3823}
3824
3825impl FrankenStorage {
3826 fn new(conn: FrankenConnection, db_path: PathBuf) -> Self {
3827 Self::new_with_shared_caches(
3828 conn,
3829 db_path,
3830 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3831 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3832 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3833 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3834 )
3835 }
3836
3837 fn new_with_shared_caches(
3838 conn: FrankenConnection,
3839 db_path: PathBuf,
3840 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3841 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3842 ensured_conversation_sources: Arc<
3843 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3844 >,
3845 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3846 ) -> Self {
3847 Self {
3848 conn,
3849 db_path,
3850 ephemeral_writer_preflight_verified: AtomicBool::new(false),
3851 index_writer_checkpoint_pages: AtomicI64::new(UNSET_INDEX_WRITER_CHECKPOINT_PAGES),
3852 index_writer_busy_timeout_ms: AtomicU64::new(UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS),
3853 cached_ephemeral_writer: parking_lot::Mutex::new(CachedEphemeralWriter::Uninitialized),
3854 ensured_agents,
3855 ensured_workspaces,
3856 ensured_conversation_sources,
3857 ensured_daily_stats_keys,
3858 fts_messages_present_cache: AtomicI8::new(FTS_MESSAGES_PRESENT_UNKNOWN),
3859 }
3860 }
3861
3862 fn apply_open_stage_busy_timeout(&self) {
3863 if let Err(err) = self.conn.execute("PRAGMA busy_timeout = 5000;") {
3864 tracing::debug!(
3865 error = %err,
3866 "failed to apply open-stage busy_timeout before migrations"
3867 );
3868 }
3869 }
3870
3871 pub fn open(path: &Path) -> Result<Self> {
3877 if let Some(parent) = path.parent() {
3878 fs::create_dir_all(parent)
3879 .with_context(|| format!("creating db directory {}", parent.display()))?;
3880 }
3881
3882 let path_str = path.to_string_lossy().to_string();
3883 let _doctor_guard =
3884 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3885 let conn = FrankenConnection::open(&path_str)
3886 .with_context(|| format!("opening frankensqlite db at {}", path.display()))?;
3887 let storage = Self::new(conn, path.to_path_buf());
3888 storage.apply_open_stage_busy_timeout();
3889 storage.run_migrations()?;
3890 storage.repair_missing_current_schema_objects()?;
3891 storage.apply_config()?;
3892 Ok(storage)
3893 }
3894
3895 pub fn open_writer(path: &Path) -> Result<Self> {
3901 Self::open_writer_with_shared_caches(
3902 path,
3903 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3904 Arc::new(parking_lot::Mutex::new(HashMap::new())),
3905 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3906 Arc::new(parking_lot::Mutex::new(HashSet::new())),
3907 )
3908 }
3909
3910 fn open_writer_with_shared_caches(
3911 path: &Path,
3912 ensured_agents: Arc<parking_lot::Mutex<HashMap<EnsuredAgentKey, i64>>>,
3913 ensured_workspaces: Arc<parking_lot::Mutex<HashMap<EnsuredWorkspaceKey, i64>>>,
3914 ensured_conversation_sources: Arc<
3915 parking_lot::Mutex<HashSet<EnsuredConversationSourceKey>>,
3916 >,
3917 ensured_daily_stats_keys: Arc<parking_lot::Mutex<HashSet<EnsuredDailyStatsKey>>>,
3918 ) -> Result<Self> {
3919 let path_str = path.to_string_lossy().to_string();
3920 let _doctor_guard =
3921 acquire_doctor_mutation_db_open_guard(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)?;
3922 let conn = FrankenConnection::open(&path_str)
3923 .with_context(|| format!("opening frankensqlite writer at {}", path.display()))?;
3924 let storage = Self::new_with_shared_caches(
3925 conn,
3926 path.to_path_buf(),
3927 ensured_agents,
3928 ensured_workspaces,
3929 ensured_conversation_sources,
3930 ensured_daily_stats_keys,
3931 );
3932 storage.apply_config()?;
3933 Ok(storage)
3934 }
3935
    /// Hands out a writer that shares this handle's ensure-caches, reusing the
    /// parked connection when one is available.
    ///
    /// The returned flag is `true` when the writer occupies the cache slot
    /// (it was taken from the slot, or freshly opened while the slot was
    /// empty) and `false` when the slot was already in use and an extra,
    /// unrelated writer had to be opened.
    /// NOTE(review): callers presumably use the flag to decide whether to hand
    /// the writer back via `release_cached_ephemeral_writer` — confirm.
    ///
    /// # Errors
    /// Returns the error from opening a new writer. If that happens while the
    /// slot was being claimed, the slot is reset to `Uninitialized`.
    pub(crate) fn acquire_cached_ephemeral_writer(&self) -> Result<(Self, bool)> {
        let mut cached = self.cached_ephemeral_writer.lock();
        // Claim the slot up front: whatever was there is taken out and the
        // slot is marked `InUse` in the same step.
        match std::mem::replace(&mut *cached, CachedEphemeralWriter::InUse) {
            CachedEphemeralWriter::Cached(conn) => {
                let (conn, checkpoint_pages, busy_timeout_ms) = (*conn).into_parts();
                let writer = Self::new_with_shared_caches(
                    conn,
                    self.db_path.clone(),
                    Arc::clone(&self.ensured_agents),
                    Arc::clone(&self.ensured_workspaces),
                    Arc::clone(&self.ensured_conversation_sources),
                    Arc::clone(&self.ensured_daily_stats_keys),
                );
                // Restore the index-writer settings that were saved alongside
                // the parked connection.
                writer
                    .index_writer_checkpoint_pages
                    .store(checkpoint_pages, Ordering::Relaxed);
                writer
                    .index_writer_busy_timeout_ms
                    .store(busy_timeout_ms, Ordering::Relaxed);
                Ok((writer, true))
            }
            CachedEphemeralWriter::Uninitialized => {
                // Release the mutex before opening so the open does not run
                // while the slot lock is held.
                drop(cached);
                match Self::open_writer_with_shared_caches(
                    &self.db_path,
                    Arc::clone(&self.ensured_agents),
                    Arc::clone(&self.ensured_workspaces),
                    Arc::clone(&self.ensured_conversation_sources),
                    Arc::clone(&self.ensured_daily_stats_keys),
                ) {
                    Ok(writer) => Ok((writer, true)),
                    Err(err) => {
                        // Undo the claim, but only if the slot is still marked
                        // `InUse`.
                        let mut cached = self.cached_ephemeral_writer.lock();
                        if matches!(&*cached, CachedEphemeralWriter::InUse) {
                            *cached = CachedEphemeralWriter::Uninitialized;
                        }
                        Err(err)
                    }
                }
            }
            CachedEphemeralWriter::InUse => {
                // `mem::replace` above already stored `InUse`; this assignment
                // leaves the slot unchanged.
                *cached = CachedEphemeralWriter::InUse;
                drop(cached);
                Ok((
                    Self::open_writer_with_shared_caches(
                        &self.db_path,
                        Arc::clone(&self.ensured_agents),
                        Arc::clone(&self.ensured_workspaces),
                        Arc::clone(&self.ensured_conversation_sources),
                        Arc::clone(&self.ensured_daily_stats_keys),
                    )?,
                    false,
                ))
            }
        }
    }
3992
3993 pub(crate) fn release_cached_ephemeral_writer(&self, writer: Self) {
3994 let checkpoint_pages = writer.index_writer_checkpoint_pages.load(Ordering::Relaxed);
3995 let busy_timeout_ms = writer.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
3996 let conn = writer.into_raw();
3997 let mut cached = self.cached_ephemeral_writer.lock();
3998 debug_assert!(
3999 matches!(&*cached, CachedEphemeralWriter::InUse),
4000 "cached ephemeral writer state should be in-use when releasing"
4001 );
4002 *cached = CachedEphemeralWriter::Cached(Box::new(
4003 SendFrankenConnection::new_with_index_writer_state(
4004 conn,
4005 checkpoint_pages,
4006 busy_timeout_ms,
4007 ),
4008 ));
4009 }
4010
4011 pub(crate) fn discard_cached_ephemeral_writer(&self, mut writer: Self) {
4012 writer.close_best_effort_in_place();
4013 let mut cached = self.cached_ephemeral_writer.lock();
4014 if matches!(&*cached, CachedEphemeralWriter::InUse) {
4015 *cached = CachedEphemeralWriter::Uninitialized;
4016 }
4017 }
4018
4019 fn cached_agent_id(&self, key: &EnsuredAgentKey) -> Option<i64> {
4020 self.ensured_agents.lock().get(key).copied()
4021 }
4022
4023 fn mark_agent_ensured(&self, key: EnsuredAgentKey, id: i64) {
4024 self.ensured_agents.lock().insert(key, id);
4025 }
4026
4027 fn cached_workspace_id(&self, key: &EnsuredWorkspaceKey) -> Option<i64> {
4028 self.ensured_workspaces.lock().get(key).copied()
4029 }
4030
4031 fn mark_workspace_ensured(&self, key: EnsuredWorkspaceKey, id: i64) {
4032 self.ensured_workspaces.lock().insert(key, id);
4033 }
4034
4035 fn conversation_source_already_ensured(&self, key: &EnsuredConversationSourceKey) -> bool {
4036 self.ensured_conversation_sources.lock().contains(key)
4037 }
4038
4039 fn mark_conversation_source_ensured(&self, key: EnsuredConversationSourceKey) {
4040 self.ensured_conversation_sources.lock().insert(key);
4041 }
4042
4043 fn daily_stats_key_already_ensured(&self, key: &EnsuredDailyStatsKey) -> bool {
4044 self.ensured_daily_stats_keys.lock().contains(key)
4045 }
4046
4047 fn daily_stats_keys_already_ensured(&self, keys: &[EnsuredDailyStatsKey; 4]) -> bool {
4048 let ensured = self.ensured_daily_stats_keys.lock();
4049 keys.iter().all(|key| ensured.contains(key))
4050 }
4051
4052 fn mark_daily_stats_key_ensured(&self, key: EnsuredDailyStatsKey) {
4053 self.ensured_daily_stats_keys.lock().insert(key);
4054 }
4055
4056 fn fts_messages_present_cached(&self, tx: &FrankenTransaction<'_>) -> bool {
4057 match self.fts_messages_present_cache.load(Ordering::Acquire) {
4058 FTS_MESSAGES_PRESENT_PRESENT => return true,
4059 FTS_MESSAGES_PRESENT_ABSENT => return false,
4060 _ => {}
4061 }
4062
4063 let present = tx
4064 .query_row_map(
4065 "SELECT COUNT(*) FROM sqlite_master
4066 WHERE name = 'fts_messages'
4067 AND rootpage > 0",
4068 fparams![],
4069 |row| row.get_typed::<i64>(0),
4070 )
4071 .map(|count| count > 0)
4072 .unwrap_or_else(|err| {
4073 tracing::debug!(
4074 error = %err,
4075 "failed to probe fts_messages presence; skipping db-resident FTS maintenance"
4076 );
4077 false
4078 });
4079 self.set_fts_messages_present_cache(present);
4080 present
4081 }
4082
4083 fn set_fts_messages_present_cache(&self, present: bool) {
4084 self.fts_messages_present_cache.store(
4085 if present {
4086 FTS_MESSAGES_PRESENT_PRESENT
4087 } else {
4088 FTS_MESSAGES_PRESENT_ABSENT
4089 },
4090 Ordering::Release,
4091 );
4092 }
4093
4094 fn invalidate_fts_messages_present_cache(&self) {
4095 self.fts_messages_present_cache
4096 .store(FTS_MESSAGES_PRESENT_UNKNOWN, Ordering::Release);
4097 }
4098
4099 fn invalidate_conversation_source_cache(&self, source_id: &str) {
4100 self.ensured_conversation_sources
4101 .lock()
4102 .retain(|key| key.id != source_id);
4103 }
4104
4105 fn close_cached_ephemeral_writer_best_effort_in_place(&mut self) {
4106 let cached = self.cached_ephemeral_writer.get_mut();
4107 if let CachedEphemeralWriter::Cached(conn) =
4108 std::mem::replace(cached, CachedEphemeralWriter::Uninitialized)
4109 {
4110 let mut conn = conn;
4111 conn.0.close_best_effort_in_place();
4112 }
4113 }
4114
4115 fn close_cached_ephemeral_writer_without_checkpoint_in_place(&mut self) -> Result<()> {
4116 let cached = self.cached_ephemeral_writer.get_mut();
4117 match std::mem::replace(cached, CachedEphemeralWriter::Uninitialized) {
4118 CachedEphemeralWriter::Cached(mut conn) => conn
4119 .0
4120 .close_without_checkpoint_in_place()
4121 .with_context(|| "closing cached frankensqlite writer without final checkpoint"),
4122 CachedEphemeralWriter::Uninitialized | CachedEphemeralWriter::InUse => Ok(()),
4123 }
4124 }
4125
4126 pub fn open_readonly(path: &Path) -> Result<Self> {
4128 Self::open_readonly_with_doctor_lock_timeout(path, DOCTOR_MUTATION_DB_OPEN_LOCK_TIMEOUT)
4129 }
4130
4131 pub fn open_readonly_with_doctor_lock_timeout(path: &Path, timeout: Duration) -> Result<Self> {
4136 let path_str = path.to_string_lossy().to_string();
4137 let _doctor_guard = acquire_doctor_mutation_db_open_guard(path, timeout)?;
4138 let conn = open_franken_with_flags(&path_str, FrankenOpenFlags::SQLITE_OPEN_READ_ONLY)
4139 .with_context(|| format!("opening frankensqlite db readonly at {}", path.display()))?;
4140 let storage = Self::new(conn, path.to_path_buf());
4141 storage.apply_readonly_config()?;
4142 Ok(storage)
4143 }
4144
4145 pub fn close(self) -> Result<()> {
4146 let mut this = self;
4147 this.close_cached_ephemeral_writer_best_effort_in_place();
4148 this.conn
4149 .close()
4150 .with_context(|| "closing frankensqlite connection")
4151 }
4152
4153 pub fn close_without_checkpoint(self) -> Result<()> {
4154 let mut this = self;
4155 this.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
4156 this.conn
4157 .close_without_checkpoint()
4158 .with_context(|| "closing frankensqlite connection without final checkpoint")
4159 }
4160
4161 pub fn close_best_effort_in_place(&mut self) {
4162 self.close_cached_ephemeral_writer_best_effort_in_place();
4163 self.conn.close_best_effort_in_place();
4164 }
4165
4166 pub fn close_without_checkpoint_in_place(&mut self) -> Result<()> {
4167 self.close_cached_ephemeral_writer_without_checkpoint_in_place()?;
4168 self.conn
4169 .close_without_checkpoint_in_place()
4170 .with_context(|| "closing frankensqlite connection without final checkpoint")
4171 }
4172
4173 pub fn raw(&self) -> &FrankenConnection {
4175 &self.conn
4176 }
4177
4178 pub fn into_raw(self) -> FrankenConnection {
4181 let mut this = self;
4182 this.close_cached_ephemeral_writer_best_effort_in_place();
4183 this.conn
4184 }
4185
4186 pub fn apply_config(&self) -> Result<()> {
4193 self.conn
4197 .execute("PRAGMA journal_mode = WAL;")
4198 .with_context(|| "setting journal_mode")?;
4199 self.conn
4200 .execute("PRAGMA synchronous = NORMAL;")
4201 .with_context(|| "setting synchronous")?;
4202
4203 self.conn
4205 .execute("PRAGMA cache_size = -65536;")
4206 .with_context(|| "setting cache_size")?;
4207
4208 self.conn
4210 .execute("PRAGMA foreign_keys = ON;")
4211 .with_context(|| "setting foreign_keys")?;
4212
4213 self.conn
4215 .execute("PRAGMA busy_timeout = 5000;")
4216 .with_context(|| "setting busy_timeout")?;
4217
4218 let checkpoint_pragma =
4226 format!("PRAGMA wal_autocheckpoint = {DEFAULT_WAL_AUTOCHECKPOINT_PAGES};");
4227 let _ = self.conn.execute(&checkpoint_pragma);
4228 self.index_writer_checkpoint_pages
4229 .store(DEFAULT_WAL_AUTOCHECKPOINT_PAGES, Ordering::Relaxed);
4230 let _ = self.conn.execute("PRAGMA fsqlite.concurrent_mode = ON;");
4233 let _ = self.conn.execute("PRAGMA concurrent_mode = ON;");
4234 let autocommit_pragma =
4245 disable_autocommit_retain(|pragma| self.conn.execute(pragma).map(|_| ()))?;
4246 tracing::debug!(
4247 pragma = autocommit_pragma,
4248 "disabled frankensqlite autocommit_retain for storage connection"
4249 );
4250
4251 Ok(())
4252 }
4253
4254 fn apply_readonly_config(&self) -> Result<()> {
4255 self.conn
4256 .execute("PRAGMA query_only = 1;")
4257 .with_context(|| "setting query_only")?;
4258 self.conn
4259 .execute("PRAGMA busy_timeout = 5000;")
4260 .with_context(|| "setting busy_timeout")?;
4261 self.conn
4262 .execute("PRAGMA cache_size = -65536;")
4263 .with_context(|| "setting cache_size")?;
4264 self.conn
4265 .execute("PRAGMA foreign_keys = ON;")
4266 .with_context(|| "setting foreign_keys")?;
4267 Ok(())
4268 }
4269
4270 pub fn run_migrations(&self) -> Result<()> {
4288 transition_from_meta_version(&self.conn)?;
4289
4290 let base_result = build_cass_migrations_before_tail_cache()
4291 .run(&self.conn)
4292 .with_context(|| "running base schema migrations")?;
4293
4294 let mut applied = base_result.applied;
4295 if apply_conversation_tail_state_cache_migration(&self.conn)
4296 .with_context(|| "running conversation tail-state cache migration")?
4297 {
4298 applied.push(15);
4299 }
4300
4301 let post_result = build_cass_migrations_after_tail_cache()
4302 .run(&self.conn)
4303 .with_context(|| "running post-tail-cache schema migrations")?;
4304 applied.extend(post_result.applied);
4305
4306 let current = self.schema_version()?;
4307 if !applied.is_empty() {
4308 info!(
4309 applied = ?applied,
4310 current,
4311 was_fresh = base_result.was_fresh,
4312 "frankensqlite schema migrations applied"
4313 );
4314 }
4315
4316 self.sync_meta_schema_version(current)?;
4318
4319 Ok(())
4320 }
4321
4322 fn repair_missing_current_schema_objects(&self) -> Result<()> {
4327 let mut missing_tables = Vec::new();
4328 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
4329 if let Err(err) = self.conn.query(probe_sql) {
4330 if error_indicates_missing_table(&err) {
4331 missing_tables.push(table_name);
4332 continue;
4333 }
4334 return Err(err).with_context(|| {
4335 format!("probing required schema table {table_name} for completeness")
4336 });
4337 }
4338 }
4339
4340 if !missing_tables.is_empty() {
4341 info!(
4342 missing_tables = ?missing_tables,
4343 "repairing missing current-schema tables on an already-versioned cass database"
4344 );
4345
4346 for batch in current_schema_repair_batches_for_missing_tables(&missing_tables)? {
4347 self.conn
4348 .execute_batch(batch.sql)
4349 .with_context(|| format!("repairing current-schema batch {}", batch.name))?;
4350 }
4351
4352 for &(table_name, probe_sql) in REQUIRED_CURRENT_SCHEMA_TABLE_PROBES {
4353 if !missing_tables.contains(&table_name) {
4354 continue;
4355 }
4356 self.conn
4357 .query(probe_sql)
4358 .with_context(|| format!("verifying repaired schema table {table_name}"))?;
4359 }
4360 }
4361 self.repair_missing_conversation_token_columns()?;
4362 Ok(())
4363 }
4364
4365 fn repair_missing_conversation_token_columns(&self) -> Result<()> {
4366 let columns = franken_table_column_names(&self.conn, "conversations")
4367 .with_context(|| "inspecting conversations columns for token-summary repair")?;
4368 let mut missing_columns = Vec::new();
4369 for &(column_name, column_type) in REQUIRED_CONVERSATION_TOKEN_COLUMNS {
4370 if columns.contains(column_name) {
4371 continue;
4372 }
4373 let sql = format!("ALTER TABLE conversations ADD COLUMN {column_name} {column_type};");
4374 self.conn.execute(&sql).with_context(|| {
4375 format!("adding missing conversations.{column_name} token-summary column")
4376 })?;
4377 missing_columns.push(column_name);
4378 }
4379 if !missing_columns.is_empty() {
4380 tracing::warn!(
4381 target: "cass::schema_repair",
4382 db_path = %self.db_path.display(),
4383 missing_columns = ?missing_columns,
4384 "cass#222: repaired missing conversations token-summary columns"
4385 );
4386 }
4387 Ok(())
4388 }
4389
4390 pub(crate) fn cleanup_orphan_fk_rows(&self) -> Result<OrphanFkCleanupReport> {
4409 let mut report = OrphanFkCleanupReport::default();
4410 let orphan_message_ids = match collect_orphan_message_ids(&self.conn) {
4411 Ok(ids) => ids,
4412 Err(err) if error_indicates_missing_table(&err) => {
4413 tracing::debug!(
4414 target: "cass::fk_repair",
4415 child_table = "messages",
4416 error = %err,
4417 "skipping orphan-message probe (table or column unavailable)"
4418 );
4419 Vec::new()
4420 }
4421 Err(err) => return Err(err),
4422 };
4423 if !orphan_message_ids.is_empty() {
4424 report.record("messages", orphan_message_ids.len() as i64);
4425 }
4426
4427 if !orphan_message_ids.is_empty() {
4428 delete_orphan_message_ids_bisecting_oom(&self.conn, &orphan_message_ids)
4429 .context("deleting orphan message rows and dependent children")?;
4430 }
4431
4432 for entry in ORPHAN_DIRECT_CHILD_TABLES {
4433 loop {
4434 let ids = match collect_direct_orphan_id_page(&self.conn, entry) {
4435 Ok(ids) => ids,
4436 Err(err)
4437 if error_indicates_missing_table(&err)
4438 || error_indicates_missing_column(&err) =>
4439 {
4440 tracing::debug!(
4444 target: "cass::fk_repair",
4445 child_table = entry.child_table,
4446 error = %err,
4447 "skipping orphan probe (table or column unavailable)"
4448 );
4449 break;
4450 }
4451 Err(err) => {
4452 return Err(err).with_context(|| {
4453 format!("probing orphan rows in {}", entry.child_table)
4454 });
4455 }
4456 };
4457 if ids.is_empty() {
4458 break;
4459 }
4460
4461 let deleted = delete_direct_orphan_ids_bisecting_oom(&self.conn, entry, &ids)
4462 .with_context(|| format!("deleting orphan rows from {}", entry.child_table))?;
4463 if deleted == 0 {
4464 break;
4465 }
4466 report.record(
4467 entry.child_table,
4468 i64::try_from(deleted).unwrap_or(i64::MAX),
4469 );
4470 }
4471 }
4472
4473 if report.total == 0 {
4474 return Ok(report);
4475 }
4476
4477 tracing::warn!(
4482 target: "cass::fk_repair",
4483 db_path = %self.db_path.display(),
4484 total_orphans = report.total,
4485 per_table = ?report.per_table,
4486 "cass#202: removed orphan rows left behind by interrupted index transactions"
4487 );
4488
4489 Ok(report)
4490 }
4491
4492 pub fn schema_version(&self) -> Result<i64> {
4494 let rows = self
4495 .conn
4496 .query("SELECT MAX(version) FROM _schema_migrations;")
4497 .with_context(|| "reading schema version from _schema_migrations")?;
4498
4499 if let Some(row) = rows.first()
4500 && let Ok(v) = row.get_typed::<Option<i64>>(0)
4501 {
4502 return Ok(v.unwrap_or(0));
4503 }
4504 Ok(0)
4505 }
4506
4507 fn sync_meta_schema_version(&self, version: i64) -> Result<()> {
4509 if self.conn.query("SELECT key FROM meta LIMIT 1;").is_err() {
4512 return Ok(());
4513 }
4514
4515 if let Ok(rows) = self
4517 .conn
4518 .query("SELECT value FROM meta WHERE key = 'schema_version';")
4519 && let Some(row) = rows.first()
4520 && let Ok(val) = row.get_typed::<String>(0)
4521 && val == version.to_string()
4522 {
4523 return Ok(()); }
4525
4526 self.conn
4527 .execute_compat(
4528 "INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', ?1);",
4529 &[ParamValue::from(version.to_string())],
4530 )
4531 .with_context(|| "syncing meta schema_version")?;
4532
4533 Ok(())
4534 }
4535
4536 pub fn database_path(&self) -> Result<PathBuf> {
4538 Ok(self.db_path.clone())
4539 }
4540
4541 pub(crate) fn ephemeral_writer_preflight_verified(&self) -> bool {
4542 self.ephemeral_writer_preflight_verified
4543 .load(Ordering::Relaxed)
4544 }
4545
4546 pub(crate) fn mark_ephemeral_writer_preflight_verified(&self) {
4547 self.ephemeral_writer_preflight_verified
4548 .store(true, Ordering::Relaxed);
4549 }
4550
4551 pub(crate) fn index_writer_checkpoint_pages(&self) -> Option<i64> {
4552 let pages = self.index_writer_checkpoint_pages.load(Ordering::Relaxed);
4553 (pages != UNSET_INDEX_WRITER_CHECKPOINT_PAGES).then_some(pages)
4554 }
4555
4556 pub(crate) fn mark_index_writer_checkpoint_pages(&self, pages: i64) {
4557 self.index_writer_checkpoint_pages
4558 .store(pages, Ordering::Relaxed);
4559 }
4560
4561 pub(crate) fn index_writer_busy_timeout_ms(&self) -> Option<u64> {
4562 let timeout_ms = self.index_writer_busy_timeout_ms.load(Ordering::Relaxed);
4563 (timeout_ms != UNSET_INDEX_WRITER_BUSY_TIMEOUT_MS).then_some(timeout_ms)
4564 }
4565
4566 pub(crate) fn mark_index_writer_busy_timeout_ms(&self, timeout_ms: u64) {
4567 self.index_writer_busy_timeout_ms
4568 .store(timeout_ms, Ordering::Relaxed);
4569 }
4570
4571 pub fn open_or_rebuild(path: &Path) -> std::result::Result<Self, MigrationError> {
4573 if let Some(parent) = path.parent() {
4574 fs::create_dir_all(parent)?;
4575 }
4576
4577 if path.exists() {
4578 let check_result = check_schema_compatibility(path);
4579 match check_result {
4580 Ok(SchemaCheck::Compatible) | Ok(SchemaCheck::NeedsMigration) => {
4581 }
4583 Ok(SchemaCheck::NeedsRebuild(reason)) => {
4584 let backup_path = create_backup(path)?;
4585 cleanup_old_backups(path, MAX_BACKUPS)?;
4586 remove_database_files(path)?;
4587 return Err(MigrationError::RebuildRequired {
4588 reason,
4589 backup_path,
4590 });
4591 }
4592 Err(err) if schema_check_error_requires_rebuild(&err) => {
4593 let backup_path = create_backup(path)?;
4594 cleanup_old_backups(path, MAX_BACKUPS)?;
4595 remove_database_files(path)?;
4596 return Err(MigrationError::RebuildRequired {
4597 reason: format!("Database appears corrupted: {err}"),
4598 backup_path,
4599 });
4600 }
4601 Err(err) => return Err(MigrationError::Database(err)),
4602 }
4603 }
4604
4605 let storage = Self::open(path).map_err(|e| MigrationError::Other(e.to_string()))?;
4606 Ok(storage)
4607 }
4608}
4609
4610fn build_cass_migrations_before_tail_cache() -> MigrationRunner {
4626 MigrationRunner::new()
4627 .add(13, "full_schema_v13", MIGRATION_FRESH_SCHEMA)
4628 .add(14, "fts_contentless", MIGRATION_V14)
4629}
4630
4631fn build_cass_migrations_after_tail_cache() -> MigrationRunner {
4632 MigrationRunner::new()
4633 .add(16, "drop_redundant_message_conv_idx", MIGRATION_V16)
4634 .add(17, "drop_message_created_idx", MIGRATION_V17)
4635 .add(18, "conversation_tail_state_hot_table", MIGRATION_V18)
4636 .add(19, "conversation_external_lookup", MIGRATION_V19)
4637 .add(20, "conversation_external_tail_lookup", MIGRATION_V20)
4638}
4639
4640fn schema_migration_is_applied(conn: &FrankenConnection, version: i64) -> Result<bool> {
4641 let rows = conn
4642 .query_with_params(
4643 "SELECT 1 FROM _schema_migrations WHERE version = ?1 LIMIT 1;",
4644 &[SqliteValue::from(version)],
4645 )
4646 .with_context(|| format!("checking schema migration version {version}"))?;
4647 Ok(!rows.is_empty())
4648}
4649
4650fn apply_conversation_tail_state_cache_migration(conn: &FrankenConnection) -> Result<bool> {
4651 conn.execute("BEGIN IMMEDIATE;")
4652 .with_context(|| "starting v15 conversation tail-state migration transaction")?;
4653
4654 let result = (|| -> Result<bool> {
4655 if schema_migration_is_applied(conn, 15)? {
4656 conn.execute("COMMIT;")
4657 .with_context(|| "committing already-applied v15 migration transaction")?;
4658 return Ok(false);
4659 }
4660
4661 let started = Instant::now();
4662 let conversation_columns = franken_table_column_names(conn, "conversations")
4663 .with_context(|| "inspecting conversations columns before v15 migration")?;
4664 if !conversation_columns.contains("last_message_idx") {
4665 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_idx INTEGER;")
4666 .with_context(|| "adding v15 conversations.last_message_idx column")?;
4667 }
4668 if !conversation_columns.contains("last_message_created_at") {
4669 conn.execute("ALTER TABLE conversations ADD COLUMN last_message_created_at INTEGER;")
4670 .with_context(|| "adding v15 conversations.last_message_created_at column")?;
4671 }
4672 conn.execute_batch(MIGRATION_V15_TAIL_STATE_TABLE)
4673 .with_context(|| "applying v15 conversation tail-state table schema")?;
4674 conn.execute_compat(
4675 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
4676 fparams![15_i64, "conversation_tail_state_cache"],
4677 )
4678 .with_context(|| "recording v15 conversation tail-state migration")?;
4679 conn.execute("COMMIT;")
4680 .with_context(|| "committing v15 conversation tail-state migration")?;
4681 info!(
4682 elapsed_ms = started.elapsed().as_millis(),
4683 "applied v15 conversation tail-state cache migration"
4684 );
4685 Ok(true)
4686 })();
4687
4688 if result.is_err() {
4689 let _ = conn.execute("ROLLBACK;");
4690 }
4691
4692 result
4693}
4694
4695fn franken_table_column_names(
4696 conn: &FrankenConnection,
4697 table_name: &str,
4698) -> Result<HashSet<String>> {
4699 if !table_name
4700 .chars()
4701 .all(|c| c.is_ascii_alphanumeric() || c == '_')
4702 {
4703 return Err(anyhow!(
4704 "unsafe table name for PRAGMA table_info: {table_name}"
4705 ));
4706 }
4707
4708 conn.query_map_collect(
4709 &format!("PRAGMA table_info({table_name})"),
4710 fparams![],
4711 |row: &FrankenRow| row.get_typed::<String>(1),
4712 )
4713 .with_context(|| format!("reading PRAGMA table_info({table_name})"))
4714 .map(|columns| columns.into_iter().collect())
4715}
4716
4717const MIGRATION_FRESH_SCHEMA: &str = r"
4727-- Core tables (V1)
4728CREATE TABLE IF NOT EXISTS meta (
4729 key TEXT PRIMARY KEY,
4730 value TEXT NOT NULL
4731);
4732
4733CREATE TABLE IF NOT EXISTS agents (
4734 id INTEGER PRIMARY KEY,
4735 slug TEXT NOT NULL UNIQUE,
4736 name TEXT NOT NULL,
4737 version TEXT,
4738 kind TEXT NOT NULL,
4739 created_at INTEGER NOT NULL,
4740 updated_at INTEGER NOT NULL
4741);
4742
4743CREATE TABLE IF NOT EXISTS workspaces (
4744 id INTEGER PRIMARY KEY,
4745 path TEXT NOT NULL UNIQUE,
4746 display_name TEXT
4747);
4748
4749-- Sources (V4)
4750CREATE TABLE IF NOT EXISTS sources (
4751 id TEXT PRIMARY KEY,
4752 kind TEXT NOT NULL,
4753 host_label TEXT,
4754 machine_id TEXT,
4755 platform TEXT,
4756 config_json TEXT,
4757 created_at INTEGER NOT NULL,
4758 updated_at INTEGER NOT NULL
4759);
4760
4761INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
4762VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
4763
4764-- Conversations: V1 base + V5 provenance + V7 metadata_bin + V10 token summary
4765CREATE TABLE IF NOT EXISTS conversations (
4766 id INTEGER PRIMARY KEY,
4767 agent_id INTEGER NOT NULL REFERENCES agents(id),
4768 workspace_id INTEGER REFERENCES workspaces(id),
4769 source_id TEXT NOT NULL DEFAULT 'local' REFERENCES sources(id),
4770 external_id TEXT,
4771 title TEXT,
4772 source_path TEXT NOT NULL,
4773 started_at INTEGER,
4774 ended_at INTEGER,
4775 approx_tokens INTEGER,
4776 metadata_json TEXT,
4777 origin_host TEXT,
4778 metadata_bin BLOB,
4779 total_input_tokens INTEGER,
4780 total_output_tokens INTEGER,
4781 total_cache_read_tokens INTEGER,
4782 total_cache_creation_tokens INTEGER,
4783 grand_total_tokens INTEGER,
4784 estimated_cost_usd REAL,
4785 primary_model TEXT,
4786 api_call_count INTEGER,
4787 tool_call_count INTEGER,
4788 user_message_count INTEGER,
4789 assistant_message_count INTEGER,
4790 -- V15 columns are included in the fresh schema so fresh DB creation does
4791 -- not need ALTER TABLE on conversations. That ALTER path can duplicate
4792 -- provenance autoindex state in frankensqlite when the named unique
4793 -- provenance index already exists.
4794 last_message_idx INTEGER,
4795 last_message_created_at INTEGER
4796);
4797
4798-- Named unique index avoids autoindex issues if table is ever recreated
4799CREATE UNIQUE INDEX IF NOT EXISTS idx_conversations_provenance
4800 ON conversations(source_id, agent_id, external_id);
4801
4802-- Messages: V1 base + V7 extra_bin
4803CREATE TABLE IF NOT EXISTS messages (
4804 id INTEGER PRIMARY KEY,
4805 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
4806 idx INTEGER NOT NULL,
4807 role TEXT NOT NULL,
4808 author TEXT,
4809 created_at INTEGER,
4810 content TEXT NOT NULL,
4811 extra_json TEXT,
4812 extra_bin BLOB,
4813 UNIQUE(conversation_id, idx)
4814);
4815
4816CREATE TABLE IF NOT EXISTS snippets (
4817 id INTEGER PRIMARY KEY,
4818 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
4819 file_path TEXT,
4820 start_line INTEGER,
4821 end_line INTEGER,
4822 language TEXT,
4823 snippet_text TEXT
4824);
4825
4826CREATE TABLE IF NOT EXISTS tags (
4827 id INTEGER PRIMARY KEY,
4828 name TEXT NOT NULL UNIQUE
4829);
4830
4831CREATE TABLE IF NOT EXISTS conversation_tags (
4832 conversation_id INTEGER NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
4833 tag_id INTEGER NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
4834 PRIMARY KEY (conversation_id, tag_id)
4835);
4836
4837-- Daily stats (V8)
4838CREATE TABLE IF NOT EXISTS daily_stats (
4839 day_id INTEGER NOT NULL,
4840 agent_slug TEXT NOT NULL,
4841 source_id TEXT NOT NULL DEFAULT 'all',
4842 session_count INTEGER NOT NULL DEFAULT 0,
4843 message_count INTEGER NOT NULL DEFAULT 0,
4844 total_chars INTEGER NOT NULL DEFAULT 0,
4845 last_updated INTEGER NOT NULL,
4846 PRIMARY KEY (day_id, agent_slug, source_id)
4847);
4848
4849-- Embedding jobs (V9)
4850CREATE TABLE IF NOT EXISTS embedding_jobs (
4851 id INTEGER PRIMARY KEY AUTOINCREMENT,
4852 db_path TEXT NOT NULL,
4853 model_id TEXT NOT NULL,
4854 status TEXT NOT NULL DEFAULT 'pending',
4855 total_docs INTEGER NOT NULL DEFAULT 0,
4856 completed_docs INTEGER NOT NULL DEFAULT 0,
4857 error_message TEXT,
4858 created_at TEXT NOT NULL DEFAULT (datetime('now')),
4859 started_at TEXT,
4860 completed_at TEXT
4861);
4862
4863CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
4864ON embedding_jobs(db_path, model_id)
4865WHERE status IN ('pending', 'running');
4866
4867-- Token usage ledger (V10)
4868CREATE TABLE IF NOT EXISTS token_usage (
4869 id INTEGER PRIMARY KEY AUTOINCREMENT,
4870 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
4871 conversation_id INTEGER NOT NULL,
4872 agent_id INTEGER NOT NULL,
4873 workspace_id INTEGER,
4874 source_id TEXT NOT NULL DEFAULT 'local',
4875 timestamp_ms INTEGER NOT NULL,
4876 day_id INTEGER NOT NULL,
4877 model_name TEXT,
4878 model_family TEXT,
4879 model_tier TEXT,
4880 service_tier TEXT,
4881 provider TEXT,
4882 input_tokens INTEGER,
4883 output_tokens INTEGER,
4884 cache_read_tokens INTEGER,
4885 cache_creation_tokens INTEGER,
4886 thinking_tokens INTEGER,
4887 total_tokens INTEGER,
4888 estimated_cost_usd REAL,
4889 role TEXT NOT NULL,
4890 content_chars INTEGER NOT NULL,
4891 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4892 tool_call_count INTEGER NOT NULL DEFAULT 0,
4893 data_source TEXT NOT NULL DEFAULT 'api',
4894 UNIQUE(message_id)
4895);
4896
4897-- Token daily stats (V10)
4898CREATE TABLE IF NOT EXISTS token_daily_stats (
4899 day_id INTEGER NOT NULL,
4900 agent_slug TEXT NOT NULL,
4901 source_id TEXT NOT NULL DEFAULT 'all',
4902 model_family TEXT NOT NULL DEFAULT 'all',
4903 api_call_count INTEGER NOT NULL DEFAULT 0,
4904 user_message_count INTEGER NOT NULL DEFAULT 0,
4905 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4906 tool_message_count INTEGER NOT NULL DEFAULT 0,
4907 total_input_tokens INTEGER NOT NULL DEFAULT 0,
4908 total_output_tokens INTEGER NOT NULL DEFAULT 0,
4909 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
4910 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
4911 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
4912 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
4913 total_content_chars INTEGER NOT NULL DEFAULT 0,
4914 total_tool_calls INTEGER NOT NULL DEFAULT 0,
4915 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
4916 session_count INTEGER NOT NULL DEFAULT 0,
4917 last_updated INTEGER NOT NULL,
4918 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
4919);
4920
4921-- Model pricing (V10)
4922CREATE TABLE IF NOT EXISTS model_pricing (
4923 model_pattern TEXT NOT NULL,
4924 provider TEXT NOT NULL,
4925 input_cost_per_mtok REAL NOT NULL,
4926 output_cost_per_mtok REAL NOT NULL,
4927 cache_read_cost_per_mtok REAL,
4928 cache_creation_cost_per_mtok REAL,
4929 effective_date TEXT NOT NULL,
4930 PRIMARY KEY (model_pattern, effective_date)
4931);
4932
4933INSERT OR IGNORE INTO model_pricing VALUES
4934 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
4935 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
4936 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
4937 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
4938 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
4939 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4940 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
4941 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
4942 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
4943 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
4944
4945-- Message metrics: V11 base + V12 model dimensions
4946CREATE TABLE IF NOT EXISTS message_metrics (
4947 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
4948 created_at_ms INTEGER NOT NULL,
4949 hour_id INTEGER NOT NULL,
4950 day_id INTEGER NOT NULL,
4951 agent_slug TEXT NOT NULL,
4952 workspace_id INTEGER NOT NULL DEFAULT 0,
4953 source_id TEXT NOT NULL DEFAULT 'local',
4954 role TEXT NOT NULL,
4955 content_chars INTEGER NOT NULL,
4956 content_tokens_est INTEGER NOT NULL,
4957 api_input_tokens INTEGER,
4958 api_output_tokens INTEGER,
4959 api_cache_read_tokens INTEGER,
4960 api_cache_creation_tokens INTEGER,
4961 api_thinking_tokens INTEGER,
4962 api_service_tier TEXT,
4963 api_data_source TEXT NOT NULL DEFAULT 'estimated',
4964 tool_call_count INTEGER NOT NULL DEFAULT 0,
4965 has_tool_calls INTEGER NOT NULL DEFAULT 0,
4966 has_plan INTEGER NOT NULL DEFAULT 0,
4967 model_name TEXT,
4968 model_family TEXT NOT NULL DEFAULT 'unknown',
4969 model_tier TEXT NOT NULL DEFAULT 'unknown',
4970 provider TEXT NOT NULL DEFAULT 'unknown'
4971);
4972
4973-- Hourly rollups: V11 base + V13 plan columns
4974CREATE TABLE IF NOT EXISTS usage_hourly (
4975 hour_id INTEGER NOT NULL,
4976 agent_slug TEXT NOT NULL,
4977 workspace_id INTEGER NOT NULL DEFAULT 0,
4978 source_id TEXT NOT NULL DEFAULT 'local',
4979 message_count INTEGER NOT NULL DEFAULT 0,
4980 user_message_count INTEGER NOT NULL DEFAULT 0,
4981 assistant_message_count INTEGER NOT NULL DEFAULT 0,
4982 tool_call_count INTEGER NOT NULL DEFAULT 0,
4983 plan_message_count INTEGER NOT NULL DEFAULT 0,
4984 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
4985 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4986 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
4987 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
4988 api_tokens_total INTEGER NOT NULL DEFAULT 0,
4989 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
4990 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
4991 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
4992 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
4993 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
4994 last_updated INTEGER NOT NULL DEFAULT 0,
4995 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
4996 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
4997 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
4998);
4999
5000-- Daily rollups: V11 base + V13 plan columns
5001CREATE TABLE IF NOT EXISTS usage_daily (
5002 day_id INTEGER NOT NULL,
5003 agent_slug TEXT NOT NULL,
5004 workspace_id INTEGER NOT NULL DEFAULT 0,
5005 source_id TEXT NOT NULL DEFAULT 'local',
5006 message_count INTEGER NOT NULL DEFAULT 0,
5007 user_message_count INTEGER NOT NULL DEFAULT 0,
5008 assistant_message_count INTEGER NOT NULL DEFAULT 0,
5009 tool_call_count INTEGER NOT NULL DEFAULT 0,
5010 plan_message_count INTEGER NOT NULL DEFAULT 0,
5011 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
5012 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
5013 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
5014 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
5015 api_tokens_total INTEGER NOT NULL DEFAULT 0,
5016 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
5017 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
5018 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
5019 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
5020 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
5021 last_updated INTEGER NOT NULL DEFAULT 0,
5022 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
5023 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
5024 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
5025);
5026
5027-- Model daily rollups (V12)
5028CREATE TABLE IF NOT EXISTS usage_models_daily (
5029 day_id INTEGER NOT NULL,
5030 agent_slug TEXT NOT NULL,
5031 workspace_id INTEGER NOT NULL DEFAULT 0,
5032 source_id TEXT NOT NULL DEFAULT 'local',
5033 model_family TEXT NOT NULL DEFAULT 'unknown',
5034 model_tier TEXT NOT NULL DEFAULT 'unknown',
5035 message_count INTEGER NOT NULL DEFAULT 0,
5036 user_message_count INTEGER NOT NULL DEFAULT 0,
5037 assistant_message_count INTEGER NOT NULL DEFAULT 0,
5038 tool_call_count INTEGER NOT NULL DEFAULT 0,
5039 plan_message_count INTEGER NOT NULL DEFAULT 0,
5040 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
5041 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
5042 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
5043 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
5044 api_tokens_total INTEGER NOT NULL DEFAULT 0,
5045 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
5046 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
5047 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
5048 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
5049 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
5050 last_updated INTEGER NOT NULL DEFAULT 0,
5051 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
5052);
5053
5054-- All indexes
5055CREATE INDEX IF NOT EXISTS idx_conversations_agent_started ON conversations(agent_id, started_at DESC);
5056CREATE INDEX IF NOT EXISTS idx_conversations_source_id ON conversations(source_id);
5057CREATE INDEX IF NOT EXISTS idx_conversations_source_path ON conversations(source_path);
5058CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
5059CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
5060CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
5061CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
5062CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
5063CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
5064CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
5065CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
5066CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
5067CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
5068CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
5069CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
5070CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
5071CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
5072CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
5073CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
5074CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
5075CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
5076CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
5077CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
5078CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
5079CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
5080CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
5081CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
5082CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
5083CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
5084CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
5085";
5086
/// One named group of SQL statements used when repairing missing
/// current-schema tables.
#[derive(Clone, Copy)]
struct SchemaRepairBatch {
    /// Label for the batch; included in the error context when executing the
    /// batch fails.
    name: &'static str,
    /// Table names associated with this batch.
    // NOTE(review): presumably the tables whose absence selects this batch —
    // confirm against `current_schema_repair_batches_for_missing_tables`.
    tables: &'static [&'static str],
    /// SQL text executed as a single batch.
    sql: &'static str,
}
5093
5094const CURRENT_SCHEMA_REPAIR_SOURCES_SQL: &str = r"
5095CREATE TABLE IF NOT EXISTS sources (
5096 id TEXT PRIMARY KEY,
5097 kind TEXT NOT NULL,
5098 host_label TEXT,
5099 machine_id TEXT,
5100 platform TEXT,
5101 config_json TEXT,
5102 created_at INTEGER NOT NULL,
5103 updated_at INTEGER NOT NULL
5104);
5105
5106INSERT OR IGNORE INTO sources (id, kind, host_label, created_at, updated_at)
5107VALUES ('local', 'local', NULL, strftime('%s','now')*1000, strftime('%s','now')*1000);
5108";
5109
5110const CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL: &str = r"
5111CREATE TABLE IF NOT EXISTS daily_stats (
5112 day_id INTEGER NOT NULL,
5113 agent_slug TEXT NOT NULL,
5114 source_id TEXT NOT NULL DEFAULT 'all',
5115 session_count INTEGER NOT NULL DEFAULT 0,
5116 message_count INTEGER NOT NULL DEFAULT 0,
5117 total_chars INTEGER NOT NULL DEFAULT 0,
5118 last_updated INTEGER NOT NULL,
5119 PRIMARY KEY (day_id, agent_slug, source_id)
5120);
5121
5122CREATE INDEX IF NOT EXISTS idx_daily_stats_agent ON daily_stats(agent_slug, day_id);
5123CREATE INDEX IF NOT EXISTS idx_daily_stats_source ON daily_stats(source_id, day_id);
5124";
5125
5126const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL: &str = r"
5127CREATE TABLE IF NOT EXISTS conversation_external_lookup (
5128 lookup_key TEXT PRIMARY KEY,
5129 conversation_id INTEGER NOT NULL
5130);
5131
5132INSERT OR REPLACE INTO conversation_external_lookup (lookup_key, conversation_id)
5133SELECT
5134 CAST(length(source_id) AS TEXT) || ':' || source_id || ':' ||
5135 CAST(agent_id AS TEXT) || ':' ||
5136 CAST(length(external_id) AS TEXT) || ':' || external_id,
5137 id
5138FROM conversations
5139WHERE external_id IS NOT NULL;
5140";
5141
5142const CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL: &str = r"
5143CREATE TABLE IF NOT EXISTS conversation_tail_state (
5144 conversation_id INTEGER PRIMARY KEY,
5145 ended_at INTEGER,
5146 last_message_idx INTEGER,
5147 last_message_created_at INTEGER
5148);
5149
5150CREATE TABLE IF NOT EXISTS conversation_external_tail_lookup (
5151 lookup_key TEXT PRIMARY KEY,
5152 conversation_id INTEGER NOT NULL,
5153 ended_at INTEGER,
5154 last_message_idx INTEGER,
5155 last_message_created_at INTEGER
5156);
5157
5158INSERT OR REPLACE INTO conversation_external_tail_lookup (
5159 lookup_key,
5160 conversation_id,
5161 ended_at,
5162 last_message_idx,
5163 last_message_created_at
5164)
5165SELECT
5166 CAST(length(c.source_id) AS TEXT) || ':' || c.source_id || ':' ||
5167 CAST(c.agent_id AS TEXT) || ':' ||
5168 CAST(length(c.external_id) AS TEXT) || ':' || c.external_id,
5169 c.id,
5170 ts.ended_at,
5171 ts.last_message_idx,
5172 ts.last_message_created_at
5173FROM conversations c
5174LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
5175WHERE c.external_id IS NOT NULL;
5176";
5177
5178const CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL: &str = r"
5179CREATE TABLE IF NOT EXISTS embedding_jobs (
5180 id INTEGER PRIMARY KEY AUTOINCREMENT,
5181 db_path TEXT NOT NULL,
5182 model_id TEXT NOT NULL,
5183 status TEXT NOT NULL DEFAULT 'pending',
5184 total_docs INTEGER NOT NULL DEFAULT 0,
5185 completed_docs INTEGER NOT NULL DEFAULT 0,
5186 error_message TEXT,
5187 created_at TEXT NOT NULL DEFAULT (datetime('now')),
5188 started_at TEXT,
5189 completed_at TEXT
5190);
5191
5192CREATE UNIQUE INDEX IF NOT EXISTS idx_embedding_jobs_active
5193ON embedding_jobs(db_path, model_id)
5194WHERE status IN ('pending', 'running');
5195";
5196
5197const CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL: &str = r"
5198CREATE TABLE IF NOT EXISTS token_usage (
5199 id INTEGER PRIMARY KEY AUTOINCREMENT,
5200 message_id INTEGER NOT NULL REFERENCES messages(id) ON DELETE CASCADE,
5201 conversation_id INTEGER NOT NULL,
5202 agent_id INTEGER NOT NULL,
5203 workspace_id INTEGER,
5204 source_id TEXT NOT NULL DEFAULT 'local',
5205 timestamp_ms INTEGER NOT NULL,
5206 day_id INTEGER NOT NULL,
5207 model_name TEXT,
5208 model_family TEXT,
5209 model_tier TEXT,
5210 service_tier TEXT,
5211 provider TEXT,
5212 input_tokens INTEGER,
5213 output_tokens INTEGER,
5214 cache_read_tokens INTEGER,
5215 cache_creation_tokens INTEGER,
5216 thinking_tokens INTEGER,
5217 total_tokens INTEGER,
5218 estimated_cost_usd REAL,
5219 role TEXT NOT NULL,
5220 content_chars INTEGER NOT NULL,
5221 has_tool_calls INTEGER NOT NULL DEFAULT 0,
5222 tool_call_count INTEGER NOT NULL DEFAULT 0,
5223 data_source TEXT NOT NULL DEFAULT 'api',
5224 UNIQUE(message_id)
5225);
5226
5227CREATE INDEX IF NOT EXISTS idx_token_usage_day ON token_usage(day_id, agent_id);
5228CREATE INDEX IF NOT EXISTS idx_token_usage_conv ON token_usage(conversation_id);
5229CREATE INDEX IF NOT EXISTS idx_token_usage_model ON token_usage(model_family, day_id);
5230CREATE INDEX IF NOT EXISTS idx_token_usage_workspace ON token_usage(workspace_id, day_id);
5231CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp ON token_usage(timestamp_ms);
5232
5233CREATE TABLE IF NOT EXISTS token_daily_stats (
5234 day_id INTEGER NOT NULL,
5235 agent_slug TEXT NOT NULL,
5236 source_id TEXT NOT NULL DEFAULT 'all',
5237 model_family TEXT NOT NULL DEFAULT 'all',
5238 api_call_count INTEGER NOT NULL DEFAULT 0,
5239 user_message_count INTEGER NOT NULL DEFAULT 0,
5240 assistant_message_count INTEGER NOT NULL DEFAULT 0,
5241 tool_message_count INTEGER NOT NULL DEFAULT 0,
5242 total_input_tokens INTEGER NOT NULL DEFAULT 0,
5243 total_output_tokens INTEGER NOT NULL DEFAULT 0,
5244 total_cache_read_tokens INTEGER NOT NULL DEFAULT 0,
5245 total_cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
5246 total_thinking_tokens INTEGER NOT NULL DEFAULT 0,
5247 grand_total_tokens INTEGER NOT NULL DEFAULT 0,
5248 total_content_chars INTEGER NOT NULL DEFAULT 0,
5249 total_tool_calls INTEGER NOT NULL DEFAULT 0,
5250 estimated_cost_usd REAL NOT NULL DEFAULT 0.0,
5251 session_count INTEGER NOT NULL DEFAULT 0,
5252 last_updated INTEGER NOT NULL,
5253 PRIMARY KEY (day_id, agent_slug, source_id, model_family)
5254);
5255
5256CREATE INDEX IF NOT EXISTS idx_token_daily_stats_agent ON token_daily_stats(agent_slug, day_id);
5257CREATE INDEX IF NOT EXISTS idx_token_daily_stats_model ON token_daily_stats(model_family, day_id);
5258
5259CREATE TABLE IF NOT EXISTS model_pricing (
5260 model_pattern TEXT NOT NULL,
5261 provider TEXT NOT NULL,
5262 input_cost_per_mtok REAL NOT NULL,
5263 output_cost_per_mtok REAL NOT NULL,
5264 cache_read_cost_per_mtok REAL,
5265 cache_creation_cost_per_mtok REAL,
5266 effective_date TEXT NOT NULL,
5267 PRIMARY KEY (model_pattern, effective_date)
5268);
5269
5270INSERT OR IGNORE INTO model_pricing VALUES
5271 ('claude-opus-4%', 'anthropic', 15.0, 75.0, 1.5, 18.75, '2025-10-01'),
5272 ('claude-sonnet-4%', 'anthropic', 3.0, 15.0, 0.3, 3.75, '2025-10-01'),
5273 ('claude-haiku-4%', 'anthropic', 0.80, 4.0, 0.08, 1.0, '2025-10-01'),
5274 ('gpt-4o%', 'openai', 2.50, 10.0, NULL, NULL, '2025-01-01'),
5275 ('gpt-4-turbo%', 'openai', 10.0, 30.0, NULL, NULL, '2024-04-01'),
5276 ('gpt-4.1%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
5277 ('o3%', 'openai', 2.0, 8.0, NULL, NULL, '2025-04-01'),
5278 ('o4-mini%', 'openai', 1.10, 4.40, NULL, NULL, '2025-04-01'),
5279 ('gemini-2%flash%', 'google', 0.075, 0.30, NULL, NULL, '2025-01-01'),
5280 ('gemini-2%pro%', 'google', 1.25, 10.0, NULL, NULL, '2025-01-01');
5281";
5282
/// DDL that (re)creates the message-rollup tables `message_metrics`,
/// `usage_hourly`, `usage_daily` and `usage_models_daily`, plus their indexes.
///
/// Every statement uses `IF NOT EXISTS`, so the script only adds objects that
/// are absent. It is the SQL payload of the `message_rollups` entry in
/// `CURRENT_SCHEMA_REPAIR_BATCHES`.
const CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL: &str = r"
CREATE TABLE IF NOT EXISTS message_metrics (
 message_id INTEGER PRIMARY KEY REFERENCES messages(id) ON DELETE CASCADE,
 created_at_ms INTEGER NOT NULL,
 hour_id INTEGER NOT NULL,
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 role TEXT NOT NULL,
 content_chars INTEGER NOT NULL,
 content_tokens_est INTEGER NOT NULL,
 api_input_tokens INTEGER,
 api_output_tokens INTEGER,
 api_cache_read_tokens INTEGER,
 api_cache_creation_tokens INTEGER,
 api_thinking_tokens INTEGER,
 api_service_tier TEXT,
 api_data_source TEXT NOT NULL DEFAULT 'estimated',
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 has_tool_calls INTEGER NOT NULL DEFAULT 0,
 has_plan INTEGER NOT NULL DEFAULT 0,
 model_name TEXT,
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 provider TEXT NOT NULL DEFAULT 'unknown'
);

CREATE TABLE IF NOT EXISTS usage_hourly (
 hour_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (hour_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 plan_content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 plan_api_tokens_total INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id)
);

CREATE TABLE IF NOT EXISTS usage_models_daily (
 day_id INTEGER NOT NULL,
 agent_slug TEXT NOT NULL,
 workspace_id INTEGER NOT NULL DEFAULT 0,
 source_id TEXT NOT NULL DEFAULT 'local',
 model_family TEXT NOT NULL DEFAULT 'unknown',
 model_tier TEXT NOT NULL DEFAULT 'unknown',
 message_count INTEGER NOT NULL DEFAULT 0,
 user_message_count INTEGER NOT NULL DEFAULT 0,
 assistant_message_count INTEGER NOT NULL DEFAULT 0,
 tool_call_count INTEGER NOT NULL DEFAULT 0,
 plan_message_count INTEGER NOT NULL DEFAULT 0,
 api_coverage_message_count INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_total INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_user INTEGER NOT NULL DEFAULT 0,
 content_tokens_est_assistant INTEGER NOT NULL DEFAULT 0,
 api_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_input_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_output_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_read_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_cache_creation_tokens_total INTEGER NOT NULL DEFAULT 0,
 api_thinking_tokens_total INTEGER NOT NULL DEFAULT 0,
 last_updated INTEGER NOT NULL DEFAULT 0,
 PRIMARY KEY (day_id, agent_slug, workspace_id, source_id, model_family, model_tier)
);

CREATE INDEX IF NOT EXISTS idx_mm_hour ON message_metrics(hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_day ON message_metrics(day_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_hour ON message_metrics(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_agent_day ON message_metrics(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_workspace_hour ON message_metrics(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_source_hour ON message_metrics(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_mm_model_family_day ON message_metrics(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_mm_provider_day ON message_metrics(provider, day_id);
CREATE INDEX IF NOT EXISTS idx_uh_agent ON usage_hourly(agent_slug, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_workspace ON usage_hourly(workspace_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_uh_source ON usage_hourly(source_id, hour_id);
CREATE INDEX IF NOT EXISTS idx_ud_agent ON usage_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_workspace ON usage_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_ud_source ON usage_daily(source_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_model_day ON usage_models_daily(model_family, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_agent_day ON usage_models_daily(agent_slug, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_workspace_day ON usage_models_daily(workspace_id, day_id);
CREATE INDEX IF NOT EXISTS idx_umd_source_day ON usage_models_daily(source_id, day_id);
";
5408
/// Registry of schema-repair batches.
///
/// Each entry names the batch, lists the tables it covers, and points at the
/// SQL script for it. `current_schema_repair_batches_for_missing_tables`
/// walks this slice in order and selects a batch as soon as any one of its
/// `tables` is reported missing.
const CURRENT_SCHEMA_REPAIR_BATCHES: &[SchemaRepairBatch] = &[
    SchemaRepairBatch {
        name: "sources",
        tables: &["sources"],
        sql: CURRENT_SCHEMA_REPAIR_SOURCES_SQL,
    },
    SchemaRepairBatch {
        name: "daily_stats",
        tables: &["daily_stats"],
        sql: CURRENT_SCHEMA_REPAIR_DAILY_STATS_SQL,
    },
    SchemaRepairBatch {
        name: "conversation_external_lookup",
        tables: &["conversation_external_lookup"],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_LOOKUP_SQL,
    },
    // One batch covers two tables: either being absent selects the batch.
    SchemaRepairBatch {
        name: "conversation_external_tail_lookup",
        tables: &[
            "conversation_tail_state",
            "conversation_external_tail_lookup",
        ],
        sql: CURRENT_SCHEMA_REPAIR_CONVERSATION_EXTERNAL_TAIL_LOOKUP_SQL,
    },
    SchemaRepairBatch {
        name: "embedding_jobs",
        tables: &["embedding_jobs"],
        sql: CURRENT_SCHEMA_REPAIR_EMBEDDING_JOBS_SQL,
    },
    SchemaRepairBatch {
        name: "token_analytics",
        tables: &["token_usage", "token_daily_stats", "model_pricing"],
        sql: CURRENT_SCHEMA_REPAIR_TOKEN_ANALYTICS_SQL,
    },
    SchemaRepairBatch {
        name: "message_rollups",
        tables: &[
            "message_metrics",
            "usage_hourly",
            "usage_daily",
            "usage_models_daily",
        ],
        sql: CURRENT_SCHEMA_REPAIR_MESSAGE_METRICS_SQL,
    },
];
5454
5455fn current_schema_repair_batches_for_missing_tables(
5456 missing_tables: &[&'static str],
5457) -> Result<Vec<&'static SchemaRepairBatch>> {
5458 let missing_set: HashSet<&'static str> = missing_tables.iter().copied().collect();
5459 let mut selected_batches = Vec::new();
5460 let mut covered_tables = HashSet::new();
5461
5462 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
5463 if !batch
5464 .tables
5465 .iter()
5466 .any(|table_name| missing_set.contains(table_name))
5467 {
5468 continue;
5469 }
5470 selected_batches.push(batch);
5471 covered_tables.extend(batch.tables.iter().copied());
5472 }
5473
5474 for &table_name in missing_tables {
5475 if !covered_tables.contains(table_name) {
5476 return Err(anyhow!(
5477 "no current-schema repair batch registered for missing table {table_name}"
5478 ));
5479 }
5480 }
5481
5482 Ok(selected_batches)
5483}
5484
/// `(version, name)` pairs for schema migrations 1 through 20, in ascending
/// version order.
///
/// `transition_from_meta_version` iterates this table in order and stops at
/// the first version above its cutoff, so the ascending order is relied upon.
const MIGRATION_NAMES: [(i64, &str); 20] = [
    (1, "core_tables"),
    (2, "fts_messages"),
    (3, "fts_messages_rebuild"),
    (4, "sources"),
    (5, "provenance_columns"),
    (6, "source_path_index"),
    (7, "msgpack_columns"),
    (8, "daily_stats"),
    (9, "embedding_jobs"),
    (10, "token_analytics"),
    (11, "message_metrics"),
    (12, "model_dimensions"),
    (13, "plan_token_rollups"),
    (14, "fts_contentless"),
    (15, "conversation_tail_state_cache"),
    (16, "drop_redundant_message_conv_idx"),
    (17, "drop_message_created_idx"),
    (18, "conversation_tail_state_hot_table"),
    (19, "conversation_external_lookup"),
    (20, "conversation_external_tail_lookup"),
];
5508
5509fn transition_from_meta_version(conn: &FrankenConnection) -> Result<()> {
5528 if conn
5532 .query("SELECT version FROM \"_schema_migrations\";")
5533 .is_ok()
5534 {
5535 return Ok(());
5536 }
5537
5538 if conn.query("SELECT key FROM meta;").is_err() {
5540 return Ok(());
5542 }
5543
5544 let rows = conn
5546 .query("SELECT value FROM meta WHERE key = 'schema_version';")
5547 .with_context(|| "reading schema_version from meta")?;
5548
5549 let current_version: i64 = rows
5550 .first()
5551 .and_then(|row| row.get_typed::<String>(0).ok())
5552 .and_then(|s| s.parse().ok())
5553 .unwrap_or(0);
5554
5555 if current_version == 0 {
5556 if conn.query("SELECT id FROM conversations LIMIT 1;").is_err() {
5558 return Ok(());
5560 }
5561
5562 info!("meta.schema_version=0 but tables exist; skipping transition (corrupted state)");
5565 return Ok(());
5566 }
5567
5568 info!(
5570 current_version,
5571 "transitioning schema tracking from meta table to _schema_migrations"
5572 );
5573
5574 conn.execute(
5575 "CREATE TABLE IF NOT EXISTS _schema_migrations (\
5576 version INTEGER PRIMARY KEY, \
5577 name TEXT NOT NULL, \
5578 applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))\
5579 );",
5580 )
5581 .with_context(|| "creating _schema_migrations table for transition")?;
5582
5583 let backfill_through_version = if (10..13).contains(¤t_version) {
5584 13
5585 } else {
5586 current_version
5587 };
5588
5589 for &(version, name) in &MIGRATION_NAMES {
5590 if version > backfill_through_version {
5591 break;
5592 }
5593 conn.execute_compat(
5594 "INSERT INTO _schema_migrations (version, name) VALUES (?1, ?2);",
5595 &[ParamValue::from(version), ParamValue::from(name)],
5596 )
5597 .with_context(|| format!("backfilling _schema_migrations version {version}"))?;
5598 }
5599
5600 info!(
5601 current_version,
5602 backfill_through_version,
5603 "schema version transition complete: backfilled legacy meta schema versions"
5604 );
5605
5606 Ok(())
5607}
5608
/// `(table name, probe query)` pairs, one per required table.
///
/// Each probe is a `SELECT ... LIMIT 1` against the named table. The table
/// names match those listed in `CURRENT_SCHEMA_REPAIR_BATCHES`.
/// NOTE(review): presumably a failing probe marks the table as missing —
/// confirm at the call site, which is outside this section.
const REQUIRED_CURRENT_SCHEMA_TABLE_PROBES: &[(&str, &str)] = &[
    ("sources", "SELECT id FROM sources LIMIT 1;"),
    ("daily_stats", "SELECT day_id FROM daily_stats LIMIT 1;"),
    (
        "conversation_external_lookup",
        "SELECT lookup_key FROM conversation_external_lookup LIMIT 1;",
    ),
    (
        "conversation_tail_state",
        "SELECT conversation_id FROM conversation_tail_state LIMIT 1;",
    ),
    (
        "conversation_external_tail_lookup",
        "SELECT lookup_key, last_message_idx FROM conversation_external_tail_lookup LIMIT 1;",
    ),
    ("embedding_jobs", "SELECT id FROM embedding_jobs LIMIT 1;"),
    ("token_usage", "SELECT id FROM token_usage LIMIT 1;"),
    (
        "token_daily_stats",
        "SELECT day_id FROM token_daily_stats LIMIT 1;",
    ),
    (
        "model_pricing",
        "SELECT model_pattern FROM model_pricing LIMIT 1;",
    ),
    (
        "message_metrics",
        "SELECT message_id FROM message_metrics LIMIT 1;",
    ),
    ("usage_hourly", "SELECT hour_id FROM usage_hourly LIMIT 1;"),
    ("usage_daily", "SELECT day_id FROM usage_daily LIMIT 1;"),
    (
        "usage_models_daily",
        "SELECT day_id FROM usage_models_daily LIMIT 1;",
    ),
];
5645
/// `(column name, SQL type)` pairs for token/usage summary columns.
///
/// NOTE(review): the name suggests these are columns required on the
/// `conversations` table — confirm at the call site, which is outside this
/// section.
const REQUIRED_CONVERSATION_TOKEN_COLUMNS: &[(&str, &str)] = &[
    ("total_input_tokens", "INTEGER"),
    ("total_output_tokens", "INTEGER"),
    ("total_cache_read_tokens", "INTEGER"),
    ("total_cache_creation_tokens", "INTEGER"),
    ("grand_total_tokens", "INTEGER"),
    ("estimated_cost_usd", "REAL"),
    ("primary_model", "TEXT"),
    ("api_call_count", "INTEGER"),
    ("tool_call_count", "INTEGER"),
    ("user_message_count", "INTEGER"),
    ("assistant_message_count", "INTEGER"),
];
5659
/// Returns `true` when the rendered error text contains `no such table`,
/// compared case-insensitively (ASCII only).
fn error_indicates_missing_table(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.contains("no such table")
}
5665
/// Returns `true` when the rendered error text contains `no such column`,
/// compared case-insensitively (ASCII only).
fn error_indicates_missing_column(err: &impl std::fmt::Display) -> bool {
    let rendered = err.to_string();
    let lowered = rendered.to_ascii_lowercase();
    lowered.contains("no such column")
}
5671
/// Maximum number of ids handled per batch during orphan cleanup: it bounds
/// the `IN (...)` placeholder list of each delete statement and is passed as
/// the `LIMIT` when paging direct orphan ids.
const ORPHAN_FK_ID_CHUNK_SIZE: usize = 256;
5673
5674fn collect_orphan_message_ids(conn: &FrankenConnection) -> Result<Vec<i64>> {
5675 let min_conversation_id = conn
5676 .query_map_collect(
5677 "SELECT conversation_id
5678 FROM messages
5679 ORDER BY conversation_id ASC
5680 LIMIT 1",
5681 fparams![],
5682 |row| row.get_typed(0),
5683 )
5684 .context("finding minimum message conversation id for orphan FK cleanup")?
5685 .into_iter()
5686 .next();
5687 let Some(min_conversation_id) = min_conversation_id else {
5688 return Ok(Vec::new());
5689 };
5690 let max_conversation_id: i64 = conn
5691 .query_row_map(
5692 "SELECT conversation_id
5693 FROM messages
5694 ORDER BY conversation_id DESC
5695 LIMIT 1",
5696 fparams![],
5697 |row| row.get_typed(0),
5698 )
5699 .context("finding maximum message conversation id for orphan FK cleanup")?;
5700
5701 let parent_conversation_ids: Vec<i64> = conn
5702 .query_map_collect(
5703 "SELECT id
5704 FROM conversations
5705 WHERE id BETWEEN ?1 AND ?2
5706 ORDER BY id",
5707 fparams![min_conversation_id, max_conversation_id],
5708 |row| row.get_typed(0),
5709 )
5710 .context("listing parent conversation ids for orphan FK cleanup")?;
5711
5712 let mut message_ids = Vec::new();
5713 let mut gap_start = min_conversation_id;
5714 for parent_id in parent_conversation_ids {
5715 if parent_id < gap_start {
5716 continue;
5717 }
5718 if parent_id > max_conversation_id {
5719 break;
5720 }
5721 if gap_start < parent_id {
5722 collect_message_ids_for_conversation_gap(
5723 conn,
5724 gap_start,
5725 parent_id.saturating_sub(1),
5726 &mut message_ids,
5727 )?;
5728 }
5729 if parent_id == i64::MAX {
5730 return Ok(message_ids);
5731 }
5732 gap_start = parent_id + 1;
5733 }
5734 if gap_start <= max_conversation_id {
5735 collect_message_ids_for_conversation_gap(
5736 conn,
5737 gap_start,
5738 max_conversation_id,
5739 &mut message_ids,
5740 )?;
5741 }
5742
5743 Ok(message_ids)
5744}
5745
5746fn collect_message_ids_for_conversation_gap(
5747 conn: &FrankenConnection,
5748 gap_start: i64,
5749 gap_end: i64,
5750 message_ids: &mut Vec<i64>,
5751) -> Result<()> {
5752 let (sql, params) = if gap_start == gap_end {
5753 (
5754 "SELECT id FROM messages WHERE conversation_id = ?1",
5755 vec![SqliteValue::from(gap_start)],
5756 )
5757 } else {
5758 (
5759 "SELECT id FROM messages WHERE conversation_id BETWEEN ?1 AND ?2",
5760 vec![SqliteValue::from(gap_start), SqliteValue::from(gap_end)],
5761 )
5762 };
5763 let rows = conn.query_with_params(sql, ¶ms).with_context(|| {
5764 format!("listing orphan message ids for conversation-id gap {gap_start}..={gap_end}")
5765 })?;
5766 message_ids.reserve(rows.len());
5767 for row in rows {
5768 message_ids.push(row.get_typed(0)?);
5769 }
5770 Ok(())
5771}
5772
5773fn delete_rows_by_i64_chunks(
5774 tx: &FrankenTransaction<'_>,
5775 delete_many_sql_prefix: &'static str,
5776 ids: &[i64],
5777) -> Result<usize> {
5778 if ids.is_empty() {
5779 return Ok(0);
5780 }
5781
5782 let full_chunk_sql = delete_rows_by_i64_sql(delete_many_sql_prefix, ORPHAN_FK_ID_CHUNK_SIZE);
5783 let tail_len = ids.len() % ORPHAN_FK_ID_CHUNK_SIZE;
5784 let tail_sql =
5785 (tail_len != 0).then(|| delete_rows_by_i64_sql(delete_many_sql_prefix, tail_len));
5786
5787 let mut deleted = 0;
5788 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5789 let sql = if chunk.len() == ORPHAN_FK_ID_CHUNK_SIZE {
5790 &full_chunk_sql
5791 } else {
5792 tail_sql.as_ref().unwrap_or(&full_chunk_sql)
5793 };
5794 let params = chunk
5795 .iter()
5796 .map(|id| SqliteValue::from(*id))
5797 .collect::<Vec<_>>();
5798 deleted += tx.execute_with_params(sql, ¶ms)?;
5799 }
5800 Ok(deleted)
5801}
5802
5803fn delete_rows_by_i64_sql(delete_many_sql_prefix: &'static str, count: usize) -> String {
5804 let placeholders = sql_placeholders(count);
5805 format!("{delete_many_sql_prefix} ({placeholders})")
5806}
5807
/// Returns `count` anonymous SQL placeholders separated by `", "`, or an
/// empty string when `count` is zero.
fn sql_placeholders(count: usize) -> String {
    std::iter::repeat("?")
        .take(count)
        .collect::<Vec<_>>()
        .join(", ")
}
5811
5812fn delete_orphan_message_ids_bisecting_oom(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5813 let mut deleted = 0usize;
5814 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5815 deleted = deleted.saturating_add(delete_orphan_message_id_chunk(conn, chunk)?);
5816 }
5817 Ok(deleted)
5818}
5819
5820fn delete_orphan_message_id_chunk(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5821 if ids.is_empty() {
5822 return Ok(0);
5823 }
5824
5825 match delete_orphan_message_id_chunk_once(conn, ids) {
5826 Ok(deleted) => Ok(deleted),
5827 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5828 let split_at = ids.len() / 2;
5829 tracing::warn!(
5830 target: "cass::fk_repair",
5831 rows = ids.len(),
5832 left = split_at,
5833 right = ids.len().saturating_sub(split_at),
5834 error = %err,
5835 "orphan-message cleanup ran out of memory; retrying as smaller batches"
5836 );
5837 let left = delete_orphan_message_id_chunk(conn, &ids[..split_at])?;
5838 let right = delete_orphan_message_id_chunk(conn, &ids[split_at..])?;
5839 Ok(left.saturating_add(right))
5840 }
5841 Err(err) => Err(err),
5842 }
5843}
5844
5845fn delete_orphan_message_id_chunk_once(conn: &FrankenConnection, ids: &[i64]) -> Result<usize> {
5846 let mut tx = conn.transaction()?;
5847 let mut deleted = 0usize;
5848 for entry in ORPHAN_MESSAGE_DEPENDENT_TABLES {
5849 match delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids) {
5850 Ok(count) => {
5851 deleted = deleted.saturating_add(count);
5852 }
5853 Err(err) if error_indicates_missing_table(&err) => {
5854 tracing::debug!(
5855 target: "cass::fk_repair",
5856 child_table = entry.child_table,
5857 error = %err,
5858 "skipping orphan-message dependent cleanup (table unavailable)"
5859 );
5860 }
5861 Err(err) => {
5862 return Err(err).with_context(|| {
5863 format!(
5864 "deleting rows from {} that depend on orphan messages",
5865 entry.child_table
5866 )
5867 });
5868 }
5869 }
5870 }
5871 deleted = deleted.saturating_add(
5872 delete_rows_by_i64_chunks(&tx, "DELETE FROM messages WHERE id IN", ids)
5873 .context("deleting orphan rows from messages")?,
5874 );
5875 tx.commit()?;
5876 Ok(deleted)
5877}
5878
5879fn collect_direct_orphan_id_page(
5880 conn: &FrankenConnection,
5881 entry: &'static OrphanFkTable,
5882) -> Result<Vec<i64>> {
5883 Ok(conn.query_map_collect(
5884 entry.orphan_id_page_sql,
5885 fparams![i64::try_from(ORPHAN_FK_ID_CHUNK_SIZE).unwrap_or(i64::MAX)],
5886 |row| row.get_typed(0),
5887 )?)
5888}
5889
5890fn delete_direct_orphan_ids_bisecting_oom(
5891 conn: &FrankenConnection,
5892 entry: &'static OrphanFkTable,
5893 ids: &[i64],
5894) -> Result<usize> {
5895 let mut deleted = 0usize;
5896 for chunk in ids.chunks(ORPHAN_FK_ID_CHUNK_SIZE) {
5897 deleted = deleted.saturating_add(delete_direct_orphan_id_chunk(conn, entry, chunk)?);
5898 }
5899 Ok(deleted)
5900}
5901
5902fn delete_direct_orphan_id_chunk(
5903 conn: &FrankenConnection,
5904 entry: &'static OrphanFkTable,
5905 ids: &[i64],
5906) -> Result<usize> {
5907 if ids.is_empty() {
5908 return Ok(0);
5909 }
5910
5911 match delete_direct_orphan_id_chunk_once(conn, entry, ids) {
5912 Ok(deleted) => Ok(deleted),
5913 Err(err) if is_out_of_memory_error(&err) && ids.len() > 1 => {
5914 let split_at = ids.len() / 2;
5915 tracing::warn!(
5916 target: "cass::fk_repair",
5917 child_table = entry.child_table,
5918 rows = ids.len(),
5919 left = split_at,
5920 right = ids.len().saturating_sub(split_at),
5921 error = %err,
5922 "direct orphan cleanup ran out of memory; retrying as smaller batches"
5923 );
5924 let left = delete_direct_orphan_id_chunk(conn, entry, &ids[..split_at])?;
5925 let right = delete_direct_orphan_id_chunk(conn, entry, &ids[split_at..])?;
5926 Ok(left.saturating_add(right))
5927 }
5928 Err(err) => Err(err),
5929 }
5930}
5931
5932fn delete_direct_orphan_id_chunk_once(
5933 conn: &FrankenConnection,
5934 entry: &'static OrphanFkTable,
5935 ids: &[i64],
5936) -> Result<usize> {
5937 let mut tx = conn.transaction()?;
5938 let deleted = delete_rows_by_i64_chunks(&tx, entry.delete_many_sql_prefix, ids)?;
5939 tx.commit()?;
5940 Ok(deleted)
5941}
5942
/// Describes how to find and delete orphaned rows in one child table.
struct OrphanFkTable {
    /// Name of the child table; used in log output.
    child_table: &'static str,
    /// Query returning one page of orphaned key values in column 0. It takes
    /// a single parameter, the page size (`LIMIT ?1`).
    orphan_id_page_sql: &'static str,
    /// `DELETE ... WHERE <key> IN` statement prefix; a parenthesised
    /// placeholder list is appended before execution.
    delete_many_sql_prefix: &'static str,
}
5953
/// Child tables checked directly for rows whose parent is gone.
///
/// The first three entries look for a `message_id` with no matching
/// `messages.id`; the last looks for a `conversation_id` with no matching
/// `conversations.id`. Each page query is ordered by the key and limited by
/// its `?1` parameter.
const ORPHAN_DIRECT_CHILD_TABLES: &[OrphanFkTable] = &[
    OrphanFkTable {
        child_table: "message_metrics",
        orphan_id_page_sql: "SELECT message_id FROM message_metrics \
             WHERE NOT EXISTS (\
             SELECT 1 FROM messages \
             WHERE messages.id = message_metrics.message_id\
             ) \
             ORDER BY message_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "token_usage",
        orphan_id_page_sql: "SELECT message_id FROM token_usage \
             WHERE NOT EXISTS (\
             SELECT 1 FROM messages \
             WHERE messages.id = token_usage.message_id\
             ) \
             ORDER BY message_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "snippets",
        orphan_id_page_sql: "SELECT message_id FROM snippets \
             WHERE NOT EXISTS (\
             SELECT 1 FROM messages \
             WHERE messages.id = snippets.message_id\
             ) \
             ORDER BY message_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
    OrphanFkTable {
        child_table: "conversation_tags",
        orphan_id_page_sql: "SELECT conversation_id FROM conversation_tags \
             WHERE NOT EXISTS (\
             SELECT 1 FROM conversations \
             WHERE conversations.id = conversation_tags.conversation_id\
             ) \
             ORDER BY conversation_id \
             LIMIT ?1",
        delete_many_sql_prefix: "DELETE FROM conversation_tags WHERE conversation_id IN",
    },
];
6000
/// A table whose rows reference `messages` and must be removed before the
/// orphaned messages themselves are deleted.
struct OrphanMessageDependentTable {
    /// Name of the dependent table; used in log and error context.
    child_table: &'static str,
    /// `DELETE ... WHERE message_id IN` statement prefix; a parenthesised
    /// placeholder list is appended before execution.
    delete_many_sql_prefix: &'static str,
}
6005
/// Tables cleared by `message_id` (in this order) before orphan rows are
/// deleted from `messages`.
const ORPHAN_MESSAGE_DEPENDENT_TABLES: &[OrphanMessageDependentTable] = &[
    OrphanMessageDependentTable {
        child_table: "message_metrics",
        delete_many_sql_prefix: "DELETE FROM message_metrics WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "token_usage",
        delete_many_sql_prefix: "DELETE FROM token_usage WHERE message_id IN",
    },
    OrphanMessageDependentTable {
        child_table: "snippets",
        delete_many_sql_prefix: "DELETE FROM snippets WHERE message_id IN",
    },
];
6020
/// Running tally of rows removed by orphan foreign-key cleanup.
#[derive(Debug, Default, Clone)]
pub(crate) struct OrphanFkCleanupReport {
    /// Saturating sum of every recorded count.
    pub total: i64,
    /// Per-table counts, in the order each table was first recorded.
    pub per_table: Vec<(&'static str, i64)>,
}

impl OrphanFkCleanupReport {
    /// Adds `count` to the entry for `child_table` (creating it on first use)
    /// and to `total`. Both additions saturate instead of overflowing.
    fn record(&mut self, child_table: &'static str, count: i64) {
        let slot = self
            .per_table
            .iter()
            .position(|&(table, _)| table == child_table);
        match slot {
            Some(index) => {
                let tally = &mut self.per_table[index].1;
                *tally = tally.saturating_add(count);
            }
            None => self.per_table.push((child_table, count)),
        }
        self.total = self.total.saturating_add(count);
    }
}
6051
/// Result of inserting a conversation and its messages.
///
/// NOTE(review): the field meanings below are inferred from the names; the
/// code that fills them is outside this section — confirm.
pub struct InsertOutcome {
    /// Row id of the conversation (presumably the existing id when merged).
    pub conversation_id: i64,
    /// Presumably `true` when a new conversation row was created.
    pub conversation_inserted: bool,
    /// Presumably the `idx` values of the messages that were newly inserted.
    pub inserted_indices: Vec<i64>,
}
6057
/// Test-only timing breakdown of the message-insert stage.
///
/// The duration fields are printed as `msg_*_ms` values by
/// `InsertConversationTreePerfProfile::log_summary`.
/// NOTE(review): the counter fields are filled outside this section; their
/// exact meaning should be confirmed there.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct MessageInsertSubstageProfile {
    single_row_calls: usize,
    batch_calls: usize,
    batch_rows: usize,
    payload_duration: Duration,
    sql_build_duration: Duration,
    param_build_duration: Duration,
    execute_duration: Duration,
    rowid_duration: Duration,
}
6070
/// Test-only accumulated timings for conversation-tree inserts.
///
/// `log_summary` prints every duration in milliseconds, the part of
/// `total_duration` not attributed to any stage, and per-invocation averages.
/// `existing_*`, `dedupe_filter_duration` and the other stage fields are
/// summed as-is there, so they are expected not to overlap.
/// NOTE(review): that non-overlap is an assumption about the instrumented
/// code, which is outside this section — confirm.
#[cfg(test)]
#[derive(Debug, Clone, Default)]
struct InsertConversationTreePerfProfile {
    invocations: usize,
    messages: usize,
    inserted_messages: usize,
    total_duration: Duration,
    source_duration: Duration,
    tx_open_duration: Duration,
    existing_lookup_duration: Duration,
    existing_idx_lookup_duration: Duration,
    existing_replay_lookup_duration: Duration,
    dedupe_filter_duration: Duration,
    conversation_row_duration: Duration,
    message_insert_duration: Duration,
    // Finer-grained split of `message_insert_duration`.
    message_insert_breakdown: MessageInsertSubstageProfile,
    snippet_insert_duration: Duration,
    fts_entry_duration: Duration,
    fts_flush_duration: Duration,
    analytics_duration: Duration,
    commit_duration: Duration,
}
6093
#[cfg(test)]
impl InsertConversationTreePerfProfile {
    /// Converts a duration to fractional milliseconds.
    fn millis(duration: Duration) -> f64 {
        1000.0 * duration.as_secs_f64()
    }

    /// Prints one `CASS_INSERT_TREE_STAGE_PROFILE` line to stderr with every
    /// stage total, the unattributed residual, and per-invocation averages.
    ///
    /// Averages divide by `invocations`, treating zero invocations as one.
    fn log_summary(&self, label: &str) {
        let ms = Self::millis;
        let breakdown = &self.message_insert_breakdown;
        let calls = self.invocations.max(1) as f64;

        // Time attributed to a named stage; the remainder is the residual.
        let stage_durations = [
            self.source_duration,
            self.tx_open_duration,
            self.existing_lookup_duration,
            self.existing_idx_lookup_duration,
            self.existing_replay_lookup_duration,
            self.dedupe_filter_duration,
            self.conversation_row_duration,
            self.message_insert_duration,
            self.snippet_insert_duration,
            self.fts_entry_duration,
            self.fts_flush_duration,
            self.analytics_duration,
            self.commit_duration,
        ];
        let accounted_duration = stage_durations
            .into_iter()
            .fold(Duration::ZERO, |sum, stage| sum + stage);
        let residual_duration = self.total_duration.saturating_sub(accounted_duration);

        eprintln!(
            concat!(
                "CASS_INSERT_TREE_STAGE_PROFILE ",
                "label={} calls={} messages={} inserted_messages={} ",
                "total_ms={:.3} source_ms={:.3} tx_open_ms={:.3} existing_lookup_ms={:.3} ",
                "existing_idx_lookup_ms={:.3} existing_replay_lookup_ms={:.3} dedupe_filter_ms={:.3} ",
                "conversation_row_ms={:.3} message_insert_ms={:.3} snippet_insert_ms={:.3} ",
                "fts_entry_ms={:.3} fts_flush_ms={:.3} analytics_ms={:.3} commit_ms={:.3} ",
                "msg_payload_ms={:.3} msg_sql_ms={:.3} msg_param_ms={:.3} msg_execute_ms={:.3} msg_rowid_ms={:.3} ",
                "residual_ms={:.3} avg_total_ms={:.3} avg_message_insert_ms={:.3} ",
                "avg_msg_execute_ms={:.3} avg_msg_payload_ms={:.3} avg_snippet_insert_ms={:.3} avg_fts_entry_ms={:.3} avg_commit_ms={:.3}"
            ),
            label,
            self.invocations,
            self.messages,
            self.inserted_messages,
            ms(self.total_duration),
            ms(self.source_duration),
            ms(self.tx_open_duration),
            ms(self.existing_lookup_duration),
            ms(self.existing_idx_lookup_duration),
            ms(self.existing_replay_lookup_duration),
            ms(self.dedupe_filter_duration),
            ms(self.conversation_row_duration),
            ms(self.message_insert_duration),
            ms(self.snippet_insert_duration),
            ms(self.fts_entry_duration),
            ms(self.fts_flush_duration),
            ms(self.analytics_duration),
            ms(self.commit_duration),
            ms(breakdown.payload_duration),
            ms(breakdown.sql_build_duration),
            ms(breakdown.param_build_duration),
            ms(breakdown.execute_duration),
            ms(breakdown.rowid_duration),
            ms(residual_duration),
            ms(self.total_duration) / calls,
            ms(self.message_insert_duration) / calls,
            ms(breakdown.execute_duration) / calls,
            ms(breakdown.payload_duration) / calls,
            ms(self.snippet_insert_duration) / calls,
            ms(self.fts_entry_duration) / calls,
            ms(self.commit_duration) / calls,
        );
    }
}
6162
/// Hashable identity for a conversation, in one of two forms.
///
/// NOTE(review): presumably used to detect duplicate conversations within a
/// pending batch before they reach the database — confirm at the call site,
/// which is outside this section.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum PendingConversationKey {
    /// Identified by source, agent and an external id.
    External {
        source_id: String,
        agent_id: i64,
        external_id: String,
    },
    /// Identified by source, agent, source path and optional start time.
    SourcePath {
        source_id: String,
        agent_id: i64,
        source_path: String,
        started_at: Option<i64>,
    },
}
6177
/// Builds the lookup key `"<len(source_id)>:<source_id>:<agent_id>:<len(external_id)>:<external_id>"`.
///
/// Lengths are counted in Unicode scalar values (`chars()`), not bytes.
fn conversation_external_lookup_key(source_id: &str, agent_id: i64, external_id: &str) -> String {
    let source_len = source_id.chars().count();
    let external_len = external_id.chars().count();
    format!(
        "{}:{source_id}:{agent_id}:{}:{external_id}",
        source_len, external_len
    )
}
6185
6186fn conversation_external_lookup_key_for_conv(agent_id: i64, conv: &Conversation) -> Option<String> {
6187 conv.external_id
6188 .as_deref()
6189 .map(|external_id| conversation_external_lookup_key(&conv.source_id, agent_id, external_id))
6190}
6191
/// Identity of a message for merge comparison: its position, timestamp, role,
/// author and a hash of its content.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageMergeFingerprint {
    idx: i64,
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    // BLAKE3 digest of the message content bytes.
    content_hash: [u8; 32],
}
6200
/// Same as `MessageMergeFingerprint` but without `idx`, so two messages with
/// identical timestamp, role, author and content compare equal regardless of
/// their position.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MessageReplayFingerprint {
    created_at: Option<i64>,
    role: MessageRole,
    author: Option<String>,
    // BLAKE3 digest of the message content bytes.
    content_hash: [u8; 32],
}
6208
/// Measurements comparing two conversations that may be the same one.
///
/// NOTE(review): field meanings are inferred from names; the code that
/// computes them is outside this section — confirm. Presumably
/// `exact_overlap` counts shared merge fingerprints, `replay_overlap` counts
/// shared replay fingerprints, and `smaller_replay_set` is the size of the
/// smaller of the two replay-fingerprint sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ConversationMergeEvidence {
    exact_overlap: usize,
    replay_overlap: usize,
    smaller_replay_set: usize,
    started_close: bool,
    start_distance_ms: i64,
}
6217
/// Output of `collect_new_messages_for_existing_conversation`: the incoming
/// messages selected for insertion plus bookkeeping about index collisions.
///
/// NOTE(review): the producing function extends past this section; the
/// per-field notes are inferred from names — confirm.
struct ExistingConversationNewMessages<'a> {
    /// Borrowed messages from the incoming conversation to be inserted.
    messages: Vec<&'a Message>,
    /// Presumably the total content length of `messages`, in characters.
    new_chars: i64,
    /// Presumably how many incoming messages clashed with an existing `idx`.
    idx_collision_count: usize,
    /// Presumably the `idx` of the first such clash, if any.
    first_collision_idx: Option<i64>,
}
6224
/// Snapshot of the last message of a stored conversation.
///
/// NOTE(review): presumably read from the `conversation_tail_state` table —
/// confirm at the call site, which is outside this section.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationTailState {
    last_message_idx: i64,
    last_message_created_at: i64,
    ended_at: Option<i64>,
}
6231
/// A stored conversation's row id together with its tail state, when one is
/// available.
#[derive(Debug, Clone, Copy)]
struct ExistingConversationWithTail {
    id: i64,
    tail_state: Option<ExistingConversationTailState>,
}
6237
6238fn conversation_effective_started_at(conv: &Conversation) -> Option<i64> {
6239 conv.started_at
6240 .or_else(|| conv.messages.iter().filter_map(|msg| msg.created_at).min())
6241}
6242
6243fn conversation_tail_state(conv: &Conversation) -> (Option<i64>, Option<i64>) {
6244 (
6245 conv.messages.iter().map(|msg| msg.idx).max(),
6246 conv.messages.iter().filter_map(|msg| msg.created_at).max(),
6247 )
6248}
6249
6250fn borrowed_messages_tail_state(messages: &[&Message]) -> (Option<i64>, Option<i64>) {
6251 (
6252 messages.iter().map(|msg| msg.idx).max(),
6253 messages.iter().filter_map(|msg| msg.created_at).max(),
6254 )
6255}
6256
6257fn role_from_str(role: &str) -> MessageRole {
6258 match role {
6259 "user" => MessageRole::User,
6260 "agent" | "assistant" => MessageRole::Agent,
6261 "tool" => MessageRole::Tool,
6262 "system" => MessageRole::System,
6263 other => MessageRole::Other(other.to_string()),
6264 }
6265}
6266
6267fn message_merge_fingerprint(msg: &Message) -> MessageMergeFingerprint {
6268 MessageMergeFingerprint {
6269 idx: msg.idx,
6270 created_at: msg.created_at,
6271 role: msg.role.clone(),
6272 author: msg.author.clone(),
6273 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
6274 }
6275}
6276
6277fn message_replay_fingerprint(msg: &Message) -> MessageReplayFingerprint {
6278 MessageReplayFingerprint {
6279 created_at: msg.created_at,
6280 role: msg.role.clone(),
6281 author: msg.author.clone(),
6282 content_hash: *blake3::hash(msg.content.as_bytes()).as_bytes(),
6283 }
6284}
6285
6286fn conversation_message_fingerprints(conv: &Conversation) -> HashSet<MessageMergeFingerprint> {
6287 conv.messages
6288 .iter()
6289 .map(message_merge_fingerprint)
6290 .collect()
6291}
6292
6293fn conversation_message_replay_fingerprints(
6294 conv: &Conversation,
6295) -> HashSet<MessageReplayFingerprint> {
6296 conv.messages
6297 .iter()
6298 .map(message_replay_fingerprint)
6299 .collect()
6300}
6301
6302fn replay_fingerprint_from_merge(
6303 fingerprint: &MessageMergeFingerprint,
6304) -> MessageReplayFingerprint {
6305 MessageReplayFingerprint {
6306 created_at: fingerprint.created_at,
6307 role: fingerprint.role.clone(),
6308 author: fingerprint.author.clone(),
6309 content_hash: fingerprint.content_hash,
6310 }
6311}
6312
6313fn replay_fingerprints_from_merge_set(
6314 fingerprints: &HashSet<MessageMergeFingerprint>,
6315) -> HashSet<MessageReplayFingerprint> {
6316 fingerprints
6317 .iter()
6318 .map(replay_fingerprint_from_merge)
6319 .collect()
6320}
6321
6322fn collect_new_messages_for_existing_conversation<'a>(
6323 conversation_id: i64,
6324 conv: &'a Conversation,
6325 existing_messages: &mut HashMap<i64, MessageMergeFingerprint>,
6326 existing_replay_fingerprints: &mut HashSet<MessageReplayFingerprint>,
6327 replay_skip_log: &'static str,
6328) -> ExistingConversationNewMessages<'a> {
6329 let mut idx_collision_count = 0usize;
6330 let mut first_collision_idx: Option<i64> = None;
6331 let mut new_chars: i64 = 0;
6332 let mut messages = Vec::new();
6333
6334 for msg in &conv.messages {
6335 let incoming_fingerprint = message_merge_fingerprint(msg);
6336 if let Some(existing_fingerprint) = existing_messages.get(&msg.idx) {
6337 if existing_fingerprint != &incoming_fingerprint {
6338 idx_collision_count = idx_collision_count.saturating_add(1);
6339 first_collision_idx.get_or_insert(msg.idx);
6340 }
6341 continue;
6342 }
6343
6344 let incoming_replay = replay_fingerprint_from_merge(&incoming_fingerprint);
6345 if existing_replay_fingerprints.contains(&incoming_replay) {
6346 tracing::debug!(
6347 conversation_id,
6348 idx = msg.idx,
6349 source_path = %conv.source_path.display(),
6350 "{replay_skip_log}"
6351 );
6352 continue;
6353 }
6354
6355 existing_messages.insert(msg.idx, incoming_fingerprint);
6356 existing_replay_fingerprints.insert(incoming_replay);
6357 new_chars += msg.content.len() as i64;
6358 messages.push(msg);
6359 }
6360
6361 ExistingConversationNewMessages {
6362 messages,
6363 new_chars,
6364 idx_collision_count,
6365 first_collision_idx,
6366 }
6367}
6368
6369fn franken_existing_conversation_append_tail_state(
6370 tx: &FrankenTransaction<'_>,
6371 conversation_id: i64,
6372) -> Result<Option<ExistingConversationTailState>> {
6373 let cached: Option<(Option<i64>, Option<i64>, Option<i64>)> = tx
6374 .query_row_map(
6375 "SELECT last_message_idx, last_message_created_at, ended_at
6376 FROM conversation_tail_state
6377 WHERE conversation_id = ?1",
6378 fparams![conversation_id],
6379 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6380 )
6381 .optional()?;
6382 if let Some(cached) = cached {
6383 let (_, _, cached_ended_at) = cached;
6384 if let Some(tail_state) =
6385 existing_conversation_tail_state_from_cached(cached.0, cached.1, cached_ended_at)
6386 {
6387 return Ok(Some(tail_state));
6388 }
6389 }
6390
6391 let legacy_cached: (Option<i64>, Option<i64>, Option<i64>) = tx.query_row_map(
6392 "SELECT last_message_idx, last_message_created_at, ended_at
6393 FROM conversations
6394 WHERE id = ?1",
6395 fparams![conversation_id],
6396 |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
6397 )?;
6398 let (_, _, cached_ended_at) = legacy_cached;
6399 if let Some(tail_state) = existing_conversation_tail_state_from_cached(
6400 legacy_cached.0,
6401 legacy_cached.1,
6402 cached_ended_at,
6403 ) {
6404 franken_insert_conversation_tail_state(
6405 tx,
6406 conversation_id,
6407 cached_ended_at,
6408 Some(tail_state.last_message_idx),
6409 Some(tail_state.last_message_created_at),
6410 )?;
6411 return Ok(Some(tail_state));
6412 }
6413
6414 let (max_idx, max_created_at): (Option<i64>, Option<i64>) = tx.query_row_map(
6415 "SELECT MAX(idx), MAX(created_at)
6416 FROM messages
6417 WHERE conversation_id = ?1",
6418 fparams![conversation_id],
6419 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
6420 )?;
6421 if let Some((last_message_idx, last_message_created_at)) = max_idx.zip(max_created_at) {
6422 franken_update_conversation_tail_state(
6423 tx,
6424 conversation_id,
6425 None,
6426 Some(last_message_idx),
6427 Some(last_message_created_at),
6428 )?;
6429 return Ok(Some(ExistingConversationTailState {
6430 last_message_idx,
6431 last_message_created_at,
6432 ended_at: cached_ended_at,
6433 }));
6434 }
6435 Ok(None)
6436}
6437
6438fn existing_conversation_tail_state_from_cached(
6439 last_message_idx: Option<i64>,
6440 last_message_created_at: Option<i64>,
6441 ended_at: Option<i64>,
6442) -> Option<ExistingConversationTailState> {
6443 let (last_message_idx, last_message_created_at) =
6444 last_message_idx.zip(last_message_created_at)?;
6445 Some(ExistingConversationTailState {
6446 last_message_idx,
6447 last_message_created_at,
6448 ended_at,
6449 })
6450}
6451
6452fn franken_find_existing_conversation_with_tail_by_key(
6453 tx: &FrankenTransaction<'_>,
6454 key: &PendingConversationKey,
6455 conv: Option<&Conversation>,
6456) -> Result<Option<ExistingConversationWithTail>> {
6457 if let PendingConversationKey::External {
6458 source_id,
6459 agent_id,
6460 external_id,
6461 } = key
6462 {
6463 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
6464 if let Some(existing) = franken_find_external_conversation_tail_lookup(tx, &lookup_key)? {
6465 return Ok(Some(existing));
6466 }
6467 return Ok(None);
6468 }
6469
6470 let Some(id) = franken_find_existing_conversation_by_key(tx, key, conv)? else {
6471 return Ok(None);
6472 };
6473 let tail_state = franken_existing_conversation_append_tail_state(tx, id)?;
6474 Ok(Some(ExistingConversationWithTail { id, tail_state }))
6475}
6476
6477fn franken_insert_conversation_tail_state(
6478 tx: &FrankenTransaction<'_>,
6479 conversation_id: i64,
6480 ended_at: Option<i64>,
6481 last_message_idx: Option<i64>,
6482 last_message_created_at: Option<i64>,
6483) -> Result<()> {
6484 if ended_at.is_none() && last_message_idx.is_none() && last_message_created_at.is_none() {
6485 return Ok(());
6486 }
6487 tx.execute_compat(
6488 "INSERT OR REPLACE INTO conversation_tail_state (
6489 conversation_id, ended_at, last_message_idx, last_message_created_at
6490 ) VALUES (?1, ?2, ?3, ?4)",
6491 fparams![
6492 conversation_id,
6493 ended_at,
6494 last_message_idx,
6495 last_message_created_at
6496 ],
6497 )?;
6498 Ok(())
6499}
6500
6501fn franken_update_conversation_tail_columns(
6502 tx: &FrankenTransaction<'_>,
6503 conversation_id: i64,
6504 ended_at_candidate: Option<i64>,
6505 last_message_idx_candidate: Option<i64>,
6506 last_message_created_at_candidate: Option<i64>,
6507) -> Result<()> {
6508 if ended_at_candidate.is_none()
6509 && last_message_idx_candidate.is_none()
6510 && last_message_created_at_candidate.is_none()
6511 {
6512 return Ok(());
6513 }
6514
6515 tx.execute_compat(
6516 "UPDATE conversations
6517 SET ended_at = CASE
6518 WHEN ?1 IS NULL THEN ended_at
6519 WHEN ended_at IS NULL OR ended_at < ?1 THEN ?1
6520 ELSE ended_at
6521 END,
6522 last_message_idx = CASE
6523 WHEN ?2 IS NULL THEN last_message_idx
6524 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6525 ELSE last_message_idx
6526 END,
6527 last_message_created_at = CASE
6528 WHEN ?3 IS NULL THEN last_message_created_at
6529 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6530 ELSE last_message_created_at
6531 END
6532 WHERE id = ?4",
6533 fparams![
6534 ended_at_candidate,
6535 last_message_idx_candidate,
6536 last_message_created_at_candidate,
6537 conversation_id
6538 ],
6539 )?;
6540 Ok(())
6541}
6542
6543fn franken_tail_state_insert_ended_at(
6544 tx: &FrankenTransaction<'_>,
6545 conversation_id: i64,
6546 candidate: Option<i64>,
6547) -> Result<Option<i64>> {
6548 let canonical: Option<i64> = tx
6549 .query_row_map(
6550 "SELECT ended_at FROM conversations WHERE id = ?1",
6551 fparams![conversation_id],
6552 |row| row.get_typed(0),
6553 )
6554 .optional()?
6555 .flatten();
6556 Ok(canonical.max(candidate))
6557}
6558
6559fn franken_update_conversation_tail_state(
6560 tx: &FrankenTransaction<'_>,
6561 conversation_id: i64,
6562 ended_at_candidate: Option<i64>,
6563 last_message_idx_candidate: Option<i64>,
6564 last_message_created_at_candidate: Option<i64>,
6565) -> Result<()> {
6566 if ended_at_candidate.is_none()
6567 && last_message_idx_candidate.is_none()
6568 && last_message_created_at_candidate.is_none()
6569 {
6570 return Ok(());
6571 }
6572
6573 let changed = tx.execute_compat(
6574 "UPDATE conversation_tail_state
6575 SET ended_at = CASE
6576 WHEN ?1 IS NULL THEN ended_at
6577 ELSE MAX(IFNULL(ended_at, 0), ?1)
6578 END,
6579 last_message_idx = CASE
6580 WHEN ?2 IS NULL THEN last_message_idx
6581 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
6582 ELSE last_message_idx
6583 END,
6584 last_message_created_at = CASE
6585 WHEN ?3 IS NULL THEN last_message_created_at
6586 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
6587 ELSE last_message_created_at
6588 END
6589 WHERE conversation_id = ?4",
6590 fparams![
6591 ended_at_candidate,
6592 last_message_idx_candidate,
6593 last_message_created_at_candidate,
6594 conversation_id
6595 ],
6596 )?;
6597 if changed == 0 {
6598 let insert_ended_at =
6599 franken_tail_state_insert_ended_at(tx, conversation_id, ended_at_candidate)?;
6600 franken_insert_conversation_tail_state(
6601 tx,
6602 conversation_id,
6603 insert_ended_at,
6604 last_message_idx_candidate,
6605 last_message_created_at_candidate,
6606 )?;
6607 }
6608 franken_update_conversation_tail_columns(
6609 tx,
6610 conversation_id,
6611 ended_at_candidate,
6612 last_message_idx_candidate,
6613 last_message_created_at_candidate,
6614 )?;
6615 Ok(())
6616}
6617
6618fn franken_set_conversation_tail_state_after_append(
6619 tx: &FrankenTransaction<'_>,
6620 conversation_id: i64,
6621 ended_at: i64,
6622 last_message_idx: i64,
6623 last_message_created_at: i64,
6624) -> Result<()> {
6625 let changed = tx.execute_compat(
6626 "UPDATE conversation_tail_state
6627 SET ended_at = ?1,
6628 last_message_idx = ?2,
6629 last_message_created_at = ?3
6630 WHERE conversation_id = ?4",
6631 fparams![
6632 ended_at,
6633 last_message_idx,
6634 last_message_created_at,
6635 conversation_id
6636 ],
6637 )?;
6638 if changed == 0 {
6639 let insert_ended_at =
6640 franken_tail_state_insert_ended_at(tx, conversation_id, Some(ended_at))?;
6641 franken_insert_conversation_tail_state(
6642 tx,
6643 conversation_id,
6644 insert_ended_at,
6645 Some(last_message_idx),
6646 Some(last_message_created_at),
6647 )?;
6648 }
6649 franken_update_conversation_tail_columns(
6650 tx,
6651 conversation_id,
6652 Some(ended_at),
6653 Some(last_message_idx),
6654 Some(last_message_created_at),
6655 )?;
6656 Ok(())
6657}
6658
6659fn collect_append_only_tail_messages<'a>(
6660 conv: &'a Conversation,
6661 existing_max_idx: i64,
6662 existing_max_created_at: i64,
6663) -> Option<ExistingConversationNewMessages<'a>> {
6664 if conv.messages.is_empty() {
6665 return Some(ExistingConversationNewMessages {
6666 messages: Vec::new(),
6667 new_chars: 0,
6668 idx_collision_count: 0,
6669 first_collision_idx: None,
6670 });
6671 }
6672
6673 let mut split_idx = None;
6674 let mut prev_idx = None;
6675 for (pos, msg) in conv.messages.iter().enumerate() {
6676 if prev_idx.is_some_and(|prev| msg.idx < prev) {
6677 return None;
6678 }
6679 prev_idx = Some(msg.idx);
6680 if split_idx.is_none() && msg.idx > existing_max_idx {
6681 split_idx = Some(pos);
6682 }
6683 }
6684 let split_idx = split_idx?;
6685
6686 let mut seen_tail_idx = HashSet::new();
6687 let mut seen_tail_replay = HashSet::new();
6688 let mut new_chars = 0i64;
6689 let mut messages = Vec::new();
6690 for msg in &conv.messages[split_idx..] {
6691 let created_at = msg.created_at?;
6692 if created_at <= existing_max_created_at {
6693 return None;
6694 }
6695
6696 if !seen_tail_idx.insert(msg.idx) {
6697 return None;
6698 }
6699
6700 let replay_fingerprint = message_replay_fingerprint(msg);
6701 if !seen_tail_replay.insert(replay_fingerprint) {
6702 return None;
6703 }
6704
6705 new_chars += msg.content.len() as i64;
6706 messages.push(msg);
6707 }
6708
6709 Some(ExistingConversationNewMessages {
6710 messages,
6711 new_chars,
6712 idx_collision_count: 0,
6713 first_collision_idx: None,
6714 })
6715}
6716
/// Absolute distance between two optional timestamps.
///
/// Returns `i64::MAX` when either timestamp is missing or when the distance
/// does not fit in an `i64`.
fn start_distance_ms(left: Option<i64>, right: Option<i64>) -> i64 {
    let (Some(left), Some(right)) = (left, right) else {
        return i64::MAX;
    };
    // `abs_diff` cannot overflow; the result may exceed `i64::MAX`, in which
    // case the conversion fails and the distance is clamped.
    i64::try_from(left.abs_diff(right)).unwrap_or(i64::MAX)
}
6726
6727fn conversation_merge_evidence(
6728 incoming_exact: &HashSet<MessageMergeFingerprint>,
6729 incoming_replay: &HashSet<MessageReplayFingerprint>,
6730 existing_exact: &HashSet<MessageMergeFingerprint>,
6731 existing_replay: &HashSet<MessageReplayFingerprint>,
6732 incoming_started_at: Option<i64>,
6733 existing_started_at: Option<i64>,
6734) -> Option<ConversationMergeEvidence> {
6735 let exact_overlap = incoming_exact.intersection(existing_exact).count();
6736 let replay_overlap = incoming_replay.intersection(existing_replay).count();
6737 if exact_overlap == 0 && replay_overlap == 0 {
6738 return None;
6739 }
6740
6741 let smaller_replay_set = incoming_replay.len().min(existing_replay.len());
6742 let started_close = timestamps_within_tolerance(
6743 incoming_started_at,
6744 existing_started_at,
6745 SOURCE_PATH_MERGE_START_TOLERANCE_MS,
6746 );
6747 let full_replay_subset_match = smaller_replay_set >= 2 && replay_overlap == smaller_replay_set;
6748
6749 let merge_allowed = if started_close {
6750 exact_overlap >= 1 || replay_overlap >= 2
6751 } else {
6752 exact_overlap >= 2 || full_replay_subset_match
6753 };
6754
6755 merge_allowed.then_some(ConversationMergeEvidence {
6756 exact_overlap,
6757 replay_overlap,
6758 smaller_replay_set,
6759 started_close,
6760 start_distance_ms: start_distance_ms(incoming_started_at, existing_started_at),
6761 })
6762}
6763
/// Reports whether two timestamps are both present and differ by at most
/// `tolerance_ms`.
///
/// The difference is computed in 128-bit arithmetic, so extreme inputs cannot
/// overflow. A negative tolerance never matches.
fn timestamps_within_tolerance(left: Option<i64>, right: Option<i64>, tolerance_ms: i64) -> bool {
    let (Some(left), Some(right)) = (left, right) else {
        return false;
    };
    let gap = i128::from(left) - i128::from(right);
    gap.abs() <= i128::from(tolerance_ms)
}
6772
6773fn conversation_merge_key(agent_id: i64, conv: &Conversation) -> PendingConversationKey {
6774 if let Some(external_id) = conv.external_id.clone() {
6775 PendingConversationKey::External {
6776 source_id: conv.source_id.clone(),
6777 agent_id,
6778 external_id,
6779 }
6780 } else {
6781 PendingConversationKey::SourcePath {
6782 source_id: conv.source_id.clone(),
6783 agent_id,
6784 source_path: path_to_string(&conv.source_path),
6785 started_at: conversation_effective_started_at(conv),
6786 }
6787 }
6788}
6789
/// Flat message record carrying the fields listed below.
/// NOTE(review): judging by the name this is the input to an embedding
/// step — confirm against the code that produces and consumes it.
pub struct MessageForEmbedding {
    /// Identifier of the message (presumably the `messages` row id — confirm).
    pub message_id: i64,
    /// Message timestamp, if one is recorded.
    pub created_at: Option<i64>,
    /// Identifier of the agent the message belongs to.
    pub agent_id: i64,
    /// Identifier of the workspace, when the conversation has one.
    pub workspace_id: Option<i64>,
    /// 32-bit hash of the source id (hash function not visible here).
    pub source_id_hash: u32,
    /// Message role as text.
    pub role: String,
    /// Message content.
    pub content: String,
}
6800
6801impl FrankenStorage {
6806 pub fn ensure_agent(&self, agent: &Agent) -> Result<i64> {
6808 let cache_key = EnsuredAgentKey::from_agent(agent);
6809 if let Some(id) = self.cached_agent_id(&cache_key) {
6810 return Ok(id);
6811 }
6812
6813 let now = Self::now_millis();
6814 self.conn.execute_compat(
6815 "INSERT INTO agents(slug, name, version, kind, created_at, updated_at)
6816 VALUES(?1, ?2, ?3, ?4, ?5, ?6)
6817 ON CONFLICT(slug) DO UPDATE SET
6818 name = excluded.name,
6819 version = excluded.version,
6820 kind = excluded.kind,
6821 updated_at = excluded.updated_at
6822 WHERE NOT (
6823 agents.name IS excluded.name
6824 AND agents.version IS excluded.version
6825 AND agents.kind IS excluded.kind
6826 )",
6827 fparams![
6828 agent.slug.as_str(),
6829 agent.name.as_str(),
6830 agent.version.as_deref(),
6831 cache_key.kind.as_str(),
6832 now,
6833 now
6834 ],
6835 )?;
6836
6837 let id = self
6838 .conn
6839 .query_row_map(
6840 "SELECT id FROM agents WHERE slug = ?1 LIMIT 1",
6841 fparams![agent.slug.as_str()],
6842 |row| row.get_typed(0),
6843 )
6844 .with_context(|| format!("fetching agent id for {}", agent.slug))?;
6845 self.mark_agent_ensured(cache_key, id);
6846 Ok(id)
6847 }
6848
6849 pub fn ensure_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
6851 let path_str = path.to_string_lossy().to_string();
6852 let cache_key = EnsuredWorkspaceKey::new(path_str.clone(), display_name);
6853 if let Some(id) = self.cached_workspace_id(&cache_key) {
6854 return Ok(id);
6855 }
6856
6857 if let Some(display_name) = display_name {
6858 self.conn.execute_compat(
6859 "INSERT INTO workspaces(path, display_name)
6860 VALUES(?1, ?2)
6861 ON CONFLICT(path) DO UPDATE SET
6862 display_name = excluded.display_name
6863 WHERE NOT (workspaces.display_name IS excluded.display_name)",
6864 fparams![path_str.as_str(), display_name],
6865 )?;
6866 } else {
6867 self.conn.execute_compat(
6868 "INSERT OR IGNORE INTO workspaces(path, display_name) VALUES(?1, NULL)",
6869 fparams![path_str.as_str()],
6870 )?;
6871 }
6872
6873 let id = self
6874 .conn
6875 .query_row_map(
6876 "SELECT id FROM workspaces WHERE path = ?1 LIMIT 1",
6877 fparams![path_str.as_str()],
6878 |row| row.get_typed(0),
6879 )
6880 .with_context(|| format!("fetching workspace id for {path_str}"))?;
6881 self.mark_workspace_ensured(cache_key, id);
6882 Ok(id)
6883 }
6884
6885 pub fn now_millis() -> i64 {
6887 SystemTime::now()
6888 .duration_since(UNIX_EPOCH)
6889 .map(|d| i64::try_from(d.as_millis()).unwrap_or(i64::MAX))
6890 .unwrap_or(0)
6891 }
6892
6893 pub fn day_id_from_millis(timestamp_ms: i64) -> i64 {
6895 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6896 let secs = timestamp_ms.div_euclid(1000);
6897 (secs - EPOCH_2020_SECS).div_euclid(86400)
6898 }
6899
6900 pub fn hour_id_from_millis(timestamp_ms: i64) -> i64 {
6902 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6903 let secs = timestamp_ms.div_euclid(1000);
6904 (secs - EPOCH_2020_SECS).div_euclid(3600)
6905 }
6906
6907 pub fn millis_from_day_id(day_id: i64) -> i64 {
6909 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6910 (EPOCH_2020_SECS + day_id * 86400) * 1000
6911 }
6912
6913 pub fn millis_from_hour_id(hour_id: i64) -> i64 {
6915 const EPOCH_2020_SECS: i64 = 1_577_836_800;
6916 (EPOCH_2020_SECS + hour_id * 3600) * 1000
6917 }
6918
6919 pub fn get_last_scan_ts(&self) -> Result<Option<i64>> {
6921 let result: Result<String, _> = self.conn.query_row_map(
6922 "SELECT value FROM meta WHERE key = 'last_scan_ts'",
6923 fparams![],
6924 |row| row.get_typed(0),
6925 );
6926 match result.optional() {
6927 Ok(Some(s)) => Ok(s.parse().ok()),
6928 Ok(None) => Ok(None),
6929 Err(e) => Err(e.into()),
6930 }
6931 }
6932
6933 pub fn set_last_scan_ts(&self, ts: i64) -> Result<()> {
6935 self.conn.execute_compat(
6936 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_scan_ts', ?1)",
6937 fparams![ts.to_string()],
6938 )?;
6939 Ok(())
6940 }
6941
6942 pub fn get_last_indexed_at(&self) -> Result<Option<i64>> {
6944 let result: Result<String, _> = self.conn.query_row_map(
6945 "SELECT value FROM meta WHERE key = 'last_indexed_at'",
6946 fparams![],
6947 |row| row.get_typed(0),
6948 );
6949 match result.optional() {
6950 Ok(Some(s)) => Ok(s.parse().ok()),
6951 Ok(None) => Ok(None),
6952 Err(e) => Err(e.into()),
6953 }
6954 }
6955
6956 pub fn set_last_indexed_at(&self, ts: i64) -> Result<()> {
6958 self.conn.execute_compat(
6959 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_indexed_at', ?1)",
6960 fparams![ts.to_string()],
6961 )?;
6962 Ok(())
6963 }
6964
6965 pub fn list_agents(&self) -> Result<Vec<Agent>> {
6967 self.conn
6968 .query_map_collect(
6969 "SELECT id, slug, name, version, kind FROM agents ORDER BY slug",
6970 fparams![],
6971 |row| {
6972 let kind: String = row.get_typed(4)?;
6973 Ok(Agent {
6974 id: Some(row.get_typed(0)?),
6975 slug: row.get_typed(1)?,
6976 name: row.get_typed(2)?,
6977 version: row.get_typed(3)?,
6978 kind: match kind.as_str() {
6979 "cli" => AgentKind::Cli,
6980 "vscode" => AgentKind::VsCode,
6981 _ => AgentKind::Hybrid,
6982 },
6983 })
6984 },
6985 )
6986 .with_context(|| "listing agents")
6987 }
6988
6989 pub fn total_conversation_count(&self) -> Result<usize> {
6991 let count: i64 =
6992 self.conn
6993 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
6994 row.get_typed(0)
6995 })?;
6996 Ok(count.max(0) as usize)
6997 }
6998
6999 pub fn total_message_count(&self) -> Result<usize> {
7001 let count: i64 =
7002 self.conn
7003 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
7004 row.get_typed(0)
7005 })?;
7006 Ok(count.max(0) as usize)
7007 }
7008
7009 pub fn purge_agent_archive_data(&self, agent_slug: &str) -> Result<AgentArchivePurgeResult> {
7014 let normalized = agent_slug.trim().to_ascii_lowercase();
7015 if normalized.is_empty() {
7016 return Err(anyhow!("agent slug cannot be empty"));
7017 }
7018
7019 let Some(agent_id) = self
7020 .conn
7021 .query_row_map(
7022 "SELECT id FROM agents WHERE slug = ?1",
7023 fparams![normalized.as_str()],
7024 |row| row.get_typed::<i64>(0),
7025 )
7026 .optional()?
7027 else {
7028 return Ok(AgentArchivePurgeResult::default());
7029 };
7030
7031 let conversations_deleted: i64 = self.conn.query_row_map(
7032 "SELECT COUNT(*) FROM conversations WHERE agent_id = ?1",
7033 fparams![agent_id],
7034 |row| row.get_typed(0),
7035 )?;
7036 if conversations_deleted == 0 {
7037 return Ok(AgentArchivePurgeResult::default());
7038 }
7039
7040 let messages_deleted: i64 = self.conn.query_row_map(
7041 "SELECT COUNT(*)
7042 FROM messages
7043 WHERE conversation_id IN (
7044 SELECT id FROM conversations WHERE agent_id = ?1
7045 )",
7046 fparams![agent_id],
7047 |row| row.get_typed(0),
7048 )?;
7049
7050 let mut tx = self.conn.transaction()?;
7051 tx.execute_compat(
7052 "DELETE FROM conversation_external_lookup
7053 WHERE conversation_id IN (
7054 SELECT id FROM conversations WHERE agent_id = ?1
7055 )",
7056 fparams![agent_id],
7057 )?;
7058 tx.execute_compat(
7059 "DELETE FROM conversation_external_tail_lookup
7060 WHERE conversation_id IN (
7061 SELECT id FROM conversations WHERE agent_id = ?1
7062 )",
7063 fparams![agent_id],
7064 )?;
7065 tx.execute_compat(
7066 "DELETE FROM conversations WHERE agent_id = ?1",
7067 fparams![agent_id],
7068 )?;
7069 tx.execute_compat(
7070 "DELETE FROM agents
7071 WHERE id = ?1
7072 AND NOT EXISTS (
7073 SELECT 1 FROM conversations WHERE agent_id = ?1
7074 )",
7075 fparams![agent_id],
7076 )?;
7077 tx.commit()?;
7078
7079 Ok(AgentArchivePurgeResult {
7080 conversations_deleted: conversations_deleted.max(0) as usize,
7081 messages_deleted: messages_deleted.max(0) as usize,
7082 })
7083 }
7084
7085 pub fn list_workspaces(&self) -> Result<Vec<crate::model::types::Workspace>> {
7087 self.conn
7088 .query_map_collect(
7089 "SELECT id, path, display_name FROM workspaces ORDER BY path",
7090 fparams![],
7091 |row| {
7092 let path_str: String = row.get_typed(1)?;
7093 Ok(crate::model::types::Workspace {
7094 id: Some(row.get_typed(0)?),
7095 path: Path::new(&path_str).to_path_buf(),
7096 display_name: row.get_typed(2)?,
7097 })
7098 },
7099 )
7100 .with_context(|| "listing workspaces")
7101 }
7102
7103 pub fn list_conversations(&self, limit: i64, offset: i64) -> Result<Vec<Conversation>> {
7105 self.conn
7112 .query_map_collect(
7113 r"SELECT c.id,
7114 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
7115 (SELECT w.path FROM workspaces w WHERE w.id = c.workspace_id),
7116 c.external_id, c.title, c.source_path,
7117 c.started_at,
7118 COALESCE(
7119 (SELECT ts.ended_at
7120 FROM conversation_tail_state ts
7121 WHERE ts.conversation_id = c.id),
7122 c.ended_at
7123 ),
7124 c.approx_tokens, c.metadata_json,
7125 c.source_id, c.origin_host, c.metadata_bin
7126 FROM conversations c
7127 ORDER BY CASE WHEN c.started_at IS NULL THEN 1 ELSE 0 END, c.started_at DESC, c.id DESC
7128 LIMIT ?1 OFFSET ?2",
7129 fparams![limit, offset],
7130 |row| {
7131 let workspace_path: Option<String> = row.get_typed(2)?;
7132 let source_path: String = row.get_typed(5)?;
7133 let raw_source_id: Option<String> = row.get_typed(10)?;
7134 let raw_origin_host: Option<String> = row.get_typed(11)?;
7135 let (source_id, _, origin_host) = normalized_storage_source_parts(
7136 raw_source_id.as_deref(),
7137 None,
7138 raw_origin_host.as_deref(),
7139 );
7140 Ok(Conversation {
7141 id: Some(row.get_typed(0)?),
7142 agent_slug: row.get_typed(1)?,
7143 workspace: workspace_path.map(|p| Path::new(&p).to_path_buf()),
7144 external_id: row.get_typed(3)?,
7145 title: row.get_typed(4)?,
7146 source_path: Path::new(&source_path).to_path_buf(),
7147 started_at: row.get_typed(6)?,
7148 ended_at: row.get_typed(7)?,
7149 approx_tokens: row.get_typed(8)?,
7150 metadata_json: franken_read_metadata_compat(row, 9, 12),
7151 messages: Vec::new(),
7152 source_id,
7153 origin_host,
7154 })
7155 },
7156 )
7157 .with_context(|| "listing conversations")
7158 }
7159
7160 pub fn build_lexical_rebuild_lookups(
7164 &self,
7165 ) -> Result<(HashMap<i64, String>, HashMap<i64, PathBuf>)> {
7166 let agents: HashMap<i64, String> = self
7167 .conn
7168 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
7169 Ok((row.get_typed::<i64>(0)?, row.get_typed::<String>(1)?))
7170 })
7171 .with_context(|| "loading agent lookup for lexical rebuild")?
7172 .into_iter()
7173 .collect();
7174 let workspaces: HashMap<i64, PathBuf> = self
7175 .conn
7176 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
7177 let path_str: String = row.get_typed(1)?;
7178 Ok((row.get_typed::<i64>(0)?, PathBuf::from(path_str)))
7179 })
7180 .with_context(|| "loading workspace lookup for lexical rebuild")?
7181 .into_iter()
7182 .collect();
7183 Ok((agents, workspaces))
7184 }
7185
7186 pub fn list_conversation_footprints_for_lexical_rebuild(
7199 &self,
7200 ) -> Result<Vec<LexicalRebuildConversationFootprintRow>> {
7201 let tail_state_rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
7202 "SELECT conversation_id, last_message_idx
7203 FROM conversation_tail_state
7204 ORDER BY conversation_id ASC",
7205 fparams![],
7206 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
7207 ) {
7208 Ok(rows) => rows,
7209 Err(err) if error_indicates_missing_table(&err) => Vec::new(),
7210 Err(err) => {
7211 return Err(err).with_context(|| "listing lexical rebuild tail-state estimates");
7212 }
7213 };
7214 let tail_state_by_conversation: HashMap<i64, Option<i64>> =
7215 tail_state_rows.into_iter().collect();
7216
7217 let rows: Vec<(i64, Option<i64>)> = match self.conn.query_map_collect(
7218 "SELECT id, last_message_idx
7219 FROM conversations
7220 ORDER BY id ASC",
7221 fparams![],
7222 |row| Ok((row.get_typed::<i64>(0)?, row.get_typed::<Option<i64>>(1)?)),
7223 ) {
7224 Ok(rows) => rows,
7225 Err(err) if error_indicates_missing_column(&err) => self
7226 .conn
7227 .query_map_collect(
7228 "SELECT id
7229 FROM conversations
7230 ORDER BY id ASC",
7231 fparams![],
7232 |row| Ok((row.get_typed::<i64>(0)?, None)),
7233 )
7234 .with_context(|| {
7235 "listing lexical rebuild conversation ids after missing tail column fallback"
7236 })?,
7237 Err(err) => {
7238 return Err(err)
7239 .with_context(|| "listing lexical rebuild conversation footprint estimates");
7240 }
7241 };
7242
7243 let mut footprints = Vec::with_capacity(rows.len());
7244 let mut missing_tail_positions = HashMap::new();
7245 for (conversation_id, conversation_last_message_idx) in rows {
7246 let last_message_idx = tail_state_by_conversation
7247 .get(&conversation_id)
7248 .copied()
7249 .flatten()
7250 .or(conversation_last_message_idx);
7251 let Some(message_count) = lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7252 else {
7253 missing_tail_positions.insert(conversation_id, footprints.len());
7254 footprints.push(LexicalRebuildConversationFootprintRow {
7255 conversation_id,
7256 message_count: 0,
7257 message_bytes: 0,
7258 });
7259 continue;
7260 };
7261 footprints.push(lexical_rebuild_conversation_footprint_from_count(
7262 conversation_id,
7263 message_count,
7264 ));
7265 }
7266
7267 let every_footprint_was_missing_tail = missing_tail_positions.len() == footprints.len();
7268 if !missing_tail_positions.is_empty() {
7269 self.fill_missing_lexical_rebuild_footprint_tails(
7270 &mut footprints,
7271 &missing_tail_positions,
7272 )?;
7273 }
7274 if !every_footprint_was_missing_tail {
7275 self.raise_lexical_rebuild_footprints_to_exact_message_counts(&mut footprints)?;
7276 }
7277
7278 Ok(footprints)
7279 }
7280
7281 pub fn lexical_rebuild_has_tail_footprint_metadata(&self) -> Result<bool> {
7282 let total_conversations: i64 = self
7283 .conn
7284 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
7285 row.get_typed(0)
7286 })
7287 .with_context(|| "counting conversations for lexical rebuild tail metadata coverage")?;
7288 let total_conversations = usize::try_from(total_conversations.max(0)).unwrap_or(usize::MAX);
7289 if total_conversations == 0 {
7290 return Ok(true);
7291 }
7292
7293 let conversation_columns = franken_table_column_names(&self.conn, "conversations")?;
7294 let conversations_have_tail_column = conversation_columns.contains("last_message_idx");
7295 let tail_state_has_tail_column =
7296 match franken_table_column_names(&self.conn, "conversation_tail_state") {
7297 Ok(columns) => columns.contains("last_message_idx"),
7298 Err(err) if error_indicates_missing_table(&err) => false,
7299 Err(err) => {
7300 return Err(err)
7301 .with_context(|| "reading lexical rebuild tail-state metadata columns");
7302 }
7303 };
7304 if !conversations_have_tail_column && !tail_state_has_tail_column {
7305 return Ok(false);
7306 }
7307
7308 let covered_sql = match (conversations_have_tail_column, tail_state_has_tail_column) {
7309 (true, true) => {
7310 "SELECT COUNT(*)
7311 FROM conversations c
7312 LEFT JOIN conversation_tail_state ts ON ts.conversation_id = c.id
7313 WHERE c.last_message_idx IS NOT NULL
7314 OR ts.last_message_idx IS NOT NULL"
7315 }
7316 (true, false) => {
7317 "SELECT COUNT(*)
7318 FROM conversations
7319 WHERE last_message_idx IS NOT NULL"
7320 }
7321 (false, true) => {
7322 "SELECT COUNT(*)
7323 FROM conversations c
7324 WHERE EXISTS (
7325 SELECT 1
7326 FROM conversation_tail_state ts
7327 WHERE ts.conversation_id = c.id
7328 AND ts.last_message_idx IS NOT NULL
7329 )"
7330 }
7331 (false, false) => unreachable!("checked before covered_sql selection"),
7332 };
7333 let covered_conversations: i64 = self
7334 .conn
7335 .query_row_map(covered_sql, fparams![], |row| row.get_typed(0))
7336 .with_context(
7337 || "counting conversations covered by lexical rebuild tail footprint metadata",
7338 )?;
7339 let covered_conversations =
7340 usize::try_from(covered_conversations.max(0)).unwrap_or(usize::MAX);
7341
7342 Ok(lexical_rebuild_tail_metadata_coverage_is_sufficient(
7343 total_conversations,
7344 covered_conversations,
7345 ))
7346 }
7347
7348 fn raise_lexical_rebuild_footprints_to_exact_message_counts(
7349 &self,
7350 footprints: &mut [LexicalRebuildConversationFootprintRow],
7351 ) -> Result<()> {
7352 if footprints.is_empty() {
7353 return Ok(());
7354 }
7355
7356 let positions_by_conversation: HashMap<i64, usize> = footprints
7357 .iter()
7358 .enumerate()
7359 .map(|(position, footprint)| (footprint.conversation_id, position))
7360 .collect();
7361 self.conn
7362 .query_with_params_for_each(
7363 "SELECT conversation_id, COUNT(*) AS message_count
7364 FROM messages
7365 GROUP BY conversation_id
7366 ORDER BY conversation_id ASC",
7367 &[] as &[SqliteValue],
7368 |row| {
7369 let conversation_id: i64 = row.get_typed(0)?;
7370 let exact_count: i64 = row.get_typed(1)?;
7371 let Some(position) = positions_by_conversation.get(&conversation_id) else {
7372 return Ok(());
7373 };
7374 let exact_count = usize::try_from(exact_count.max(0)).unwrap_or(usize::MAX);
7375 let footprint = &mut footprints[*position];
7376 if exact_count > footprint.message_count {
7377 footprint.message_count = exact_count;
7378 footprint.message_bytes =
7379 footprint.message_bytes.max(exact_count.saturating_mul(
7380 LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
7381 ));
7382 }
7383 Ok(())
7384 },
7385 )
7386 .with_context(|| "raising lexical rebuild footprints to exact message counts")?;
7387 Ok(())
7388 }
7389
7390 fn fill_missing_lexical_rebuild_footprint_tails(
7391 &self,
7392 footprints: &mut [LexicalRebuildConversationFootprintRow],
7393 missing_tail_positions: &HashMap<i64, usize>,
7394 ) -> Result<()> {
7395 if missing_tail_positions.len() <= LEXICAL_REBUILD_FOOTPRINT_POINT_TAIL_FALLBACK_LIMIT {
7396 for (conversation_id, position) in missing_tail_positions {
7397 let last_message_idx: Option<i64> = self
7398 .conn
7399 .query_row_map(
7400 "SELECT MAX(idx) FROM messages WHERE conversation_id = ?1",
7401 fparams![*conversation_id],
7402 |row| row.get_typed(0),
7403 )
7404 .with_context(|| {
7405 format!(
7406 "looking up missing lexical rebuild tail estimate for conversation {conversation_id}"
7407 )
7408 })?;
7409 if let Some(message_count) =
7410 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7411 {
7412 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7413 *conversation_id,
7414 message_count,
7415 );
7416 }
7417 }
7418 return Ok(());
7419 }
7420
7421 self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7422 footprints,
7423 missing_tail_positions,
7424 "SELECT conversation_id, MAX(idx) AS last_message_idx
7425 FROM messages INDEXED BY idx_messages_conv_idx
7426 GROUP BY conversation_id
7427 ORDER BY conversation_id ASC",
7428 )
7429 .or_else(|err| {
7430 if err
7431 .to_string()
7432 .contains("no such index: idx_messages_conv_idx")
7433 {
7434 return self.fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7435 footprints,
7436 missing_tail_positions,
7437 "SELECT conversation_id, MAX(idx) AS last_message_idx
7438 FROM messages
7439 GROUP BY conversation_id
7440 ORDER BY conversation_id ASC",
7441 );
7442 }
7443 Err(err)
7444 })
7445 .with_context(|| "grouping missing lexical rebuild tail estimates from messages")?;
7446
7447 Ok(())
7448 }
7449
7450 fn fill_missing_lexical_rebuild_footprint_tails_from_grouped_messages(
7451 &self,
7452 footprints: &mut [LexicalRebuildConversationFootprintRow],
7453 missing_tail_positions: &HashMap<i64, usize>,
7454 sql: &str,
7455 ) -> Result<()> {
7456 self.conn
7457 .query_with_params_for_each(sql, &[] as &[SqliteValue], |row| {
7458 let conversation_id: i64 = row.get_typed(0)?;
7459 let last_message_idx: Option<i64> = row.get_typed(1)?;
7460 let Some(position) = missing_tail_positions.get(&conversation_id) else {
7461 return Ok(());
7462 };
7463 if let Some(message_count) =
7464 lexical_rebuild_message_count_from_tail_idx(last_message_idx)
7465 {
7466 footprints[*position] = lexical_rebuild_conversation_footprint_from_count(
7467 conversation_id,
7468 message_count,
7469 );
7470 }
7471 Ok(())
7472 })
7473 .with_context(|| "grouping lexical rebuild missing tail estimates")
7474 }
7475
7476 pub fn list_conversation_ids_for_lexical_rebuild(&self) -> Result<Vec<i64>> {
7478 self.conn
7479 .query_map_collect(
7480 "SELECT id FROM conversations ORDER BY id ASC",
7481 fparams![],
7482 |row| row.get_typed(0),
7483 )
7484 .with_context(|| "listing conversation ids for lexical rebuild")
7485 }
7486 pub fn list_conversations_for_lexical_rebuild_by_offset(
7491 &self,
7492 limit: i64,
7493 offset: i64,
7494 agent_slugs: &HashMap<i64, String>,
7495 workspace_paths: &HashMap<i64, PathBuf>,
7496 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7497 self.conn
7500 .query_map_collect(
7501 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7502 started_at,
7503 COALESCE(
7504 (SELECT ts.ended_at
7505 FROM conversation_tail_state ts
7506 WHERE ts.conversation_id = conversations.id),
7507 ended_at
7508 ),
7509 source_id, origin_host
7510 FROM conversations
7511 ORDER BY id ASC
7512 LIMIT ?1 OFFSET ?2",
7513 fparams![limit, offset],
7514 |row| {
7515 let agent_id: Option<i64> = row.get_typed(1)?;
7516 let workspace_id: Option<i64> = row.get_typed(2)?;
7517 let source_path: String = row.get_typed(5)?;
7518 let raw_source_id: Option<String> = row.get_typed(8)?;
7519 let raw_origin_host: Option<String> = row.get_typed(9)?;
7520 let (source_id, _, origin_host) = normalized_storage_source_parts(
7521 raw_source_id.as_deref(),
7522 None,
7523 raw_origin_host.as_deref(),
7524 );
7525 Ok(LexicalRebuildConversationRow {
7526 id: Some(row.get_typed(0)?),
7527 agent_slug: agent_id
7528 .and_then(|aid| agent_slugs.get(&aid).cloned())
7529 .unwrap_or_else(|| "unknown".to_string()),
7530 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7531 external_id: row.get_typed(3)?,
7532 title: row.get_typed(4)?,
7533 source_path: Path::new(&source_path).to_path_buf(),
7534 started_at: row.get_typed(6)?,
7535 ended_at: row.get_typed(7)?,
7536 source_id,
7537 origin_host,
7538 })
7539 },
7540 )
7541 .with_context(|| "listing conversations for lexical rebuild")
7542 }
7543
7544 pub fn list_conversations_for_lexical_rebuild_after_id(
7549 &self,
7550 limit: i64,
7551 after_conversation_id: i64,
7552 agent_slugs: &HashMap<i64, String>,
7553 workspace_paths: &HashMap<i64, PathBuf>,
7554 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7555 self.conn
7556 .query_map_collect(
7557 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7558 started_at,
7559 COALESCE(
7560 (SELECT ts.ended_at
7561 FROM conversation_tail_state ts
7562 WHERE ts.conversation_id = conversations.id),
7563 ended_at
7564 ),
7565 source_id, origin_host
7566 FROM conversations
7567 WHERE id > ?2
7568 ORDER BY id ASC
7569 LIMIT ?1",
7570 fparams![limit, after_conversation_id],
7571 |row| {
7572 let agent_id: Option<i64> = row.get_typed(1)?;
7573 let workspace_id: Option<i64> = row.get_typed(2)?;
7574 let source_path: String = row.get_typed(5)?;
7575 let raw_source_id: Option<String> = row.get_typed(8)?;
7576 let raw_origin_host: Option<String> = row.get_typed(9)?;
7577 let (source_id, _, origin_host) = normalized_storage_source_parts(
7578 raw_source_id.as_deref(),
7579 None,
7580 raw_origin_host.as_deref(),
7581 );
7582 Ok(LexicalRebuildConversationRow {
7583 id: Some(row.get_typed(0)?),
7584 agent_slug: agent_id
7585 .and_then(|aid| agent_slugs.get(&aid).cloned())
7586 .unwrap_or_else(|| "unknown".to_string()),
7587 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7588 external_id: row.get_typed(3)?,
7589 title: row.get_typed(4)?,
7590 source_path: Path::new(&source_path).to_path_buf(),
7591 started_at: row.get_typed(6)?,
7592 ended_at: row.get_typed(7)?,
7593 source_id,
7594 origin_host,
7595 })
7596 },
7597 )
7598 .with_context(|| {
7599 format!(
7600 "listing conversations for lexical rebuild after id {after_conversation_id}"
7601 )
7602 })
7603 }
7604
7605 pub fn list_conversations_for_lexical_rebuild_after_id_through_id(
7611 &self,
7612 limit: i64,
7613 after_conversation_id: i64,
7614 through_conversation_id: i64,
7615 agent_slugs: &HashMap<i64, String>,
7616 workspace_paths: &HashMap<i64, PathBuf>,
7617 ) -> Result<Vec<LexicalRebuildConversationRow>> {
7618 if through_conversation_id <= after_conversation_id {
7619 return Ok(Vec::new());
7620 }
7621 self.conn
7622 .query_map_collect(
7623 r"SELECT id, agent_id, workspace_id, external_id, title, source_path,
7624 started_at,
7625 COALESCE(
7626 (SELECT ts.ended_at
7627 FROM conversation_tail_state ts
7628 WHERE ts.conversation_id = conversations.id),
7629 ended_at
7630 ),
7631 source_id, origin_host
7632 FROM conversations
7633 WHERE id > ?2 AND id <= ?3
7634 ORDER BY id ASC
7635 LIMIT ?1",
7636 fparams![limit, after_conversation_id, through_conversation_id],
7637 |row| {
7638 let agent_id: Option<i64> = row.get_typed(1)?;
7639 let workspace_id: Option<i64> = row.get_typed(2)?;
7640 let source_path: String = row.get_typed(5)?;
7641 let raw_source_id: Option<String> = row.get_typed(8)?;
7642 let raw_origin_host: Option<String> = row.get_typed(9)?;
7643 let (source_id, _, origin_host) = normalized_storage_source_parts(
7644 raw_source_id.as_deref(),
7645 None,
7646 raw_origin_host.as_deref(),
7647 );
7648 Ok(LexicalRebuildConversationRow {
7649 id: Some(row.get_typed(0)?),
7650 agent_slug: agent_id
7651 .and_then(|aid| agent_slugs.get(&aid).cloned())
7652 .unwrap_or_else(|| "unknown".to_string()),
7653 workspace: workspace_id.and_then(|wid| workspace_paths.get(&wid).cloned()),
7654 external_id: row.get_typed(3)?,
7655 title: row.get_typed(4)?,
7656 source_path: Path::new(&source_path).to_path_buf(),
7657 started_at: row.get_typed(6)?,
7658 ended_at: row.get_typed(7)?,
7659 source_id,
7660 origin_host,
7661 })
7662 },
7663 )
7664 .with_context(|| {
7665 format!(
7666 "listing conversations for lexical rebuild after id {after_conversation_id} through id {through_conversation_id}"
7667 )
7668 })
7669 }
7670
7671 pub fn fetch_messages(&self, conversation_id: i64) -> Result<Vec<Message>> {
7673 let hinted_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7674 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7675 WHERE conversation_id = ?1 ORDER BY idx";
7676 let fallback_sql = "SELECT id, idx, role, author, created_at, content, extra_json, extra_bin \
7677 FROM messages \
7678 WHERE conversation_id = ?1 ORDER BY idx";
7679
7680 self.conn
7681 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7682 let role: String = row.get_typed(2)?;
7683 Ok(Message {
7684 id: Some(row.get_typed(0)?),
7685 idx: row.get_typed(1)?,
7686 role: match role.as_str() {
7687 "user" => MessageRole::User,
7688 "agent" | "assistant" => MessageRole::Agent,
7689 "tool" => MessageRole::Tool,
7690 "system" => MessageRole::System,
7691 other => MessageRole::Other(other.to_string()),
7692 },
7693 author: row.get_typed(3)?,
7694 created_at: row.get_typed(4)?,
7695 content: row.get_typed(5)?,
7696 extra_json: franken_read_message_extra_compat(row, 6, 7),
7697 snippets: Vec::new(),
7698 })
7699 })
7700 .or_else(|err| {
7701 if err
7702 .to_string()
7703 .contains("no such index: sqlite_autoindex_messages_1")
7704 {
7705 return self.conn.query_map_collect(
7706 fallback_sql,
7707 fparams![conversation_id],
7708 |row| {
7709 let role: String = row.get_typed(2)?;
7710 Ok(Message {
7711 id: Some(row.get_typed(0)?),
7712 idx: row.get_typed(1)?,
7713 role: match role.as_str() {
7714 "user" => MessageRole::User,
7715 "agent" | "assistant" => MessageRole::Agent,
7716 "tool" => MessageRole::Tool,
7717 "system" => MessageRole::System,
7718 other => MessageRole::Other(other.to_string()),
7719 },
7720 author: row.get_typed(3)?,
7721 created_at: row.get_typed(4)?,
7722 content: row.get_typed(5)?,
7723 extra_json: franken_read_message_extra_compat(row, 6, 7),
7724 snippets: Vec::new(),
7725 })
7726 },
7727 );
7728 }
7729 Err(err)
7730 })
7731 .with_context(|| format!("fetching messages for conversation {conversation_id}"))
7732 }
7733
7734 pub fn fetch_messages_for_lexical_rebuild(&self, conversation_id: i64) -> Result<Vec<Message>> {
7740 let hinted_sql = "SELECT id, idx, role, author, created_at, content \
7741 FROM messages INDEXED BY sqlite_autoindex_messages_1 \
7742 WHERE conversation_id = ?1 ORDER BY idx";
7743 let fallback_sql = "SELECT id, idx, role, author, created_at, content \
7744 FROM messages \
7745 WHERE conversation_id = ?1 ORDER BY idx";
7746
7747 self.conn
7748 .query_map_collect(hinted_sql, fparams![conversation_id], |row| {
7749 let role: String = row.get_typed(2)?;
7750 Ok(Message {
7751 id: Some(row.get_typed(0)?),
7752 idx: row.get_typed(1)?,
7753 role: match role.as_str() {
7754 "user" => MessageRole::User,
7755 "agent" | "assistant" => MessageRole::Agent,
7756 "tool" => MessageRole::Tool,
7757 "system" => MessageRole::System,
7758 other => MessageRole::Other(other.to_string()),
7759 },
7760 author: row.get_typed(3)?,
7761 created_at: row.get_typed(4)?,
7762 content: row.get_typed(5)?,
7763 extra_json: serde_json::Value::Null,
7764 snippets: Vec::new(),
7765 })
7766 })
7767 .or_else(|err| {
7768 if err
7769 .to_string()
7770 .contains("no such index: sqlite_autoindex_messages_1")
7771 {
7772 return self.conn.query_map_collect(
7773 fallback_sql,
7774 fparams![conversation_id],
7775 |row| {
7776 let role: String = row.get_typed(2)?;
7777 Ok(Message {
7778 id: Some(row.get_typed(0)?),
7779 idx: row.get_typed(1)?,
7780 role: match role.as_str() {
7781 "user" => MessageRole::User,
7782 "agent" | "assistant" => MessageRole::Agent,
7783 "tool" => MessageRole::Tool,
7784 "system" => MessageRole::System,
7785 other => MessageRole::Other(other.to_string()),
7786 },
7787 author: row.get_typed(3)?,
7788 created_at: row.get_typed(4)?,
7789 content: row.get_typed(5)?,
7790 extra_json: serde_json::Value::Null,
7791 snippets: Vec::new(),
7792 })
7793 },
7794 );
7795 }
7796 Err(err)
7797 })
7798 .with_context(|| {
7799 format!("fetching messages for lexical rebuild of conversation {conversation_id}")
7800 })
7801 }
7802
7803 pub fn fetch_messages_for_lexical_rebuild_batch(
7808 &self,
7809 conversation_ids: &[i64],
7810 max_messages: Option<usize>,
7811 max_content_bytes: Option<usize>,
7812 ) -> Result<HashMap<i64, Vec<Message>>> {
7813 if conversation_ids.is_empty() {
7814 return Ok(HashMap::new());
7815 }
7816
7817 let mut grouped: HashMap<i64, Vec<Message>> =
7818 HashMap::with_capacity(conversation_ids.len());
7819 let mut fetched_conversation_ids = HashSet::with_capacity(conversation_ids.len());
7820 let mut total_messages = 0usize;
7821 let mut total_content_bytes = 0usize;
7822
7823 for conversation_id in conversation_ids {
7828 if !fetched_conversation_ids.insert(*conversation_id) {
7829 continue;
7830 }
7831
7832 let messages = self
7833 .fetch_messages_for_lexical_rebuild(*conversation_id)
7834 .with_context(|| {
7835 format!("fetching lexical rebuild messages for conversation {conversation_id}")
7836 })?;
7837 total_messages = total_messages.saturating_add(messages.len());
7838 if let Some(limit) = max_messages
7839 && total_messages > limit
7840 {
7841 return Err(anyhow!(
7842 "lexical rebuild batch fetch exceeded message guardrail: messages={total_messages} limit={limit} conversations={}",
7843 conversation_ids.len()
7844 ));
7845 }
7846
7847 let message_bytes = messages
7848 .iter()
7849 .map(|message| message.content.len())
7850 .sum::<usize>();
7851 total_content_bytes = total_content_bytes.saturating_add(message_bytes);
7852 if let Some(limit) = max_content_bytes
7853 && total_content_bytes > limit
7854 {
7855 return Err(anyhow!(
7856 "lexical rebuild batch fetch exceeded content-byte guardrail: bytes={total_content_bytes} limit={limit} conversations={}",
7857 conversation_ids.len()
7858 ));
7859 }
7860
7861 if !messages.is_empty() {
7862 grouped.insert(*conversation_id, messages);
7863 }
7864 }
7865
7866 Ok(grouped)
7867 }
7868
7869 pub fn stream_messages_for_lexical_rebuild_between_conversation_ids<F>(
7872 &self,
7873 start_conversation_id: i64,
7874 end_conversation_id: i64,
7875 mut f: F,
7876 ) -> Result<()>
7877 where
7878 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
7879 {
7880 if end_conversation_id < start_conversation_id {
7881 return Ok(());
7882 }
7883
7884 let conversation_ids: Vec<i64> = self
7885 .conn
7886 .query_map_collect(
7887 "SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
7888 fparams![start_conversation_id, end_conversation_id],
7889 |row| row.get_typed(0),
7890 )
7891 .with_context(|| "listing conversation ids for streamed lexical rebuild")?;
7892
7893 for conversation_id in conversation_ids {
7894 let messages = self
7895 .fetch_messages_for_lexical_rebuild(conversation_id)
7896 .with_context(|| {
7897 format!("streaming lexical rebuild messages for conversation {conversation_id}")
7898 })?;
7899
7900 for message in messages {
7901 let message_id = message.id.ok_or_else(|| {
7902 anyhow!(
7903 "lexical rebuild message missing id for conversation {conversation_id} idx {}",
7904 message.idx
7905 )
7906 })?;
7907 f(LexicalRebuildMessageRow {
7908 conversation_id,
7909 id: message_id,
7910 idx: message.idx,
7911 role: role_str(&message.role),
7912 author: message.author,
7913 created_at: message.created_at,
7914 content: message.content,
7915 })?;
7916 }
7917 }
7918
7919 Ok(())
7920 }
7921
    /// Streams lexical-rebuild messages for conversations whose ids lie in
    /// `start_conversation_id..=end_conversation_id` and hands them to `f`
    /// one conversation at a time.
    ///
    /// `f` receives the conversation id, the rows buffered for that
    /// conversation, and the id of the last message buffered for it. Rows
    /// come from
    /// `stream_messages_for_lexical_rebuild_between_conversation_ids`, so a
    /// conversation that yields no rows never reaches `f`. Returns `Ok(())`
    /// without calling `f` when the range is empty.
    ///
    /// # Errors
    /// Propagates failures from the underlying row stream and from `f`.
    pub fn stream_grouped_messages_for_lexical_rebuild_between_conversation_ids<F>(
        &self,
        start_conversation_id: i64,
        end_conversation_id: i64,
        mut f: F,
    ) -> Result<()>
    where
        F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
    {
        if end_conversation_id < start_conversation_id {
            return Ok(());
        }

        // Accumulator for the conversation currently being read.
        let mut current_conversation_id: Option<i64> = None;
        let mut current_messages: LexicalRebuildGroupedMessageRows = SmallVec::new();
        let mut current_last_message_id = 0i64;
        // Emits the buffered conversation (if any) to `f` and resets the
        // accumulator. The state is passed in by reference rather than
        // captured, so the row callback below can keep mutating it too.
        let mut flush_current = |current_conversation_id: &mut Option<i64>,
                                 current_messages: &mut LexicalRebuildGroupedMessageRows,
                                 current_last_message_id: &mut i64|
         -> Result<()> {
            let Some(conversation_id) = current_conversation_id.take() else {
                return Ok(());
            };
            let messages = std::mem::take(current_messages);
            let last_message_id = std::mem::take(current_last_message_id);
            f(conversation_id, messages, last_message_id)
        };

        self.stream_messages_for_lexical_rebuild_between_conversation_ids(
            start_conversation_id,
            end_conversation_id,
            |row| {
                // A change of conversation id closes the previous group.
                if current_conversation_id != Some(row.conversation_id) {
                    flush_current(
                        &mut current_conversation_id,
                        &mut current_messages,
                        &mut current_last_message_id,
                    )?;
                    current_conversation_id = Some(row.conversation_id);
                }
                current_last_message_id = row.id;
                current_messages.push(LexicalRebuildGroupedMessageRow {
                    idx: row.idx,
                    is_tool_role: row.role == "tool",
                    created_at: row.created_at,
                    content: row.content,
                });
                Ok(())
            },
        )
        .with_context(|| "streaming grouped lexical rebuild messages")?;

        // The final conversation has no successor row to trigger its flush.
        flush_current(
            &mut current_conversation_id,
            &mut current_messages,
            &mut current_last_message_id,
        )
        .with_context(|| "flushing grouped lexical rebuild messages")
    }
7984
7985 pub fn stream_grouped_messages_for_lexical_rebuild_from_conversation_id<F>(
7988 &self,
7989 start_conversation_id: i64,
7990 f: F,
7991 ) -> Result<()>
7992 where
7993 F: FnMut(i64, LexicalRebuildGroupedMessageRows, i64) -> Result<()>,
7994 {
7995 self.stream_grouped_messages_for_lexical_rebuild_between_conversation_ids(
7996 start_conversation_id,
7997 i64::MAX,
7998 f,
7999 )
8000 }
8001
8002 pub fn stream_messages_for_lexical_rebuild_from_conversation_id<F>(
8005 &self,
8006 start_conversation_id: i64,
8007 f: F,
8008 ) -> Result<()>
8009 where
8010 F: FnMut(LexicalRebuildMessageRow) -> Result<()>,
8011 {
8012 self.stream_messages_for_lexical_rebuild_between_conversation_ids(
8013 start_conversation_id,
8014 i64::MAX,
8015 f,
8016 )
8017 }
8018
8019 pub fn get_source(&self, id: &str) -> Result<Option<Source>> {
8021 let result = self.conn.query_row_map(
8022 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources WHERE id = ?1",
8023 fparams![id],
8024 |row| {
8025 let kind_str: String = row.get_typed(1)?;
8026 let config_json_str: Option<String> = row.get_typed(5)?;
8027 Ok(Source {
8028 id: row.get_typed(0)?,
8029 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
8030 host_label: row.get_typed(2)?,
8031 machine_id: row.get_typed(3)?,
8032 platform: row.get_typed(4)?,
8033 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
8034 created_at: row.get_typed(6)?,
8035 updated_at: row.get_typed(7)?,
8036 })
8037 },
8038 );
8039 Ok(result.optional()?)
8040 }
8041
8042 pub fn list_sources(&self) -> Result<Vec<Source>> {
8044 self.conn
8045 .query_map_collect(
8046 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at FROM sources ORDER BY id",
8047 fparams![],
8048 |row| {
8049 let kind_str: String = row.get_typed(1)?;
8050 let config_json_str: Option<String> = row.get_typed(5)?;
8051 Ok(Source {
8052 id: row.get_typed(0)?,
8053 kind: SourceKind::parse(&kind_str).unwrap_or_default(),
8054 host_label: row.get_typed(2)?,
8055 machine_id: row.get_typed(3)?,
8056 platform: row.get_typed(4)?,
8057 config_json: config_json_str.and_then(|s| serde_json::from_str(&s).ok()),
8058 created_at: row.get_typed(6)?,
8059 updated_at: row.get_typed(7)?,
8060 })
8061 },
8062 )
8063 .with_context(|| "listing sources")
8064 }
8065
8066 pub fn get_source_ids(&self) -> Result<Vec<String>> {
8068 self.conn
8069 .query_map_collect(
8070 "SELECT id FROM sources WHERE id != 'local' ORDER BY id",
8071 fparams![],
8072 |row| row.get_typed(0),
8073 )
8074 .with_context(|| "listing source ids")
8075 }
8076
8077 pub fn upsert_source(&self, source: &Source) -> Result<()> {
8079 self.invalidate_conversation_source_cache(source.id.as_str());
8080 let now = Self::now_millis();
8081 let kind_str = source.kind.to_string();
8082 let config_json_str = source
8083 .config_json
8084 .as_ref()
8085 .map(serde_json::to_string)
8086 .transpose()?;
8087
8088 self.conn.execute_compat(
8092 "INSERT INTO sources(id, kind, host_label, machine_id, platform, config_json, created_at, updated_at)
8093 VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
8094 ON CONFLICT(id) DO UPDATE SET
8095 kind = excluded.kind,
8096 host_label = excluded.host_label,
8097 machine_id = excluded.machine_id,
8098 platform = excluded.platform,
8099 config_json = excluded.config_json,
8100 updated_at = excluded.updated_at
8101 WHERE NOT (
8102 sources.kind IS excluded.kind
8103 AND sources.host_label IS excluded.host_label
8104 AND sources.machine_id IS excluded.machine_id
8105 AND sources.platform IS excluded.platform
8106 AND sources.config_json IS excluded.config_json
8107 )",
8108 fparams![
8109 source.id.as_str(),
8110 kind_str.as_str(),
8111 source.host_label.as_deref(),
8112 source.machine_id.as_deref(),
8113 source.platform.as_deref(),
8114 config_json_str.as_deref(),
8115 source.created_at.unwrap_or(now),
8116 now
8117 ],
8118 )?;
8119 Ok(())
8120 }
8121
8122 fn historical_bundle_key_hash(
8123 version: u32,
8124 bundle: &HistoricalDatabaseBundle,
8125 include_bundle_stats: bool,
8126 ) -> String {
8127 let signature = if include_bundle_stats {
8128 format!(
8129 "{}:{}:{}:{}",
8130 version,
8131 bundle.root_path.display(),
8132 bundle.total_bytes,
8133 bundle.modified_at_ms
8134 )
8135 } else {
8136 format!("{}:{}", version, bundle.root_path.display())
8137 };
8138 blake3::hash(signature.as_bytes()).to_hex().to_string()
8139 }
8140
8141 fn historical_bundle_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
8142 format!(
8143 "historical_bundle_salvaged:{}",
8144 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_LEDGER_VERSION, bundle, false)
8145 )
8146 }
8147
8148 fn historical_bundle_legacy_meta_key(bundle: &HistoricalDatabaseBundle) -> String {
8149 let signature = format!(
8150 "{}:{}:{}:{}",
8151 HISTORICAL_SALVAGE_LEDGER_VERSION,
8152 bundle.root_path.display(),
8153 bundle.total_bytes,
8154 bundle.modified_at_ms
8155 );
8156 format!(
8157 "historical_bundle_salvaged:{}",
8158 blake3::hash(signature.as_bytes()).to_hex()
8159 )
8160 }
8161
8162 fn historical_bundle_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
8163 format!(
8164 "historical_bundle_progress:{}",
8165 Self::historical_bundle_key_hash(HISTORICAL_SALVAGE_PROGRESS_VERSION, bundle, false)
8166 )
8167 }
8168
8169 fn historical_bundle_legacy_progress_key(bundle: &HistoricalDatabaseBundle) -> String {
8170 let signature = format!(
8171 "{}:{}:{}:{}",
8172 HISTORICAL_SALVAGE_PROGRESS_VERSION,
8173 bundle.root_path.display(),
8174 bundle.total_bytes,
8175 bundle.modified_at_ms
8176 );
8177 format!(
8178 "historical_bundle_progress:{}",
8179 blake3::hash(signature.as_bytes()).to_hex()
8180 )
8181 }
8182
8183 fn historical_bundle_already_imported(
8184 &self,
8185 bundle: &HistoricalDatabaseBundle,
8186 ) -> Result<bool> {
8187 for key in [
8188 Self::historical_bundle_meta_key(bundle),
8189 Self::historical_bundle_legacy_meta_key(bundle),
8190 ] {
8191 let existing: Option<String> = self
8192 .conn
8193 .query_row_map(
8194 "SELECT value FROM meta WHERE key = ?1",
8195 fparams![key.as_str()],
8196 |row| row.get_typed(0),
8197 )
8198 .optional()?;
8199 if existing.is_some() {
8200 return Ok(true);
8201 }
8202 }
8203 Ok(false)
8204 }
8205
8206 pub(crate) fn has_pending_historical_bundles(&self, canonical_db_path: &Path) -> Result<bool> {
8207 for bundle in discover_historical_database_bundles(canonical_db_path) {
8208 if !self.historical_bundle_already_imported(&bundle)? {
8209 return Ok(true);
8210 }
8211 }
8212 Ok(false)
8213 }
8214
8215 fn load_historical_bundle_progress(
8216 &self,
8217 bundle: &HistoricalDatabaseBundle,
8218 ) -> Result<Option<HistoricalBundleProgress>> {
8219 for key in [
8220 Self::historical_bundle_progress_key(bundle),
8221 Self::historical_bundle_legacy_progress_key(bundle),
8222 ] {
8223 let raw: Option<String> = self
8224 .conn
8225 .query_row_map(
8226 "SELECT value FROM meta WHERE key = ?1",
8227 fparams![key.as_str()],
8228 |row| row.get_typed(0),
8229 )
8230 .optional()?;
8231 let Some(raw) = raw else {
8232 continue;
8233 };
8234 let parsed: HistoricalBundleProgress =
8235 serde_json::from_str(&raw).with_context(|| {
8236 format!(
8237 "parsing historical salvage progress checkpoint for {}",
8238 bundle.root_path.display()
8239 )
8240 })?;
8241 if parsed.progress_version == HISTORICAL_SALVAGE_PROGRESS_VERSION {
8242 return Ok(Some(parsed));
8243 }
8244 }
8245 Ok(None)
8246 }
8247
8248 fn record_historical_bundle_progress(
8249 &self,
8250 bundle: &HistoricalDatabaseBundle,
8251 method: &str,
8252 last_completed_source_row_id: i64,
8253 conversations_imported: usize,
8254 messages_imported: usize,
8255 ) -> Result<()> {
8256 let key = Self::historical_bundle_progress_key(bundle);
8257 let value = HistoricalBundleProgress {
8258 progress_version: HISTORICAL_SALVAGE_PROGRESS_VERSION,
8259 path: bundle.root_path.display().to_string(),
8260 bytes: bundle.total_bytes,
8261 modified_at_ms: bundle.modified_at_ms,
8262 method: method.to_string(),
8263 last_completed_source_row_id,
8264 conversations_imported,
8265 messages_imported,
8266 updated_at_ms: Self::now_millis(),
8267 };
8268 let value_str = serde_json::to_string(&value)?;
8269 self.conn.execute_compat(
8270 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
8271 fparams![key.as_str(), value_str.as_str()],
8272 )?;
8273 Ok(())
8274 }
8275
8276 fn clear_historical_bundle_progress(&self, bundle: &HistoricalDatabaseBundle) -> Result<()> {
8277 for key in [
8278 Self::historical_bundle_progress_key(bundle),
8279 Self::historical_bundle_legacy_progress_key(bundle),
8280 ] {
8281 self.conn
8282 .execute_compat("DELETE FROM meta WHERE key = ?1", fparams![key.as_str()])?;
8283 }
8284 Ok(())
8285 }
8286
8287 fn record_historical_bundle_import(
8288 &self,
8289 bundle: &HistoricalDatabaseBundle,
8290 method: &str,
8291 conversations_imported: usize,
8292 messages_imported: usize,
8293 ) -> Result<()> {
8294 let key = Self::historical_bundle_meta_key(bundle);
8295 let value = serde_json::json!({
8296 "salvage_version": HISTORICAL_SALVAGE_LEDGER_VERSION,
8297 "path": bundle.root_path.display().to_string(),
8298 "bytes": bundle.total_bytes,
8299 "modified_at_ms": bundle.modified_at_ms,
8300 "method": method,
8301 "conversations_imported": conversations_imported,
8302 "messages_imported": messages_imported,
8303 "recorded_at_ms": Self::now_millis(),
8304 });
8305 let value_str = serde_json::to_string(&value)?;
8306 self.conn.execute_compat(
8307 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
8308 fparams![key.as_str(), value_str.as_str()],
8309 )?;
8310 Ok(())
8311 }
8312
8313 fn historical_import_error_is_split_retryable(err: &anyhow::Error) -> bool {
8314 const RETRYABLE_PATTERNS: &[&str] = &[
8315 "out of memory",
8316 "string or blob too big",
8317 "too many sql variables",
8318 ];
8319 err.chain().any(|cause| {
8320 let rendered = cause.to_string().to_ascii_lowercase();
8321 RETRYABLE_PATTERNS
8322 .iter()
8323 .any(|pattern| rendered.contains(pattern))
8324 })
8325 }
8326
8327 fn split_historical_batch_entry_messages(
8328 entry: &HistoricalBatchEntry,
8329 ) -> Option<(HistoricalBatchEntry, HistoricalBatchEntry)> {
8330 if entry.conversation.messages.len() < 2 {
8331 return None;
8332 }
8333 let split_at = entry.conversation.messages.len() / 2;
8334 if split_at == 0 || split_at >= entry.conversation.messages.len() {
8335 return None;
8336 }
8337
8338 let mut left = entry.clone();
8339 left.conversation.messages = entry.conversation.messages[..split_at].to_vec();
8340
8341 let mut right = entry.clone();
8342 right.conversation.messages = entry.conversation.messages[split_at..].to_vec();
8343
8344 Some((left, right))
8345 }
8346
8347 fn import_historical_batch_with_retry<F>(
8348 entries: &[HistoricalBatchEntry],
8349 insert_batch: &mut F,
8350 ) -> Result<HistoricalBatchImportTotals>
8351 where
8352 F: FnMut(&[HistoricalBatchEntry]) -> Result<HistoricalBatchImportTotals>,
8353 {
8354 match insert_batch(entries) {
8355 Ok(totals) => Ok(totals),
8356 Err(err) if Self::historical_import_error_is_split_retryable(&err) => {
8357 if entries.len() > 1 {
8358 let mid = entries.len() / 2;
8359 tracing::warn!(
8360 batch_entries = entries.len(),
8361 split_left = mid,
8362 split_right = entries.len() - mid,
8363 error = %err,
8364 "historical salvage batch failed; retrying in smaller sub-batches"
8365 );
8366 let left =
8367 Self::import_historical_batch_with_retry(&entries[..mid], insert_batch)?;
8368 let right =
8369 Self::import_historical_batch_with_retry(&entries[mid..], insert_batch)?;
8370 return Ok(HistoricalBatchImportTotals {
8371 inserted_source_rows: left.inserted_source_rows
8372 + right.inserted_source_rows,
8373 inserted_messages: left.inserted_messages + right.inserted_messages,
8374 });
8375 }
8376
8377 if let Some(entry) = entries.first()
8378 && let Some((left, right)) = Self::split_historical_batch_entry_messages(entry)
8379 {
8380 tracing::warn!(
8381 source_row_id = entry.source_row_id,
8382 message_count = entry.conversation.messages.len(),
8383 error = %err,
8384 "historical salvage conversation failed; retrying in smaller message slices"
8385 );
8386 let left_totals = Self::import_historical_batch_with_retry(
8387 std::slice::from_ref(&left),
8388 insert_batch,
8389 )?;
8390 let right_totals = Self::import_historical_batch_with_retry(
8391 std::slice::from_ref(&right),
8392 insert_batch,
8393 )?;
8394 return Ok(HistoricalBatchImportTotals {
8395 inserted_source_rows: usize::from(
8396 left_totals.inserted_source_rows > 0
8397 || right_totals.inserted_source_rows > 0,
8398 ),
8399 inserted_messages: left_totals
8400 .inserted_messages
8401 .saturating_add(right_totals.inserted_messages),
8402 });
8403 }
8404
8405 Err(err)
8406 }
8407 Err(err) => Err(err),
8408 }
8409 }
8410
8411 fn import_historical_sources(&self, source_conn: &FrankenConnection) -> Result<()> {
8412 let sources: Vec<Source> = match source_conn.query_map_collect(
8413 "SELECT id, kind, host_label, machine_id, platform, config_json, created_at, updated_at
8414 FROM sources",
8415 fparams![],
8416 |row| {
8417 let raw_source_id: String = row.get_typed(0)?;
8418 let kind_str: String = row.get_typed(1)?;
8419 let raw_host_label: Option<String> = row.get_typed(2)?;
8420 let config_json_raw: Option<String> = row.get_typed(5)?;
8421 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
8422 Some(raw_source_id.as_str()),
8423 Some(kind_str.as_str()),
8424 raw_host_label.as_deref(),
8425 );
8426 Ok(Source {
8427 id: source_id,
8428 kind: source_kind,
8429 host_label,
8430 machine_id: row.get_typed(3)?,
8431 platform: row.get_typed(4)?,
8432 config_json: config_json_raw.and_then(|raw| serde_json::from_str(&raw).ok()),
8433 created_at: row.get_typed(6)?,
8434 updated_at: row.get_typed(7)?,
8435 })
8436 },
8437 ) {
8438 Ok(rows) => rows,
8439 Err(err) => {
8440 tracing::warn!(error = %err, "historical sources table unavailable; skipping source import");
8441 return Ok(());
8442 }
8443 };
8444
8445 for source in sources {
8446 self.upsert_source(&source)?;
8447 }
8448 Ok(())
8449 }
8450
8451 fn import_historical_conversations(
8452 &self,
8453 bundle: &HistoricalDatabaseBundle,
8454 salvage_method: &str,
8455 source_conn: &FrankenConnection,
8456 ) -> Result<(usize, usize)> {
8457 let batch_limits = historical_import_batch_limits();
8458 let cache_enabled = IndexingCache::is_enabled();
8459 let mut indexing_cache = IndexingCache::new();
8460 let mut known_sources: HashSet<String> = self
8461 .list_sources()?
8462 .into_iter()
8463 .map(|source| source.id)
8464 .collect();
8465 let resume_progress = self.load_historical_bundle_progress(bundle)?;
8466 let resume_after_row_id = resume_progress
8467 .as_ref()
8468 .map(|progress| progress.last_completed_source_row_id)
8469 .filter(|row_id| *row_id > 0);
8470
8471 tracing::info!(
8472 target: "cass::historical_salvage",
8473 batch_conversations = batch_limits.conversations,
8474 batch_messages = batch_limits.messages,
8475 batch_payload_chars = batch_limits.payload_chars,
8476 cache_enabled,
8477 resume_after_row_id,
8478 "configured historical salvage batch limits"
8479 );
8480
8481 if let Some(progress) = &resume_progress {
8482 tracing::info!(
8483 target: "cass::historical_salvage",
8484 path = %bundle.root_path.display(),
8485 resume_after_row_id = progress.last_completed_source_row_id,
8486 prior_conversations_imported = progress.conversations_imported,
8487 prior_messages_imported = progress.messages_imported,
8488 "resuming historical salvage bundle from durable checkpoint"
8489 );
8490 }
8491
8492 let conv_sql = if resume_after_row_id.is_some() {
8498 "SELECT
8499 c.id,
8500 COALESCE(a.slug, 'unknown'),
8501 w.path,
8502 c.external_id,
8503 c.title,
8504 c.source_path,
8505 c.started_at,
8506 c.ended_at,
8507 c.approx_tokens,
8508 c.metadata_json,
8509 c.source_id,
8510 c.origin_host
8511 FROM conversations c
8512 LEFT JOIN agents a ON c.agent_id = a.id
8513 LEFT JOIN workspaces w ON c.workspace_id = w.id
8514 WHERE c.id > ?1
8515 ORDER BY c.id"
8516 } else {
8517 "SELECT
8518 c.id,
8519 COALESCE(a.slug, 'unknown'),
8520 w.path,
8521 c.external_id,
8522 c.title,
8523 c.source_path,
8524 c.started_at,
8525 c.ended_at,
8526 c.approx_tokens,
8527 c.metadata_json,
8528 c.source_id,
8529 c.origin_host
8530 FROM conversations c
8531 LEFT JOIN agents a ON c.agent_id = a.id
8532 LEFT JOIN workspaces w ON c.workspace_id = w.id
8533 ORDER BY c.id"
8534 };
8535 let conv_params: &[ParamValue] =
8536 if let Some(last_completed_source_row_id) = resume_after_row_id {
8537 &[ParamValue::from(last_completed_source_row_id)]
8538 } else {
8539 &[]
8540 };
8541
8542 #[allow(clippy::type_complexity)]
8543 let conv_rows: Vec<(
8544 i64,
8545 String,
8546 Option<String>,
8547 Option<String>,
8548 Option<String>,
8549 String,
8550 Option<i64>,
8551 Option<i64>,
8552 Option<i64>,
8553 Option<String>,
8554 Option<String>,
8555 Option<String>,
8556 )> = source_conn
8557 .query_map_collect(conv_sql, conv_params, |row| {
8558 Ok((
8559 row.get_typed::<i64>(0)?,
8560 row.get_typed::<String>(1)?,
8561 row.get_typed::<Option<String>>(2)?,
8562 row.get_typed::<Option<String>>(3)?,
8563 row.get_typed::<Option<String>>(4)?,
8564 row.get_typed::<String>(5)?,
8565 row.get_typed::<Option<i64>>(6)?,
8566 row.get_typed::<Option<i64>>(7)?,
8567 row.get_typed::<Option<i64>>(8)?,
8568 row.get_typed::<Option<String>>(9)?,
8569 row.get_typed::<Option<String>>(10)?,
8570 row.get_typed::<Option<String>>(11)?,
8571 ))
8572 })
8573 .context("querying historical conversations")?;
8574
8575 let msg_sql = "SELECT idx, role, author, created_at, content, extra_json
8576 FROM messages
8577 WHERE conversation_id = ?1
8578 ORDER BY idx";
8579
8580 let mut imported_conversations = resume_progress
8581 .as_ref()
8582 .map(|progress| progress.conversations_imported)
8583 .unwrap_or(0);
8584 let mut imported_messages = resume_progress
8585 .as_ref()
8586 .map(|progress| progress.messages_imported)
8587 .unwrap_or(0);
8588 let mut pending_batch: Vec<HistoricalBatchEntry> = Vec::new();
8589 let mut pending_batch_messages = 0usize;
8590 let mut pending_batch_chars = 0usize;
8591 let mut pending_batch_first_row_id: Option<i64> = None;
8592 let mut pending_batch_last_row_id: Option<i64> = None;
8593
8594 let flush_batch = |storage: &FrankenStorage,
8595 batch: &mut Vec<HistoricalBatchEntry>,
8596 pending_messages: &mut usize,
8597 pending_chars: &mut usize,
8598 first_row_id: &mut Option<i64>,
8599 last_row_id: &mut Option<i64>,
8600 imported_conversations: &mut usize,
8601 imported_messages: &mut usize|
8602 -> Result<()> {
8603 if batch.is_empty() {
8604 return Ok(());
8605 }
8606
8607 let batch_first_row_id = *first_row_id;
8608 let batch_last_row_id = *last_row_id;
8609 if historical_salvage_debug_enabled() {
8610 eprintln!(
8611 "[historical-salvage] flushing batch rows {:?}..{:?} conversations={} messages={} payload_chars={}",
8612 batch_first_row_id,
8613 batch_last_row_id,
8614 batch.len(),
8615 *pending_messages,
8616 *pending_chars
8617 );
8618 }
8619 tracing::info!(
8620 target: "cass::historical_salvage",
8621 batch_conversations = batch.len(),
8622 batch_messages = *pending_messages,
8623 batch_payload_chars = *pending_chars,
8624 first_source_row_id = batch_first_row_id,
8625 last_source_row_id = batch_last_row_id,
8626 "flushing historical salvage batch"
8627 );
8628
8629 let mut insert_batch =
8630 |entries: &[HistoricalBatchEntry]| -> Result<HistoricalBatchImportTotals> {
8631 let borrowed_batch: Vec<(i64, Option<i64>, &Conversation)> = entries
8632 .iter()
8633 .map(|entry| (entry.agent_id, entry.workspace_id, &entry.conversation))
8634 .collect();
8635 let outcomes = storage
8636 .insert_conversations_batched(&borrowed_batch)
8637 .with_context(|| {
8638 let first_source_row_id =
8639 entries.first().map(|entry| entry.source_row_id);
8640 let last_source_row_id =
8641 entries.last().map(|entry| entry.source_row_id);
8642 format!(
8643 "inserting historical salvage batch source rows {:?}..{:?}",
8644 first_source_row_id, last_source_row_id
8645 )
8646 })?;
8647 let mut totals = HistoricalBatchImportTotals::default();
8648 for outcome in outcomes {
8649 if !outcome.inserted_indices.is_empty() {
8650 totals.inserted_source_rows += 1;
8651 totals.inserted_messages += outcome.inserted_indices.len();
8652 }
8653 }
8654 Ok(totals)
8655 };
8656 let totals =
8657 Self::import_historical_batch_with_retry(batch.as_slice(), &mut insert_batch)?;
8658 *imported_conversations =
8659 (*imported_conversations).saturating_add(totals.inserted_source_rows);
8660 *imported_messages = (*imported_messages).saturating_add(totals.inserted_messages);
8661 if let Some(last_completed_row_id) = batch_last_row_id {
8662 storage.record_historical_bundle_progress(
8663 bundle,
8664 salvage_method,
8665 last_completed_row_id,
8666 *imported_conversations,
8667 *imported_messages,
8668 )?;
8669 }
8670 tracing::info!(
8671 target: "cass::historical_salvage",
8672 batch_conversations = batch.len(),
8673 batch_messages = *pending_messages,
8674 imported_conversations = *imported_conversations,
8675 imported_messages = *imported_messages,
8676 first_source_row_id = batch_first_row_id,
8677 last_source_row_id = batch_last_row_id,
8678 "historical salvage batch committed"
8679 );
8680 if historical_salvage_debug_enabled() {
8681 eprintln!(
8682 "[historical-salvage] committed batch rows {:?}..{:?} imported_conversations={} imported_messages={}",
8683 batch_first_row_id,
8684 batch_last_row_id,
8685 *imported_conversations,
8686 *imported_messages
8687 );
8688 }
8689 batch.clear();
8690 *pending_messages = 0;
8691 *pending_chars = 0;
8692 *first_row_id = None;
8693 *last_row_id = None;
8694 Ok(())
8695 };
8696
8697 for (
8698 conversation_row_id,
8699 agent_slug,
8700 workspace_path,
8701 external_id,
8702 title,
8703 source_path,
8704 started_at,
8705 ended_at,
8706 approx_tokens,
8707 metadata_json_raw,
8708 raw_source_id,
8709 raw_origin_host,
8710 ) in conv_rows
8711 {
8712 let source_id = crate::search::tantivy::normalized_index_source_id(
8713 raw_source_id.as_deref(),
8714 None,
8715 raw_origin_host.as_deref(),
8716 );
8717 let origin_host =
8718 crate::search::tantivy::normalized_index_origin_host(raw_origin_host.as_deref());
8719
8720 let messages: Vec<Message> = source_conn
8721 .query_map_collect(msg_sql, fparams![conversation_row_id], |msg_row| {
8722 let role: String = msg_row.get_typed(1)?;
8723 Ok(Message {
8724 id: None,
8725 idx: msg_row.get_typed(0)?,
8726 role: match role.as_str() {
8727 "user" => MessageRole::User,
8728 "agent" | "assistant" => MessageRole::Agent,
8729 "tool" => MessageRole::Tool,
8730 "system" => MessageRole::System,
8731 other => MessageRole::Other(other.to_string()),
8732 },
8733 author: msg_row.get_typed(2)?,
8734 created_at: msg_row.get_typed(3)?,
8735 content: msg_row.get_typed(4)?,
8736 extra_json: parse_historical_json_column(msg_row.get_typed(5)?),
8737 snippets: Vec::new(),
8738 })
8739 })
8740 .context("collecting historical message rows")?;
8741
8742 if messages.is_empty() {
8743 continue;
8744 }
8745
8746 let conversation_message_count = messages.len();
8747 let conversation_chars = messages
8748 .iter()
8749 .map(message_payload_size_hint)
8750 .sum::<usize>();
8751
8752 let conversation = Conversation {
8753 id: None,
8754 agent_slug: agent_slug.clone(),
8755 workspace: workspace_path.map(PathBuf::from),
8756 external_id,
8757 title,
8758 source_path: PathBuf::from(source_path),
8759 started_at,
8760 ended_at,
8761 approx_tokens,
8762 metadata_json: parse_json_column(metadata_json_raw),
8763 messages,
8764 source_id,
8765 origin_host,
8766 };
8767
8768 if !known_sources.contains(&conversation.source_id) {
8769 let placeholder = if conversation.source_id == LOCAL_SOURCE_ID {
8770 Source::local()
8771 } else {
8772 Source {
8773 id: conversation.source_id.clone(),
8774 kind: SourceKind::Ssh,
8775 host_label: conversation.origin_host.clone(),
8776 machine_id: None,
8777 platform: None,
8778 config_json: None,
8779 created_at: None,
8780 updated_at: None,
8781 }
8782 };
8783 self.upsert_source(&placeholder)?;
8784 known_sources.insert(conversation.source_id.clone());
8785 }
8786
8787 let agent = Agent {
8788 id: None,
8789 slug: agent_slug.clone(),
8790 name: agent_slug,
8791 version: None,
8792 kind: AgentKind::Cli,
8793 };
8794 let agent_id = if cache_enabled {
8795 indexing_cache.get_or_insert_agent(self, &agent)?
8796 } else {
8797 self.ensure_agent(&agent)?
8798 };
8799 let workspace_id = if let Some(workspace) = &conversation.workspace {
8800 if cache_enabled {
8801 Some(indexing_cache.get_or_insert_workspace(self, workspace, None)?)
8802 } else {
8803 Some(self.ensure_workspace(workspace, None)?)
8804 }
8805 } else {
8806 None
8807 };
8808
8809 let exceeds_pending_limits = !pending_batch.is_empty()
8810 && (pending_batch.len() >= batch_limits.conversations
8811 || pending_batch_messages.saturating_add(conversation_message_count)
8812 > batch_limits.messages
8813 || pending_batch_chars.saturating_add(conversation_chars)
8814 > batch_limits.payload_chars);
8815 if exceeds_pending_limits {
8816 flush_batch(
8817 self,
8818 &mut pending_batch,
8819 &mut pending_batch_messages,
8820 &mut pending_batch_chars,
8821 &mut pending_batch_first_row_id,
8822 &mut pending_batch_last_row_id,
8823 &mut imported_conversations,
8824 &mut imported_messages,
8825 )?;
8826 }
8827
8828 if pending_batch_first_row_id.is_none() {
8829 pending_batch_first_row_id = Some(conversation_row_id);
8830 }
8831 pending_batch_last_row_id = Some(conversation_row_id);
8832 pending_batch_messages =
8833 pending_batch_messages.saturating_add(conversation_message_count);
8834 pending_batch_chars = pending_batch_chars.saturating_add(conversation_chars);
8835 pending_batch.push(HistoricalBatchEntry {
8836 source_row_id: conversation_row_id,
8837 agent_id,
8838 workspace_id,
8839 conversation,
8840 });
8841
8842 if pending_batch.len() >= batch_limits.conversations
8843 || pending_batch_messages >= batch_limits.messages
8844 || pending_batch_chars >= batch_limits.payload_chars
8845 {
8846 flush_batch(
8847 self,
8848 &mut pending_batch,
8849 &mut pending_batch_messages,
8850 &mut pending_batch_chars,
8851 &mut pending_batch_first_row_id,
8852 &mut pending_batch_last_row_id,
8853 &mut imported_conversations,
8854 &mut imported_messages,
8855 )?;
8856 }
8857 }
8858
8859 flush_batch(
8860 self,
8861 &mut pending_batch,
8862 &mut pending_batch_messages,
8863 &mut pending_batch_chars,
8864 &mut pending_batch_first_row_id,
8865 &mut pending_batch_last_row_id,
8866 &mut imported_conversations,
8867 &mut imported_messages,
8868 )?;
8869
8870 if cache_enabled {
8871 let (hits, misses, hit_rate) = indexing_cache.stats();
8872 tracing::info!(
8873 target: "cass::historical_salvage",
8874 hits,
8875 misses,
8876 hit_rate = format!("{:.1}%", hit_rate * 100.0),
8877 agents = indexing_cache.agent_count(),
8878 workspaces = indexing_cache.workspace_count(),
8879 sources = known_sources.len(),
8880 "historical salvage cache stats"
8881 );
8882 }
8883
8884 Ok((imported_conversations, imported_messages))
8885 }
8886
8887 pub fn salvage_historical_databases(
8888 &self,
8889 canonical_db_path: &Path,
8890 ) -> Result<HistoricalSalvageOutcome> {
8891 let ordered_bundles = discover_historical_database_bundles(canonical_db_path);
8892 let mut outcome = HistoricalSalvageOutcome {
8893 bundles_considered: ordered_bundles.len(),
8894 ..HistoricalSalvageOutcome::default()
8895 };
8896
8897 for bundle in ordered_bundles {
8898 if self.historical_bundle_already_imported(&bundle)? {
8899 self.clear_historical_bundle_progress(&bundle)?;
8900 continue;
8901 }
8902
8903 let source = match open_historical_bundle_for_salvage(&bundle).with_context(|| {
8904 format!(
8905 "opening historical bundle {} for salvage",
8906 bundle.root_path.display()
8907 )
8908 }) {
8909 Ok(source) => source,
8910 Err(err) => {
8911 tracing::warn!(
8912 path = %bundle.root_path.display(),
8913 error = %err,
8914 "skipping unreadable historical cass database bundle during salvage"
8915 );
8916 self.clear_historical_bundle_progress(&bundle)?;
8917 continue;
8918 }
8919 };
8920
8921 if let Some(progress) = self.load_historical_bundle_progress(&bundle)? {
8929 let backup_max_conversation_id: i64 = source
8930 .conn
8931 .query_row_map(
8932 "SELECT COALESCE(MAX(id), 0) FROM conversations",
8933 fparams![],
8934 |row| row.get_typed(0),
8935 )
8936 .unwrap_or(0);
8937 if backup_max_conversation_id > 0
8938 && progress.last_completed_source_row_id >= backup_max_conversation_id
8939 {
8940 self.record_historical_bundle_import(
8941 &bundle,
8942 source.method,
8943 progress.conversations_imported,
8944 progress.messages_imported,
8945 )?;
8946 self.clear_historical_bundle_progress(&bundle)?;
8947 tracing::info!(
8948 path = %bundle.root_path.display(),
8949 last_completed_source_row_id = progress.last_completed_source_row_id,
8950 backup_max_conversation_id,
8951 conversations_imported = progress.conversations_imported,
8952 messages_imported = progress.messages_imported,
8953 "historical bundle already fully imported per checkpoint; marking salvaged and skipping O(n) re-scan"
8954 );
8955 continue;
8956 }
8957 }
8958
8959 self.import_historical_sources(&source.conn)?;
8960 let (imported_conversations, imported_messages) =
8961 self.import_historical_conversations(&bundle, source.method, &source.conn)?;
8962 self.record_historical_bundle_import(
8963 &bundle,
8964 source.method,
8965 imported_conversations,
8966 imported_messages,
8967 )?;
8968 self.clear_historical_bundle_progress(&bundle)?;
8969
8970 outcome.bundles_imported += 1;
8971 outcome.conversations_imported += imported_conversations;
8972 outcome.messages_imported += imported_messages;
8973
8974 tracing::info!(
8975 path = %bundle.root_path.display(),
8976 bytes = bundle.total_bytes,
8977 method = source.method,
8978 imported_conversations,
8979 imported_messages,
8980 "salvaged historical cass database bundle"
8981 );
8982 }
8983
8984 Ok(outcome)
8985 }
8986
8987 pub fn delete_source(&self, id: &str, _cascade: bool) -> Result<bool> {
8989 if id == LOCAL_SOURCE_ID {
8990 anyhow::bail!("cannot delete the local source");
8991 }
8992 let count = self
8993 .conn
8994 .execute_compat("DELETE FROM sources WHERE id = ?1", fparams![id])?;
8995 if count > 0 {
8996 self.invalidate_conversation_source_cache(id);
8997 }
8998 Ok(count > 0)
8999 }
9000
9001 pub fn insert_conversation_tree(
9003 &self,
9004 agent_id: i64,
9005 workspace_id: Option<i64>,
9006 conv: &Conversation,
9007 ) -> Result<InsertOutcome> {
9008 let normalized_conv = normalized_conversation_for_storage(conv);
9009 let conv = normalized_conv.as_ref();
9010 self.ensure_source_for_conversation(conv)?;
9011 let defer_lexical_updates = defer_storage_lexical_updates_enabled();
9012 let defer_analytics_updates = defer_analytics_updates_enabled();
9013 let conversation_key = conversation_merge_key(agent_id, conv);
9014 let mut tx = self.conn.transaction()?;
9015 let existing = franken_find_existing_conversation_with_tail_by_key(
9016 &tx,
9017 &conversation_key,
9018 Some(conv),
9019 )?;
9020 if let Some(existing) = existing {
9021 let outcome = self.franken_append_messages_with_tail_in_tx(
9022 &tx,
9023 agent_id,
9024 existing.id,
9025 conv,
9026 existing.tail_state,
9027 defer_lexical_updates,
9028 defer_analytics_updates,
9029 )?;
9030 tx.commit()?;
9031 return Ok(outcome);
9032 }
9033
9034 let conv_id = match franken_insert_conversation_or_get_existing_after_miss(
9035 &tx,
9036 agent_id,
9037 workspace_id,
9038 conv,
9039 &conversation_key,
9040 )? {
9041 ConversationInsertStatus::Inserted(conv_id) => conv_id,
9042 ConversationInsertStatus::Existing(existing_id) => {
9043 let ExistingMessageLookup {
9044 by_idx: mut existing_messages,
9045 replay: mut existing_replay_fingerprints,
9046 } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
9047 let ExistingConversationNewMessages {
9048 messages: new_messages,
9049 new_chars,
9050 idx_collision_count,
9051 first_collision_idx,
9052 } = collect_new_messages_for_existing_conversation(
9053 existing_id,
9054 conv,
9055 &mut existing_messages,
9056 &mut existing_replay_fingerprints,
9057 "skipping replay-equivalent recovered message with shifted idx",
9058 );
9059 let (inserted_last_idx, inserted_last_created_at) =
9060 borrowed_messages_tail_state(&new_messages);
9061 let mut inserted_indices = Vec::new();
9062 let mut fts_entries = Vec::new();
9063 let mut fts_pending_chars = 0usize;
9064 let mut _fts_inserted_total = 0usize;
9065 let inserted_message_ids =
9066 franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
9067 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9068 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
9069 if !defer_lexical_updates {
9070 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9071 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9072 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9073 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9074 {
9075 flush_pending_fts_entries(
9076 self,
9077 &tx,
9078 &mut fts_entries,
9079 &mut fts_pending_chars,
9080 &mut _fts_inserted_total,
9081 )?;
9082 }
9083 }
9084 inserted_indices.push(msg.idx);
9085 }
9086
9087 if idx_collision_count > 0 {
9088 tracing::warn!(
9089 conversation_id = existing_id,
9090 collision_count = idx_collision_count,
9091 first_idx = first_collision_idx,
9092 source_path = %conv.source_path.display(),
9093 "message idx collisions encountered while merging recovered conversation; retaining canonical message variants"
9094 );
9095 }
9096
9097 if !defer_lexical_updates {
9098 flush_pending_fts_entries(
9099 self,
9100 &tx,
9101 &mut fts_entries,
9102 &mut fts_pending_chars,
9103 &mut _fts_inserted_total,
9104 )?;
9105 }
9106
9107 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
9108 franken_update_conversation_tail_state(
9109 &tx,
9110 existing_id,
9111 conv_last_ts,
9112 inserted_last_idx,
9113 inserted_last_created_at,
9114 )?;
9115 if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
9116 {
9117 franken_update_external_conversation_tail_lookup_key(
9118 &tx,
9119 &lookup_key,
9120 conv_last_ts,
9121 inserted_last_idx,
9122 inserted_last_created_at,
9123 )?;
9124 }
9125
9126 if !defer_analytics_updates && !inserted_indices.is_empty() {
9127 franken_update_daily_stats_in_tx(
9128 self,
9129 &tx,
9130 &conv.agent_slug,
9131 &conv.source_id,
9132 conversation_effective_started_at(conv),
9133 StatsDelta {
9134 session_count_delta: 0,
9135 message_count_delta: inserted_indices.len() as i64,
9136 total_chars_delta: new_chars,
9137 },
9138 )?;
9139 }
9140
9141 tx.commit()?;
9142 return Ok(InsertOutcome {
9143 conversation_id: existing_id,
9144 conversation_inserted: false,
9145 inserted_indices,
9146 });
9147 }
9148 };
9149 let mut fts_entries = Vec::new();
9150 let mut fts_pending_chars = 0usize;
9151 let mut _fts_inserted_total = 0usize;
9152 let mut total_chars: i64 = 0;
9153 let mut inserted_indices = Vec::new();
9154 let mut pending_messages = HashMap::new();
9155 let mut pending_replay_fingerprints = HashSet::new();
9156 let mut idx_collision_count = 0usize;
9157 let mut first_collision_idx: Option<i64> = None;
9158 let mut new_messages = Vec::new();
9159 for msg in &conv.messages {
9160 let incoming_fingerprint = message_merge_fingerprint(msg);
9161 if let Some(existing_fingerprint) = pending_messages.get(&msg.idx) {
9162 if existing_fingerprint != &incoming_fingerprint {
9163 idx_collision_count = idx_collision_count.saturating_add(1);
9164 first_collision_idx.get_or_insert(msg.idx);
9165 }
9166 continue;
9167 }
9168 let incoming_replay = message_replay_fingerprint(msg);
9169 if pending_replay_fingerprints.contains(&incoming_replay) {
9170 tracing::debug!(
9171 conversation_id = conv_id,
9172 idx = msg.idx,
9173 source_path = %conv.source_path.display(),
9174 "skipping replay-equivalent duplicate message within new conversation insert"
9175 );
9176 continue;
9177 }
9178 pending_messages.insert(msg.idx, incoming_fingerprint);
9179 pending_replay_fingerprints.insert(incoming_replay);
9180 new_messages.push(msg);
9181 }
9182 let inserted_message_ids = franken_batch_insert_new_messages(&tx, conv_id, &new_messages)?;
9183 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9184 franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
9185 if !defer_lexical_updates {
9186 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9187 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9188 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9189 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9190 {
9191 flush_pending_fts_entries(
9192 self,
9193 &tx,
9194 &mut fts_entries,
9195 &mut fts_pending_chars,
9196 &mut _fts_inserted_total,
9197 )?;
9198 }
9199 }
9200 total_chars += msg.content.len() as i64;
9201 inserted_indices.push(msg.idx);
9202 }
9203 if idx_collision_count > 0 {
9204 tracing::warn!(
9205 conversation_id = conv_id,
9206 collision_count = idx_collision_count,
9207 first_idx = first_collision_idx,
9208 source_path = %conv.source_path.display(),
9209 "message idx collisions encountered while inserting a new conversation; retaining the first canonical variant per idx"
9210 );
9211 }
9212 if !defer_lexical_updates {
9213 flush_pending_fts_entries(
9214 self,
9215 &tx,
9216 &mut fts_entries,
9217 &mut fts_pending_chars,
9218 &mut _fts_inserted_total,
9219 )?;
9220 }
9221
9222 if !defer_analytics_updates {
9223 franken_update_daily_stats_in_tx(
9224 self,
9225 &tx,
9226 &conv.agent_slug,
9227 &conv.source_id,
9228 conversation_effective_started_at(conv),
9229 StatsDelta {
9230 session_count_delta: 1,
9231 message_count_delta: inserted_indices.len() as i64,
9232 total_chars_delta: total_chars,
9233 },
9234 )?;
9235 }
9236
9237 tx.commit()?;
9238 Ok(InsertOutcome {
9239 conversation_id: conv_id,
9240 conversation_inserted: true,
9241 inserted_indices,
9242 })
9243 }
9244
#[cfg(test)]
/// Test-only variant of the new-conversation insert path that accumulates the
/// time spent in each phase into `profile`.
///
/// The helper only supports conversations that do not exist yet: it returns
/// an error when the merge-key lookup finds a conversation, or when the row
/// insert reports an existing conversation.
///
/// On success `profile` also gains one invocation, the number of incoming
/// messages, and the number of messages actually inserted.
fn insert_conversation_tree_with_profile(
    &self,
    agent_id: i64,
    workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let total_start = Instant::now();
    let normalized_conv = normalized_conversation_for_storage(conv);
    let conv = normalized_conv.as_ref();

    let phase_start = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += phase_start.elapsed();

    let defer_lexical_updates = defer_storage_lexical_updates_enabled();
    let defer_analytics_updates = defer_analytics_updates_enabled();
    let conversation_key = conversation_merge_key(agent_id, conv);

    let phase_start = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += phase_start.elapsed();

    let phase_start = Instant::now();
    let already_present =
        franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
    profile.existing_lookup_duration += phase_start.elapsed();
    if let Some(existing_id) = already_present {
        return Err(anyhow!(
            "profile helper expects new conversation path, found existing id {existing_id}"
        ));
    }

    let phase_start = Instant::now();
    let insert_status = franken_insert_conversation_or_get_existing_after_miss(
        &tx,
        agent_id,
        workspace_id,
        conv,
        &conversation_key,
    )?;
    let conv_id = match insert_status {
        ConversationInsertStatus::Existing(existing_id) => {
            return Err(anyhow!(
                "profile helper expected inserted conversation row, reused existing id {existing_id}"
            ));
        }
        ConversationInsertStatus::Inserted(new_id) => new_id,
    };
    profile.conversation_row_duration += phase_start.elapsed();

    // De-duplicate the incoming messages against each other: the first
    // message per idx wins, and replay-equivalent repeats are dropped.
    let mut seen_by_idx = HashMap::new();
    let mut seen_replays = HashSet::new();
    let mut idx_collision_count = 0usize;
    let mut first_collision_idx: Option<i64> = None;
    let mut new_messages = Vec::new();
    for msg in &conv.messages {
        let incoming_fingerprint = message_merge_fingerprint(msg);
        if let Some(seen_fingerprint) = seen_by_idx.get(&msg.idx) {
            if *seen_fingerprint != incoming_fingerprint {
                idx_collision_count = idx_collision_count.saturating_add(1);
                if first_collision_idx.is_none() {
                    first_collision_idx = Some(msg.idx);
                }
            }
            continue;
        }

        // `insert` returns false when the replay fingerprint was already seen.
        if !seen_replays.insert(message_replay_fingerprint(msg)) {
            tracing::debug!(
                conversation_id = conv_id,
                idx = msg.idx,
                source_path = %conv.source_path.display(),
                "skipping replay-equivalent duplicate message within profiled new conversation insert"
            );
            continue;
        }

        seen_by_idx.insert(msg.idx, incoming_fingerprint);
        new_messages.push(msg);
    }

    let phase_start = Instant::now();
    let message_ids = franken_batch_insert_new_messages_with_profile(
        &tx,
        conv_id,
        &new_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += phase_start.elapsed();

    let mut fts_queue = Vec::new();
    let mut fts_queue_chars = 0usize;
    let mut fts_flushed_total = 0usize;
    let mut total_chars: i64 = 0;
    let mut inserted_indices = Vec::with_capacity(new_messages.len());
    for (msg_id, msg) in message_ids.into_iter().zip(new_messages) {
        let snippet_start = Instant::now();
        franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
        profile.snippet_insert_duration += snippet_start.elapsed();

        if !defer_lexical_updates {
            let entry_start = Instant::now();
            fts_queue.push(FtsEntry::from_message(msg_id, msg, conv));
            fts_queue_chars = fts_queue_chars.saturating_add(msg.content.len());
            profile.fts_entry_duration += entry_start.elapsed();

            let queue_full = fts_queue.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_queue_chars >= FTS_ENTRY_BATCH_MAX_CHARS;
            if queue_full {
                let flush_start = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_queue,
                    &mut fts_queue_chars,
                    &mut fts_flushed_total,
                )?;
                profile.fts_flush_duration += flush_start.elapsed();
            }
        }

        total_chars += msg.content.len() as i64;
        inserted_indices.push(msg.idx);
    }

    if idx_collision_count > 0 {
        tracing::warn!(
            conversation_id = conv_id,
            collision_count = idx_collision_count,
            first_idx = first_collision_idx,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling a new conversation insert; retaining the first canonical variant per idx"
        );
    }

    if !defer_lexical_updates {
        let flush_start = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_queue,
            &mut fts_queue_chars,
            &mut fts_flushed_total,
        )?;
        profile.fts_flush_duration += flush_start.elapsed();
    }

    if !defer_analytics_updates {
        let analytics_start = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 1,
                message_count_delta: inserted_indices.len() as i64,
                total_chars_delta: total_chars,
            },
        )?;
        profile.analytics_duration += analytics_start.elapsed();
    }

    let commit_start = Instant::now();
    tx.commit()?;
    profile.commit_duration += commit_start.elapsed();

    // Bookkeeping is only reached on the success path.
    profile.invocations += 1;
    profile.messages += conv.messages.len();
    profile.inserted_messages += inserted_indices.len();
    profile.total_duration += total_start.elapsed();

    Ok(InsertOutcome {
        conversation_id: conv_id,
        conversation_inserted: true,
        inserted_indices,
    })
}
9424
/// Test-only, instrumented append of `conv` onto a conversation that is
/// already stored, accumulating per-phase wall-clock timings in `profile`.
///
/// Opens its own transaction on `self.conn`, resolves the existing
/// conversation by its merge key, inserts only the messages judged new,
/// writes snippets, optionally flushes FTS entries and daily stats, refreshes
/// the conversation tail state, and commits.
///
/// `_workspace_id` is accepted but not used by this helper.
///
/// # Errors
/// Returns an error when no existing conversation matches the merge key, or
/// when any storage call made along the way fails.
#[cfg(test)]
fn append_existing_conversation_with_profile(
    &self,
    agent_id: i64,
    _workspace_id: Option<i64>,
    conv: &Conversation,
    profile: &mut InsertConversationTreePerfProfile,
) -> Result<InsertOutcome> {
    let total_start = Instant::now();
    // Shadow `conv` with its storage-normalized form; all later steps use it.
    let normalized_conv = normalized_conversation_for_storage(conv);
    let conv = normalized_conv.as_ref();

    let source_start = Instant::now();
    self.ensure_source_for_conversation(conv)?;
    profile.source_duration += source_start.elapsed();

    let defer_lexical_updates = defer_storage_lexical_updates_enabled();
    let defer_analytics_updates = defer_analytics_updates_enabled();
    let conversation_key = conversation_merge_key(agent_id, conv);

    let tx_open_start = Instant::now();
    let mut tx = self.conn.transaction()?;
    profile.tx_open_duration += tx_open_start.elapsed();

    let existing_lookup_start = Instant::now();
    let existing = franken_find_existing_conversation_with_tail_by_key(
        &tx,
        &conversation_key,
        Some(conv),
    )?;
    profile.existing_lookup_duration += existing_lookup_start.elapsed();
    // This helper only covers the append case; a missing row is an error.
    let existing = existing.ok_or_else(|| {
        anyhow!("append profile helper expects existing conversation for {conversation_key:?}")
    })?;
    let existing_id = existing.id;

    let existing_idx_lookup_start = Instant::now();
    let append_tail_state = existing.tail_state;
    let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
    // Try the append-only plan first; it is `None` when there is no tail
    // state or when `collect_append_only_tail_messages` declines.
    let existing_plan = append_tail_state.as_ref().and_then(|state| {
        collect_append_only_tail_messages(
            conv,
            state.last_message_idx,
            state.last_message_created_at,
        )
    });
    let used_append_tail_plan = existing_plan.is_some();
    profile.existing_idx_lookup_duration += existing_idx_lookup_start.elapsed();

    let dedupe_filter_start = Instant::now();
    let ExistingConversationNewMessages {
        messages: new_messages,
        new_chars,
        idx_collision_count,
        first_collision_idx,
    } = if let Some(existing_plan) = existing_plan {
        existing_plan
    } else {
        // No append-only plan: load the stored messages and filter the
        // incoming ones against them.
        let ExistingMessageLookup {
            by_idx: mut existing_messages,
            replay: mut existing_replay_fingerprints,
        } = franken_existing_message_lookup(&tx, existing_id, &conv.messages)?;
        collect_new_messages_for_existing_conversation(
            existing_id,
            conv,
            &mut existing_messages,
            &mut existing_replay_fingerprints,
            "skipping replay-equivalent profiled append message with shifted idx",
        )
    };
    profile.dedupe_filter_duration += dedupe_filter_start.elapsed();

    let mut inserted_indices = Vec::new();
    let mut fts_entries = Vec::new();
    let mut fts_pending_chars = 0usize;
    let mut fts_inserted_total = 0usize;
    // Captured before `new_messages` is consumed by the loop below.
    let (inserted_last_idx, inserted_last_created_at) =
        borrowed_messages_tail_state(&new_messages);

    let message_insert_start = Instant::now();
    let inserted_message_ids = franken_append_insert_new_messages_with_profile(
        &tx,
        existing_id,
        &new_messages,
        &mut profile.message_insert_breakdown,
    )?;
    profile.message_insert_duration += message_insert_start.elapsed();

    // Pair each returned row id with the message it was inserted for.
    for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
        let snippet_insert_start = Instant::now();
        franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
        profile.snippet_insert_duration += snippet_insert_start.elapsed();

        if !defer_lexical_updates {
            let fts_entry_start = Instant::now();
            fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
            fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
            profile.fts_entry_duration += fts_entry_start.elapsed();
            // Flush as soon as either the document or the character budget is hit.
            if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
            {
                let fts_flush_start = Instant::now();
                flush_pending_fts_entries(
                    self,
                    &tx,
                    &mut fts_entries,
                    &mut fts_pending_chars,
                    &mut fts_inserted_total,
                )?;
                profile.fts_flush_duration += fts_flush_start.elapsed();
            }
        }

        inserted_indices.push(msg.idx);
    }

    if idx_collision_count > 0 {
        tracing::warn!(
            conversation_id = existing_id,
            collision_count = idx_collision_count,
            first_idx = first_collision_idx,
            source_path = %conv.source_path.display(),
            "message idx collisions encountered while profiling append merge; retaining canonical message variants"
        );
    }

    // Final flush for whatever is still pending after the loop.
    if !defer_lexical_updates {
        let fts_flush_start = Instant::now();
        flush_pending_fts_entries(
            self,
            &tx,
            &mut fts_entries,
            &mut fts_pending_chars,
            &mut fts_inserted_total,
        )?;
        profile.fts_flush_duration += fts_flush_start.elapsed();
    }

    let conversation_row_start = Instant::now();
    let mut exact_append_tail_set = false;
    if used_append_tail_plan {
        // With an append-only plan the tail is only touched when something
        // with both an idx and a timestamp was inserted.
        if let (Some(last_message_idx), Some(last_message_created_at)) =
            (inserted_last_idx, inserted_last_created_at)
        {
            // Use the direct "set" call only when the previously stored
            // `ended_at` is absent or not later than the new last message.
            if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
                franken_set_conversation_tail_state_after_append(
                    &tx,
                    existing_id,
                    last_message_created_at,
                    last_message_idx,
                    last_message_created_at,
                )?;
                exact_append_tail_set = true;
            } else {
                franken_update_conversation_tail_state(
                    &tx,
                    existing_id,
                    Some(last_message_created_at),
                    inserted_last_idx,
                    inserted_last_created_at,
                )?;
            }
        }
    } else {
        // Fallback path: pass the newest `created_at` across all incoming messages.
        let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
        franken_update_conversation_tail_state(
            &tx,
            existing_id,
            conv_last_ts,
            inserted_last_idx,
            inserted_last_created_at,
        )?;
    }
    franken_update_external_conversation_tail_after_append(
        &tx,
        agent_id,
        conv,
        used_append_tail_plan,
        exact_append_tail_set,
        inserted_last_idx,
        inserted_last_created_at,
    )?;
    profile.conversation_row_duration += conversation_row_start.elapsed();

    // Stats are skipped when deferred or when nothing was inserted; the
    // session count is never changed because the conversation already exists.
    if !defer_analytics_updates && !inserted_indices.is_empty() {
        let analytics_start = Instant::now();
        franken_update_daily_stats_in_tx(
            self,
            &tx,
            &conv.agent_slug,
            &conv.source_id,
            conversation_effective_started_at(conv),
            StatsDelta {
                session_count_delta: 0,
                message_count_delta: inserted_indices.len() as i64,
                total_chars_delta: new_chars,
            },
        )?;
        profile.analytics_duration += analytics_start.elapsed();
    }

    let commit_start = Instant::now();
    tx.commit()?;
    profile.commit_duration += commit_start.elapsed();
    profile.invocations += 1;
    // `messages` counts everything offered; `inserted_messages` only what was written.
    profile.messages += conv.messages.len();
    profile.inserted_messages += inserted_indices.len();
    profile.total_duration += total_start.elapsed();

    Ok(InsertOutcome {
        conversation_id: existing_id,
        conversation_inserted: false,
        inserted_indices,
    })
}
9640
9641 #[allow(clippy::too_many_arguments)]
9643 fn franken_append_messages_with_tail_in_tx(
9644 &self,
9645 tx: &FrankenTransaction<'_>,
9646 agent_id: i64,
9647 conversation_id: i64,
9648 conv: &Conversation,
9649 append_tail_state: Option<ExistingConversationTailState>,
9650 defer_lexical_updates: bool,
9651 defer_analytics_updates: bool,
9652 ) -> Result<InsertOutcome> {
9653 let append_tail_ended_at = append_tail_state.and_then(|state| state.ended_at);
9654 let append_plan = append_tail_state.as_ref().and_then(|state| {
9655 collect_append_only_tail_messages(
9656 conv,
9657 state.last_message_idx,
9658 state.last_message_created_at,
9659 )
9660 });
9661 let used_append_tail_plan = append_plan.is_some();
9662 let ExistingConversationNewMessages {
9663 messages: new_messages,
9664 new_chars,
9665 idx_collision_count,
9666 first_collision_idx,
9667 } = if let Some(append_plan) = append_plan {
9668 append_plan
9669 } else {
9670 let ExistingMessageLookup {
9671 by_idx: mut existing_messages,
9672 replay: mut existing_replay_fingerprints,
9673 } = franken_existing_message_lookup(tx, conversation_id, &conv.messages)?;
9674 collect_new_messages_for_existing_conversation(
9675 conversation_id,
9676 conv,
9677 &mut existing_messages,
9678 &mut existing_replay_fingerprints,
9679 "skipping replay-equivalent recovered message with shifted idx",
9680 )
9681 };
9682
9683 let mut inserted_indices = Vec::new();
9684 let mut fts_entries = Vec::new();
9685 let mut fts_pending_chars = 0usize;
9686 let mut _fts_inserted_total = 0usize;
9687 let (inserted_last_idx, inserted_last_created_at) =
9688 borrowed_messages_tail_state(&new_messages);
9689 let inserted_message_ids =
9690 franken_append_insert_new_messages(tx, conversation_id, &new_messages)?;
9691 for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
9692 franken_insert_snippets(tx, msg_id, &msg.snippets)?;
9693 if !defer_lexical_updates {
9694 fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
9695 fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
9696 if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
9697 || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
9698 {
9699 flush_pending_fts_entries(
9700 self,
9701 tx,
9702 &mut fts_entries,
9703 &mut fts_pending_chars,
9704 &mut _fts_inserted_total,
9705 )?;
9706 }
9707 }
9708 inserted_indices.push(msg.idx);
9709 }
9710
9711 if idx_collision_count > 0 {
9712 tracing::warn!(
9713 conversation_id,
9714 collision_count = idx_collision_count,
9715 first_idx = first_collision_idx,
9716 source_path = %conv.source_path.display(),
9717 "message idx collisions encountered while appending to an existing conversation; retaining canonical message variants"
9718 );
9719 }
9720
9721 if !defer_lexical_updates {
9722 flush_pending_fts_entries(
9723 self,
9724 tx,
9725 &mut fts_entries,
9726 &mut fts_pending_chars,
9727 &mut _fts_inserted_total,
9728 )?;
9729 }
9730
9731 let mut exact_append_tail_set = false;
9732 if used_append_tail_plan {
9733 if let (Some(last_message_idx), Some(last_message_created_at)) =
9734 (inserted_last_idx, inserted_last_created_at)
9735 {
9736 if append_tail_ended_at.is_none_or(|ended_at| ended_at <= last_message_created_at) {
9737 franken_set_conversation_tail_state_after_append(
9738 tx,
9739 conversation_id,
9740 last_message_created_at,
9741 last_message_idx,
9742 last_message_created_at,
9743 )?;
9744 exact_append_tail_set = true;
9745 } else {
9746 franken_update_conversation_tail_state(
9747 tx,
9748 conversation_id,
9749 Some(last_message_created_at),
9750 inserted_last_idx,
9751 inserted_last_created_at,
9752 )?;
9753 }
9754 }
9755 } else {
9756 let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
9757 franken_update_conversation_tail_state(
9758 tx,
9759 conversation_id,
9760 conv_last_ts,
9761 inserted_last_idx,
9762 inserted_last_created_at,
9763 )?;
9764 }
9765 franken_update_external_conversation_tail_after_append(
9766 tx,
9767 agent_id,
9768 conv,
9769 used_append_tail_plan,
9770 exact_append_tail_set,
9771 inserted_last_idx,
9772 inserted_last_created_at,
9773 )?;
9774
9775 if !defer_analytics_updates && !inserted_indices.is_empty() {
9776 let message_count = inserted_indices.len() as i64;
9777 franken_update_daily_stats_in_tx(
9778 self,
9779 tx,
9780 &conv.agent_slug,
9781 &conv.source_id,
9782 conversation_effective_started_at(conv),
9783 StatsDelta {
9784 session_count_delta: 0,
9785 message_count_delta: message_count,
9786 total_chars_delta: new_chars,
9787 },
9788 )?;
9789 }
9790
9791 Ok(InsertOutcome {
9792 conversation_id,
9793 conversation_inserted: false,
9794 inserted_indices,
9795 })
9796 }
9797
9798 pub fn rebuild_fts(&self) -> Result<()> {
9800 self.rebuild_fts_via_frankensqlite().map(|_| ())
9801 }
9802
9803 pub(crate) fn ensure_search_fallback_fts_consistency(&self) -> Result<FtsConsistencyRepair> {
9808 self.ensure_fts_consistency_via_frankensqlite()
9809 }
9810
9811 pub(crate) fn validate_fts_messages_integrity(&self) -> Result<()> {
9812 validate_fts_messages_integrity_for_connection(&self.conn)
9813 }
9814
9815 pub(crate) fn fallback_fts_is_known_healthy_for_archive_fingerprint(
9816 &self,
9817 archive_fingerprint: &str,
9818 ) -> Result<bool> {
9819 Ok(
9820 self.read_fts_franken_rebuild_generation()? == Some(FTS_FRANKEN_REBUILD_GENERATION)
9821 && self
9822 .read_fts_franken_rebuild_archive_fingerprint()?
9823 .as_deref()
9824 == Some(archive_fingerprint),
9825 )
9826 }
9827
9828 pub(crate) fn record_search_fallback_fts_archive_fingerprint(
9829 &self,
9830 archive_fingerprint: &str,
9831 ) -> Result<()> {
9832 self.conn
9833 .execute_compat(
9834 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9835 fparams![
9836 FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY,
9837 archive_fingerprint.to_string()
9838 ],
9839 )
9840 .with_context(|| "recording frankensqlite FTS archive fingerprint")?;
9841 Ok(())
9842 }
9843
9844 pub(crate) fn daily_stats_is_known_healthy_for_archive_fingerprint(
9845 &self,
9846 archive_fingerprint: &str,
9847 ) -> Result<bool> {
9848 Ok(
9849 self.read_daily_stats_health_generation()? == Some(DAILY_STATS_HEALTH_GENERATION)
9850 && self.read_daily_stats_archive_fingerprint()?.as_deref()
9851 == Some(archive_fingerprint),
9852 )
9853 }
9854
9855 pub(crate) fn record_daily_stats_archive_fingerprint(
9856 &self,
9857 archive_fingerprint: &str,
9858 ) -> Result<()> {
9859 self.conn
9860 .execute_compat(
9861 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9862 fparams![
9863 DAILY_STATS_HEALTH_GENERATION_META_KEY,
9864 DAILY_STATS_HEALTH_GENERATION.to_string()
9865 ],
9866 )
9867 .with_context(|| "recording daily_stats health generation")?;
9868 self.conn
9869 .execute_compat(
9870 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9871 fparams![DAILY_STATS_HEALTH_META_KEY, archive_fingerprint.to_string()],
9872 )
9873 .with_context(|| "recording daily_stats archive fingerprint")?;
9874 Ok(())
9875 }
9876
9877 fn read_fts_franken_rebuild_generation(&self) -> Result<Option<i64>> {
9878 let value: Option<String> = self
9879 .conn
9880 .query_row_map(
9881 "SELECT value FROM meta WHERE key = ?1",
9882 fparams![FTS_FRANKEN_REBUILD_META_KEY],
9883 |row| row.get_typed(0),
9884 )
9885 .optional()?;
9886 Ok(value.and_then(|v| v.parse::<i64>().ok()))
9887 }
9888
9889 fn read_fts_franken_rebuild_archive_fingerprint(&self) -> Result<Option<String>> {
9890 Ok(self
9891 .conn
9892 .query_row_map(
9893 "SELECT value FROM meta WHERE key = ?1",
9894 fparams![FTS_FRANKEN_REBUILD_FINGERPRINT_META_KEY],
9895 |row| row.get_typed(0),
9896 )
9897 .optional()?)
9898 }
9899
9900 fn read_daily_stats_health_generation(&self) -> Result<Option<i64>> {
9901 let value: Option<String> = self
9902 .conn
9903 .query_row_map(
9904 "SELECT value FROM meta WHERE key = ?1",
9905 fparams![DAILY_STATS_HEALTH_GENERATION_META_KEY],
9906 |row| row.get_typed(0),
9907 )
9908 .optional()?;
9909 Ok(value.and_then(|value| value.parse::<i64>().ok()))
9910 }
9911
9912 fn read_daily_stats_archive_fingerprint(&self) -> Result<Option<String>> {
9913 Ok(self
9914 .conn
9915 .query_row_map(
9916 "SELECT value FROM meta WHERE key = ?1",
9917 fparams![DAILY_STATS_HEALTH_META_KEY],
9918 |row| row.get_typed(0),
9919 )
9920 .optional()?)
9921 }
9922
9923 fn record_fts_franken_rebuild_generation(&self) -> Result<()> {
9924 self.conn
9925 .execute_compat(
9926 "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
9927 fparams![
9928 FTS_FRANKEN_REBUILD_META_KEY,
9929 FTS_FRANKEN_REBUILD_GENERATION.to_string()
9930 ],
9931 )
9932 .with_context(|| "recording frankensqlite FTS rebuild generation")?;
9933 Ok(())
9934 }
9935
9936 fn ensure_fts_consistency_via_frankensqlite(&self) -> Result<FtsConsistencyRepair> {
9937 if self.read_fts_franken_rebuild_generation()? != Some(FTS_FRANKEN_REBUILD_GENERATION) {
9938 let fts_already_healthy = (|| -> Result<bool> {
9943 let fts_exists: i64 = self.conn.query_row_map(
9944 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9945 fparams![],
9946 |row| row.get_typed(0),
9947 )?;
9948 if fts_exists != 1 {
9949 return Ok(false);
9950 }
9951 let total: i64 = self.conn.query_row_map(
9952 "SELECT COUNT(*) FROM messages",
9953 fparams![],
9954 |row| row.get_typed(0),
9955 )?;
9956 if total == 0 {
9957 return Ok(false);
9958 }
9959 let indexed: i64 = self.conn.query_row_map(
9960 "SELECT COUNT(*) FROM fts_messages",
9961 fparams![],
9962 |row| row.get_typed(0),
9963 )?;
9964 Ok(indexed > 0 && indexed * 100 >= total * 90)
9966 })()
9967 .unwrap_or(false);
9968
9969 if fts_already_healthy {
9970 tracing::info!(
9971 target: "cass::fts_rebuild",
9972 "FTS already populated and consistent; setting generation marker without rebuild"
9973 );
9974 self.record_fts_franken_rebuild_generation()?;
9975 self.set_fts_messages_present_cache(true);
9976 } else {
9977 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
9978 self.record_fts_franken_rebuild_generation()?;
9979 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
9980 }
9981 }
9982
9983 let inspection = (|| -> Result<(i64, bool)> {
9984 let fts_schema_rows = self.conn.query_row_map(
9985 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
9986 fparams![],
9987 |row| row.get_typed::<i64>(0),
9988 )?;
9989 let fts_queryable = fts_schema_rows == 1
9990 && self.conn.query("SELECT COUNT(*) FROM fts_messages").is_ok();
9991 Ok((fts_schema_rows, fts_queryable))
9992 })();
9993
9994 let (fts_schema_rows, fts_queryable) = match inspection {
9995 Ok(result) => result,
9996 Err(err) => {
9997 tracing::warn!(
9998 error = %err,
9999 "frankensqlite FTS consistency probe failed; rebuilding authoritative FTS"
10000 );
10001 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10002 self.record_fts_franken_rebuild_generation()?;
10003 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
10004 }
10005 };
10006
10007 if fts_schema_rows != 1 || !fts_queryable {
10008 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10009 self.record_fts_franken_rebuild_generation()?;
10010 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
10011 }
10012
10013 let total_messages =
10014 self.conn
10015 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
10016 row.get_typed::<i64>(0)
10017 })?;
10018 let indexed_messages =
10019 self.conn
10020 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
10021 row.get_typed::<i64>(0)
10022 })?;
10023
10024 if indexed_messages == total_messages {
10025 self.set_fts_messages_present_cache(true);
10026 return Ok(FtsConsistencyRepair::AlreadyHealthy {
10027 rows: usize::try_from(total_messages.max(0)).unwrap_or(usize::MAX),
10028 });
10029 }
10030
10031 if indexed_messages > total_messages {
10032 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10033 self.record_fts_franken_rebuild_generation()?;
10034 return Ok(FtsConsistencyRepair::Rebuilt { inserted_rows });
10035 }
10036
10037 let inserted_rows = self
10038 .stream_fts_rows_via_frankensqlite(true)
10039 .with_context(|| "incrementally repairing missing FTS rows via frankensqlite")?;
10040 let repaired_rows =
10041 self.conn
10042 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
10043 row.get_typed::<i64>(0)
10044 })?;
10045 if repaired_rows == total_messages {
10046 self.set_fts_messages_present_cache(true);
10047 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
10048 inserted_rows,
10049 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
10050 });
10051 }
10052
10053 if inserted_rows == 0 {
10061 tracing::debug!(
10062 target: "cass::fts_rebuild",
10063 indexed_messages = repaired_rows,
10064 total_messages,
10065 un_indexable_gap = total_messages.saturating_sub(repaired_rows),
10066 "FTS catch-up inserted 0 rows; remaining gap is un-indexable (likely orphaned messages with dangling conversation_id)"
10067 );
10068 self.set_fts_messages_present_cache(true);
10069 return Ok(FtsConsistencyRepair::IncrementalCatchUp {
10070 inserted_rows: 0,
10071 total_rows: usize::try_from(repaired_rows.max(0)).unwrap_or(usize::MAX),
10072 });
10073 }
10074
10075 let inserted_rows = self.rebuild_fts_via_frankensqlite()?;
10078 self.record_fts_franken_rebuild_generation()?;
10079 Ok(FtsConsistencyRepair::Rebuilt { inserted_rows })
10080 }
10081
10082 pub(crate) fn rebuild_fts_via_frankensqlite(&self) -> Result<usize> {
10083 self.invalidate_fts_messages_present_cache();
10084 self.conn
10085 .execute("DROP TABLE IF EXISTS fts_messages;")
10086 .with_context(|| "dropping derived fts_messages before frankensqlite rebuild")?;
10087 self.conn
10088 .execute_compat(FTS5_REGISTER_SQL, fparams![])
10089 .with_context(|| "creating derived fts_messages via frankensqlite rebuild")?;
10090 self.set_fts_messages_present_cache(true);
10091
10092 self.stream_fts_rows_via_frankensqlite(false)
10093 }
10094
10095 fn stream_fts_rows_via_frankensqlite(&self, missing_only: bool) -> Result<usize> {
10096 let batch_size = fts_rebuild_batch_size().max(1);
10097 let batch_limit = i64::try_from(batch_size).unwrap_or(i64::MAX);
10098 let mut total_inserted: usize = 0;
10099 let mut total_skipped_orphans: usize = 0;
10100 let mut total_skipped_existing: usize = 0;
10101 let mut last_rowid: i64 = 0;
10102 let conversation_by_id = self.load_fts_conversation_projection_map()?;
10103 let agent_slug_by_id = self.load_fts_agent_slug_map()?;
10104 let workspace_path_by_id = self.load_fts_workspace_path_map()?;
10105 let existing_fts_rowids = if missing_only {
10106 Some(self.load_fts_message_rowid_set()?)
10107 } else {
10108 None
10109 };
10110 let mut entries = Vec::new();
10111 let mut pending_chars = 0usize;
10112
10113 loop {
10114 let rows = self.fetch_fts_rebuild_message_rows(last_rowid, batch_limit)?;
10115 let fetched_count = rows.len();
10116 if fetched_count == 0 {
10117 break;
10118 }
10119
10120 let inserted_before_batch = total_inserted;
10121 let skipped_before_batch = total_skipped_orphans;
10122 let existing_before_batch = total_skipped_existing;
10123
10124 for row in rows {
10125 last_rowid = row.rowid;
10126 if existing_fts_rowids
10127 .as_ref()
10128 .is_some_and(|rowids| rowids.contains(&row.message_id))
10129 {
10130 total_skipped_existing = total_skipped_existing.saturating_add(1);
10131 continue;
10132 }
10133 let Some(conversation) = conversation_by_id.get(&row.conversation_id) else {
10134 total_skipped_orphans = total_skipped_orphans.saturating_add(1);
10135 continue;
10136 };
10137 let agent = conversation
10138 .agent_id
10139 .and_then(|agent_id| agent_slug_by_id.get(&agent_id))
10140 .filter(|slug| !slug.is_empty())
10141 .cloned()
10142 .unwrap_or_else(|| "unknown".to_string());
10143 let workspace = conversation
10144 .workspace_id
10145 .and_then(|workspace_id| workspace_path_by_id.get(&workspace_id))
10146 .cloned()
10147 .unwrap_or_default();
10148 pending_chars = pending_chars.saturating_add(row.content.len());
10149 entries.push(FtsEntry {
10150 content: row.content,
10151 title: conversation.title.clone(),
10152 agent,
10153 workspace,
10154 source_path: conversation.source_path.clone(),
10155 created_at: row.created_at,
10156 message_id: row.message_id,
10157 });
10158 if entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
10159 || pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
10160 {
10161 total_inserted = total_inserted.saturating_add(
10162 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
10163 );
10164 entries.clear();
10165 pending_chars = 0;
10166 }
10167 }
10168
10169 if !entries.is_empty() {
10170 total_inserted = total_inserted.saturating_add(
10171 franken_batch_insert_fts_on_connection(&self.conn, &entries)?,
10172 );
10173 entries.clear();
10174 pending_chars = 0;
10175 }
10176
10177 tracing::debug!(
10178 target: "cass::fts_rebuild",
10179 batch_rows = fetched_count,
10180 batch_inserted = total_inserted.saturating_sub(inserted_before_batch),
10181 batch_skipped_orphans = total_skipped_orphans.saturating_sub(skipped_before_batch),
10182 batch_skipped_existing = total_skipped_existing.saturating_sub(existing_before_batch),
10183 total_inserted,
10184 total_skipped_orphans,
10185 total_skipped_existing,
10186 last_rowid,
10187 missing_only,
10188 "FTS streaming maintenance batch complete"
10189 );
10190
10191 if fetched_count < batch_size {
10192 break;
10193 }
10194 }
10195
10196 Ok(total_inserted)
10197 }
10198
10199 fn fetch_fts_rebuild_message_rows(
10200 &self,
10201 last_rowid: i64,
10202 batch_limit: i64,
10203 ) -> Result<Vec<FtsRebuildMessageRow>> {
10204 self.conn
10205 .query_map_collect(
10206 "SELECT m.rowid, m.id, m.conversation_id, m.content, m.created_at
10207 FROM messages m
10208 WHERE m.rowid > ?1
10209 ORDER BY m.rowid
10210 LIMIT ?2",
10211 fparams![last_rowid, batch_limit],
10212 |row| {
10213 Ok(FtsRebuildMessageRow {
10214 rowid: row.get_typed(0)?,
10215 message_id: row.get_typed(1)?,
10216 conversation_id: row.get_typed(2)?,
10217 content: row.get_typed::<Option<String>>(3)?.unwrap_or_default(),
10218 created_at: row.get_typed(4)?,
10219 })
10220 },
10221 )
10222 .with_context(|| format!("fetching FTS maintenance messages after rowid {last_rowid}"))
10223 }
10224
10225 fn load_fts_message_rowid_set(&self) -> Result<HashSet<i64>> {
10226 let rows: Vec<i64> = self
10227 .conn
10228 .query_map_collect("SELECT rowid FROM fts_messages", fparams![], |row| {
10229 row.get_typed(0)
10230 })
10231 .with_context(|| "loading existing FTS message rowids")?;
10232 Ok(rows.into_iter().collect())
10233 }
10234
10235 fn load_fts_conversation_projection_map(
10236 &self,
10237 ) -> Result<HashMap<i64, FtsConversationProjection>> {
10238 let rows: Vec<(i64, FtsConversationProjection)> = self
10239 .conn
10240 .query_map_collect(
10241 "SELECT id, title, agent_id, workspace_id, source_path
10242 FROM conversations",
10243 fparams![],
10244 |row| {
10245 Ok((
10246 row.get_typed(0)?,
10247 FtsConversationProjection {
10248 title: row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
10249 agent_id: row.get_typed(2)?,
10250 workspace_id: row.get_typed(3)?,
10251 source_path: row.get_typed::<Option<String>>(4)?.unwrap_or_default(),
10252 },
10253 ))
10254 },
10255 )
10256 .with_context(|| "loading FTS conversation projection map")?;
10257 Ok(rows.into_iter().collect())
10258 }
10259
10260 fn load_fts_agent_slug_map(&self) -> Result<HashMap<i64, String>> {
10261 let rows: Vec<(i64, String)> = self
10262 .conn
10263 .query_map_collect("SELECT id, slug FROM agents", fparams![], |row| {
10264 Ok((
10265 row.get_typed(0)?,
10266 row.get_typed::<Option<String>>(1)?
10267 .unwrap_or_else(|| "unknown".to_string()),
10268 ))
10269 })
10270 .with_context(|| "loading FTS agent slug map")?;
10271 Ok(rows.into_iter().collect())
10272 }
10273
10274 fn load_fts_workspace_path_map(&self) -> Result<HashMap<i64, String>> {
10275 let rows: Vec<(i64, String)> = self
10276 .conn
10277 .query_map_collect("SELECT id, path FROM workspaces", fparams![], |row| {
10278 Ok((
10279 row.get_typed(0)?,
10280 row.get_typed::<Option<String>>(1)?.unwrap_or_default(),
10281 ))
10282 })
10283 .with_context(|| "loading FTS workspace path map")?;
10284 Ok(rows.into_iter().collect())
10285 }
10286
10287 pub fn fetch_messages_for_embedding(&self) -> Result<Vec<MessageForEmbedding>> {
10289 self.conn
10294 .query_map_collect(
10295 "SELECT m.id, m.created_at, COALESCE(c.agent_id, 0), c.workspace_id, c.source_id, m.role, m.content
10296 FROM messages m
10297 JOIN conversations c ON m.conversation_id = c.id
10298 ORDER BY m.id",
10299 fparams![],
10300 |row| {
10301 let source_id: String = row.get_typed::<Option<String>>(4)?
10302 .unwrap_or_else(|| "local".to_string());
10303 Ok(MessageForEmbedding {
10304 message_id: row.get_typed(0)?,
10305 created_at: row.get_typed(1)?,
10306 agent_id: row.get_typed(2)?,
10307 workspace_id: row.get_typed(3)?,
10308 source_id_hash: crc32fast::hash(source_id.as_bytes()),
10309 role: row.get_typed(5)?,
10310 content: row.get_typed(6)?,
10311 })
10312 },
10313 )
10314 .with_context(|| "fetching messages for embedding")
10315 }
10316
10317 pub fn get_last_embedded_message_id(&self) -> Result<Option<i64>> {
10319 let result: Result<String, _> = self.conn.query_row_map(
10320 "SELECT value FROM meta WHERE key = 'last_embedded_message_id'",
10321 fparams![],
10322 |row| row.get_typed(0),
10323 );
10324 match result.optional() {
10325 Ok(Some(s)) => Ok(s.parse().ok()),
10326 Ok(None) => Ok(None),
10327 Err(e) => Err(e.into()),
10328 }
10329 }
10330
10331 pub fn set_last_embedded_message_id(&self, id: i64) -> Result<()> {
10333 self.conn.execute_compat(
10334 "INSERT OR REPLACE INTO meta(key, value) VALUES('last_embedded_message_id', ?1)",
10335 fparams![id.to_string()],
10336 )?;
10337 Ok(())
10338 }
10339
10340 pub fn get_embedding_jobs(&self, db_path: &str) -> Result<Vec<EmbeddingJobRow>> {
10342 self.conn
10343 .query_map_collect(
10344 "SELECT id, db_path, model_id, status, total_docs, completed_docs, error_message, created_at, started_at, completed_at
10345 FROM embedding_jobs WHERE db_path = ?1 ORDER BY id DESC",
10346 fparams![db_path],
10347 |row| {
10348 Ok(EmbeddingJobRow {
10349 id: row.get_typed(0)?,
10350 db_path: row.get_typed(1)?,
10351 model_id: row.get_typed(2)?,
10352 status: row.get_typed(3)?,
10353 total_docs: row.get_typed(4)?,
10354 completed_docs: row.get_typed(5)?,
10355 error_message: row.get_typed(6)?,
10356 created_at: row.get_typed(7)?,
10357 started_at: row.get_typed(8)?,
10358 completed_at: row.get_typed(9)?,
10359 })
10360 },
10361 )
10362 .with_context(|| format!("fetching embedding jobs for {db_path}"))
10363 }
10364
10365 pub fn upsert_embedding_job(
10367 &self,
10368 db_path: &str,
10369 model_id: &str,
10370 total_docs: i64,
10371 ) -> Result<i64> {
10372 let updated = self.conn.execute_compat(
10373 "UPDATE embedding_jobs
10374 SET total_docs = ?3
10375 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10376 fparams![db_path, model_id, total_docs],
10377 )?;
10378 if updated == 0 {
10379 let insert_result = self.conn.execute_compat(
10380 "INSERT INTO embedding_jobs(db_path, model_id, total_docs) VALUES(?1,?2,?3)",
10381 fparams![db_path, model_id, total_docs],
10382 );
10383 if let Err(err) = insert_result {
10384 if !matches!(err, frankensqlite::FrankenError::UniqueViolation { .. }) {
10385 return Err(err.into());
10386 }
10387 self.conn.execute_compat(
10388 "UPDATE embedding_jobs
10389 SET total_docs = ?3
10390 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10391 fparams![db_path, model_id, total_docs],
10392 )?;
10393 }
10394 }
10395 self.conn
10396 .query_row_map(
10397 "SELECT id FROM embedding_jobs
10398 WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')
10399 ORDER BY id DESC
10400 LIMIT 1",
10401 fparams![db_path, model_id],
10402 |row| row.get_typed(0),
10403 )
10404 .with_context(|| "resolving embedding job id after upsert")
10405 }
10406
10407 pub fn start_embedding_job(&self, job_id: i64) -> Result<()> {
10409 self.conn.execute_compat(
10410 "UPDATE embedding_jobs SET status = 'running', started_at = datetime('now') WHERE id = ?1",
10411 fparams![job_id],
10412 )?;
10413 Ok(())
10414 }
10415
10416 pub fn complete_embedding_job(&self, job_id: i64) -> Result<()> {
10418 self.conn.execute_compat(
10419 "UPDATE embedding_jobs SET status = 'completed', completed_at = datetime('now') WHERE id = ?1",
10420 fparams![job_id],
10421 )?;
10422 Ok(())
10423 }
10424
10425 pub fn fail_embedding_job(&self, job_id: i64, error: &str) -> Result<()> {
10427 self.conn.execute_compat(
10428 "UPDATE embedding_jobs SET status = 'failed', error_message = ?2, completed_at = datetime('now') WHERE id = ?1",
10429 fparams![job_id, error],
10430 )?;
10431 Ok(())
10432 }
10433
10434 pub fn cancel_embedding_jobs(&self, db_path: &str, model_id: Option<&str>) -> Result<usize> {
10436 if let Some(mid) = model_id {
10437 Ok(self.conn.execute_compat(
10438 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND model_id = ?2 AND status IN ('pending', 'running')",
10439 fparams![db_path, mid],
10440 )?)
10441 } else {
10442 Ok(self.conn.execute_compat(
10443 "UPDATE embedding_jobs SET status = 'cancelled' WHERE db_path = ?1 AND status IN ('pending', 'running')",
10444 fparams![db_path],
10445 )?)
10446 }
10447 }
10448
10449 pub fn update_job_progress(&self, job_id: i64, completed_docs: i64) -> Result<()> {
10451 self.conn.execute_compat(
10452 "UPDATE embedding_jobs SET completed_docs = ?2 WHERE id = ?1",
10453 fparams![job_id, completed_docs],
10454 )?;
10455 Ok(())
10456 }
10457
10458 pub fn count_sessions_in_range(
10467 &self,
10468 start_ts_ms: Option<i64>,
10469 end_ts_ms: Option<i64>,
10470 agent_slug: Option<&str>,
10471 source_id: Option<&str>,
10472 ) -> Result<(i64, bool)> {
10473 let agent = agent_slug.unwrap_or("all");
10474 let source = source_id.unwrap_or("all");
10475
10476 let stats_count: i64 = self
10478 .conn
10479 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10480 row.get_typed(0)
10481 })
10482 .unwrap_or(0);
10483
10484 if stats_count == 0 {
10485 return self.count_sessions_direct(start_ts_ms, end_ts_ms, agent_slug, source_id);
10486 }
10487
10488 let start_day = start_ts_ms.map(Self::day_id_from_millis);
10490 let end_day = end_ts_ms.map(Self::day_id_from_millis);
10491
10492 let count: i64 = match (start_day, end_day) {
10493 (Some(start), Some(end)) => self.conn.query_row_map(
10494 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10495 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4",
10496 fparams![start, end, agent, source],
10497 |row| row.get_typed(0),
10498 )?,
10499 (Some(start), None) => self.conn.query_row_map(
10500 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10501 WHERE day_id >= ?1 AND agent_slug = ?2 AND source_id = ?3",
10502 fparams![start, agent, source],
10503 |row| row.get_typed(0),
10504 )?,
10505 (None, Some(end)) => self.conn.query_row_map(
10506 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10507 WHERE day_id <= ?1 AND agent_slug = ?2 AND source_id = ?3",
10508 fparams![end, agent, source],
10509 |row| row.get_typed(0),
10510 )?,
10511 (None, None) => self.conn.query_row_map(
10512 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10513 WHERE agent_slug = ?1 AND source_id = ?2",
10514 fparams![agent, source],
10515 |row| row.get_typed(0),
10516 )?,
10517 };
10518
10519 Ok((count, true))
10520 }
10521
10522 fn count_sessions_direct(
10524 &self,
10525 start_ts_ms: Option<i64>,
10526 end_ts_ms: Option<i64>,
10527 agent_slug: Option<&str>,
10528 source_id: Option<&str>,
10529 ) -> Result<(i64, bool)> {
10530 let mut sql = "SELECT COUNT(*) FROM conversations c WHERE 1=1".to_string();
10537 let mut param_values: Vec<ParamValue> = Vec::new();
10538 let mut idx = 1;
10539
10540 if let Some(start) = start_ts_ms {
10541 sql.push_str(&format!(" AND c.started_at >= ?{idx}"));
10542 param_values.push(ParamValue::from(start));
10543 idx += 1;
10544 }
10545 if let Some(end) = end_ts_ms {
10546 sql.push_str(&format!(" AND c.started_at <= ?{idx}"));
10547 param_values.push(ParamValue::from(end));
10548 idx += 1;
10549 }
10550 if let Some(agent) = agent_slug
10551 && agent != "all"
10552 {
10553 sql.push_str(&format!(
10554 " AND EXISTS (SELECT 1 FROM agents a WHERE a.id = c.agent_id AND a.slug = ?{idx})"
10555 ));
10556 param_values.push(ParamValue::from(agent));
10557 idx += 1;
10558 }
10559 if let Some(source) = source_id
10560 && source != "all"
10561 {
10562 sql.push_str(&format!(" AND c.source_id = ?{idx}"));
10563 param_values.push(ParamValue::from(source));
10564 let _ = idx; }
10566
10567 let count: i64 = self
10568 .conn
10569 .query_row_map(&sql, ¶m_values, |row| row.get_typed(0))?;
10570 Ok((count, false))
10571 }
10572
10573 pub fn get_daily_histogram(
10575 &self,
10576 start_ts_ms: i64,
10577 end_ts_ms: i64,
10578 agent_slug: Option<&str>,
10579 source_id: Option<&str>,
10580 ) -> Result<Vec<DailyCount>> {
10581 let start_day = Self::day_id_from_millis(start_ts_ms);
10582 let end_day = Self::day_id_from_millis(end_ts_ms);
10583 let agent = agent_slug.unwrap_or("all");
10584 let source = source_id.unwrap_or("all");
10585
10586 let rows = self.conn.query_map_collect(
10587 "SELECT day_id, session_count, message_count, total_chars
10588 FROM daily_stats
10589 WHERE day_id BETWEEN ?1 AND ?2 AND agent_slug = ?3 AND source_id = ?4
10590 ORDER BY day_id",
10591 fparams![start_day, end_day, agent, source],
10592 |row| {
10593 Ok(DailyCount {
10594 day_id: row.get_typed(0)?,
10595 sessions: row.get_typed(1)?,
10596 messages: row.get_typed(2)?,
10597 chars: row.get_typed(3)?,
10598 })
10599 },
10600 )?;
10601
10602 Ok(rows)
10603 }
10604
10605 pub fn daily_stats_health(&self) -> Result<DailyStatsHealth> {
10607 let row_count: i64 =
10608 self.conn
10609 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
10610 row.get_typed(0)
10611 })?;
10612
10613 let oldest_update: Option<i64> = self.conn.query_row_map(
10614 "SELECT MIN(last_updated) FROM daily_stats",
10615 fparams![],
10616 |row| row.get_typed(0),
10617 )?;
10618
10619 let conversation_count: i64 =
10620 self.conn
10621 .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
10622 row.get_typed(0)
10623 })?;
10624
10625 let materialized_total: i64 = self.conn.query_row_map(
10626 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats
10627 WHERE agent_slug = 'all' AND source_id = 'all'",
10628 fparams![],
10629 |row| row.get_typed(0),
10630 )?;
10631
10632 Ok(DailyStatsHealth {
10633 populated: row_count > 0,
10634 row_count,
10635 oldest_update_ms: oldest_update,
10636 conversation_count,
10637 materialized_total,
10638 drift: (conversation_count - materialized_total).abs(),
10639 })
10640 }
10641
    /// Inserts or merges a batch of conversations inside one transaction.
    ///
    /// Each element of `conversations` is `(agent_id, workspace_id, conversation)`.
    /// For every element the conversation is either inserted as a new row or
    /// merged into an existing one; in both cases only messages not already
    /// known (by idx or replay fingerprint) are written, together with their
    /// snippets. Unless deferred by the `defer_*_enabled()` flags, FTS entries
    /// and analytics (daily stats, token usage, message metrics, rollups and
    /// per-conversation token summaries) are written in the same transaction.
    ///
    /// # Returns
    /// One `InsertOutcome` per input element, in input order. An empty input
    /// returns an empty vector without opening a transaction.
    ///
    /// # Errors
    /// Returns the first error from any storage helper or from the commit.
    /// A failure to load the pricing table is logged and replaced by an empty
    /// table rather than reported.
    pub fn insert_conversations_batched(
        &self,
        conversations: &[(i64, Option<i64>, &Conversation)],
    ) -> Result<Vec<InsertOutcome>> {
        if conversations.is_empty() {
            return Ok(Vec::new());
        }

        // Runs on `self`, before the transaction below is opened.
        self.ensure_sources_for_batch(conversations)?;

        let defer_lexical_updates = defer_storage_lexical_updates_enabled();
        let defer_analytics_updates = defer_analytics_updates_enabled();

        // Best effort: with an empty table no message gets a cost estimate.
        let pricing_table = PricingTable::franken_load(&self.conn).unwrap_or_else(|e| {
            tracing::warn!(target: "cass::analytics::pricing", error = %e, "failed to load pricing table");
            PricingTable { entries: Vec::new() }
        });
        let mut pricing_diag = PricingDiagnostics::default();

        let mut tx = self.conn.transaction()?;

        // Insert any agent/workspace/source rows that are still missing, from
        // inside the transaction, before conversations referencing them are
        // written.
        ensure_agents_in_tx(&tx, conversations)?;
        ensure_workspaces_in_tx(&tx, conversations)?;
        ensure_sources_in_tx(&tx, conversations)?;

        let mut outcomes = Vec::with_capacity(conversations.len());
        // FTS entries are buffered and flushed once a doc-count or char
        // threshold is reached.
        let mut fts_entries = Vec::new();
        let mut fts_pending_chars = 0usize;
        let mut fts_inserted_total = 0usize;
        let mut fts_count_total = 0usize;
        // Analytics accumulators, written after the loop.
        let mut stats = StatsAggregator::new();
        let mut token_stats = TokenStatsAggregator::new();
        let mut token_entries: Vec<TokenUsageEntry> = Vec::new();
        let mut metrics_entries: Vec<MessageMetricsEntry> = Vec::new();
        let mut rollup_agg = AnalyticsRollupAggregator::new();
        let mut conv_ids_to_summarize: Vec<i64> = Vec::new();
        // Batch-local caches: conversation ids resolved so far and the message
        // fingerprints known per conversation, so later elements of the same
        // batch see what earlier elements produced.
        let mut pending_conversation_ids: HashMap<PendingConversationKey, i64> = HashMap::new();
        let mut pending_message_fingerprints: HashMap<i64, HashMap<i64, MessageMergeFingerprint>> =
            HashMap::new();
        let mut pending_message_replay_fingerprints: HashMap<
            i64,
            HashSet<MessageReplayFingerprint>,
        > = HashMap::new();

        for &(agent_id, workspace_id, raw_conv) in conversations {
            let normalized_conv = normalized_conversation_for_storage(raw_conv);
            let conv = normalized_conv.as_ref();
            let mut total_chars: i64 = 0;
            let mut inserted_indices = Vec::with_capacity(conv.messages.len());
            let mut inserted_messages: Vec<(i64, &Message)> =
                Vec::with_capacity(conv.messages.len());
            // Stays 1 only when a new conversation row is actually inserted.
            let mut session_count_delta = 1_i64;
            let conversation_key = conversation_merge_key(agent_id, conv);

            // Consult the batch-local cache first, then the database.
            let existing_conv_id = if let Some(existing_id) =
                pending_conversation_ids.get(&conversation_key)
            {
                Some(*existing_id)
            } else {
                let existing_id =
                    franken_find_existing_conversation_by_key(&tx, &conversation_key, Some(conv))?;
                if let Some(existing_id) = existing_id {
                    pending_conversation_ids.insert(conversation_key.clone(), existing_id);
                }
                existing_id
            };

            let conv_id = if let Some(existing_id) = existing_conv_id {
                // Merge path: the conversation already exists, append only
                // the messages that are new.
                session_count_delta = 0;
                let ExistingMessageLookup {
                    by_idx: mut existing_messages,
                    replay: mut existing_replay_fingerprints,
                } = franken_existing_message_lookup_with_pending(
                    &tx,
                    existing_id,
                    &conv.messages,
                    &mut pending_message_fingerprints,
                    &mut pending_message_replay_fingerprints,
                )?;
                let ExistingConversationNewMessages {
                    messages: new_messages,
                    new_chars,
                    idx_collision_count,
                    first_collision_idx,
                } = collect_new_messages_for_existing_conversation(
                    existing_id,
                    conv,
                    &mut existing_messages,
                    &mut existing_replay_fingerprints,
                    "skipping replay-equivalent recovered message with shifted idx during batched merge",
                );
                // Captured before `new_messages` is consumed by the zip below.
                let (inserted_last_idx, inserted_last_created_at) =
                    borrowed_messages_tail_state(&new_messages);
                let inserted_message_ids =
                    franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
                total_chars += new_chars;
                for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
                    franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
                    if !defer_lexical_updates {
                        fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
                        fts_count_total += 1;
                        fts_pending_chars = fts_pending_chars.saturating_add(msg.content.len());
                        if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                            || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
                        {
                            flush_pending_fts_entries(
                                self,
                                &tx,
                                &mut fts_entries,
                                &mut fts_pending_chars,
                                &mut fts_inserted_total,
                            )?;
                        }
                    }
                    inserted_indices.push(msg.idx);
                    inserted_messages.push((msg_id, msg));
                }

                if idx_collision_count > 0 {
                    tracing::warn!(
                        conversation_id = existing_id,
                        collision_count = idx_collision_count,
                        first_idx = first_collision_idx,
                        source_path = %conv.source_path.display(),
                        "message idx collisions encountered during batched conversation merge; retaining canonical message variants"
                    );
                }

                // Latest message timestamp in the incoming conversation
                // (all of its messages, not just the newly inserted ones).
                let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
                franken_update_conversation_tail_state(
                    &tx,
                    existing_id,
                    conv_last_ts,
                    inserted_last_idx,
                    inserted_last_created_at,
                )?;
                if let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv)
                {
                    franken_update_external_conversation_tail_lookup_key(
                        &tx,
                        &lookup_key,
                        conv_last_ts,
                        inserted_last_idx,
                        inserted_last_created_at,
                    )?;
                }

                // Store the updated lookups back for later batch elements.
                pending_message_fingerprints.insert(existing_id, existing_messages);
                pending_message_replay_fingerprints
                    .insert(existing_id, existing_replay_fingerprints);

                existing_id
            } else {
                // No existing conversation was found; the insert helper can
                // still report an existing row, which is merged like above.
                match franken_insert_conversation_or_get_existing(
                    &tx,
                    agent_id,
                    workspace_id,
                    conv,
                )? {
                    ConversationInsertStatus::Inserted(new_conv_id) => {
                        pending_conversation_ids.insert(conversation_key.clone(), new_conv_id);
                        let pending_messages =
                            pending_message_fingerprints.entry(new_conv_id).or_default();
                        let pending_replay_fingerprints = pending_message_replay_fingerprints
                            .entry(new_conv_id)
                            .or_default();
                        // Drop messages duplicated within the incoming
                        // conversation itself (same idx or same replay
                        // fingerprint); the first occurrence wins.
                        let mut new_messages = Vec::new();
                        for msg in &conv.messages {
                            let incoming_replay = message_replay_fingerprint(msg);
                            if pending_messages.contains_key(&msg.idx)
                                || pending_replay_fingerprints.contains(&incoming_replay)
                            {
                                continue;
                            }
                            pending_messages.insert(msg.idx, message_merge_fingerprint(msg));
                            pending_replay_fingerprints.insert(incoming_replay);
                            new_messages.push(msg);
                        }
                        let inserted_message_ids =
                            franken_batch_insert_new_messages(&tx, new_conv_id, &new_messages)?;
                        for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
                            franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
                            if !defer_lexical_updates {
                                fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
                                fts_count_total += 1;
                                fts_pending_chars =
                                    fts_pending_chars.saturating_add(msg.content.len());
                                if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                                    || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
                                {
                                    flush_pending_fts_entries(
                                        self,
                                        &tx,
                                        &mut fts_entries,
                                        &mut fts_pending_chars,
                                        &mut fts_inserted_total,
                                    )?;
                                }
                            }
                            // `len()` is the UTF-8 byte length of the content.
                            total_chars += msg.content.len() as i64;
                            inserted_indices.push(msg.idx);
                            inserted_messages.push((msg_id, msg));
                        }
                        new_conv_id
                    }
                    ConversationInsertStatus::Existing(existing_id) => {
                        // The insert resolved to an existing row: same merge
                        // steps as the branch above, with different log text.
                        session_count_delta = 0;
                        pending_conversation_ids.insert(conversation_key.clone(), existing_id);
                        let ExistingMessageLookup {
                            by_idx: mut existing_messages,
                            replay: mut existing_replay_fingerprints,
                        } = franken_existing_message_lookup_with_pending(
                            &tx,
                            existing_id,
                            &conv.messages,
                            &mut pending_message_fingerprints,
                            &mut pending_message_replay_fingerprints,
                        )?;
                        let ExistingConversationNewMessages {
                            messages: new_messages,
                            new_chars,
                            idx_collision_count,
                            first_collision_idx,
                        } = collect_new_messages_for_existing_conversation(
                            existing_id,
                            conv,
                            &mut existing_messages,
                            &mut existing_replay_fingerprints,
                            "skipping replay-equivalent recovered message with shifted idx after duplicate conversation recovery",
                        );
                        let (inserted_last_idx, inserted_last_created_at) =
                            borrowed_messages_tail_state(&new_messages);
                        let inserted_message_ids =
                            franken_append_insert_new_messages(&tx, existing_id, &new_messages)?;
                        total_chars += new_chars;
                        for (msg_id, msg) in inserted_message_ids.into_iter().zip(new_messages) {
                            franken_insert_snippets(&tx, msg_id, &msg.snippets)?;
                            if !defer_lexical_updates {
                                fts_entries.push(FtsEntry::from_message(msg_id, msg, conv));
                                fts_count_total += 1;
                                fts_pending_chars =
                                    fts_pending_chars.saturating_add(msg.content.len());
                                if fts_entries.len() >= FTS_ENTRY_BATCH_MAX_DOCS
                                    || fts_pending_chars >= FTS_ENTRY_BATCH_MAX_CHARS
                                {
                                    flush_pending_fts_entries(
                                        self,
                                        &tx,
                                        &mut fts_entries,
                                        &mut fts_pending_chars,
                                        &mut fts_inserted_total,
                                    )?;
                                }
                            }
                            inserted_indices.push(msg.idx);
                            inserted_messages.push((msg_id, msg));
                        }

                        if idx_collision_count > 0 {
                            tracing::warn!(
                                conversation_id = existing_id,
                                collision_count = idx_collision_count,
                                first_idx = first_collision_idx,
                                source_path = %conv.source_path.display(),
                                "message idx collisions encountered after duplicate conversation recovery; retaining canonical message variants"
                            );
                        }

                        let conv_last_ts = conv.messages.iter().filter_map(|m| m.created_at).max();
                        franken_update_conversation_tail_state(
                            &tx,
                            existing_id,
                            conv_last_ts,
                            inserted_last_idx,
                            inserted_last_created_at,
                        )?;
                        if let Some(lookup_key) =
                            conversation_external_lookup_key_for_conv(agent_id, conv)
                        {
                            franken_update_external_conversation_tail_lookup_key(
                                &tx,
                                &lookup_key,
                                conv_last_ts,
                                inserted_last_idx,
                                inserted_last_created_at,
                            )?;
                        }

                        pending_message_fingerprints.insert(existing_id, existing_messages);
                        pending_message_replay_fingerprints
                            .insert(existing_id, existing_replay_fingerprints);

                        existing_id
                    }
                }
            };

            if !defer_analytics_updates {
                let delta = StatsDelta {
                    session_count_delta,
                    message_count_delta: inserted_messages.len() as i64,
                    total_chars_delta: total_chars,
                };

                // Conversations without an effective start time go to day 0.
                let effective_started_at = conversation_effective_started_at(conv);
                let day_id = effective_started_at
                    .map(FrankenStorage::day_id_from_millis)
                    .unwrap_or(0);
                stats.record_delta(
                    &conv.agent_slug,
                    &conv.source_id,
                    day_id,
                    delta.session_count_delta,
                    delta.message_count_delta,
                    delta.total_chars_delta,
                );

                let conv_day_id = day_id;
                // Ends up as the family of the last inserted message whose
                // family is known; "unknown" if there is none.
                let mut session_model_family = String::from("unknown");
                let mut has_any_tokens = false;

                for &(message_id, msg) in &inserted_messages {
                    let role_s = role_str(&msg.role);
                    // When `historical_raw_json` yields something for this
                    // message, token extraction is given a null JSON value
                    // instead of the message's `extra_json`.
                    let usage = if historical_raw_json(&msg.extra_json).is_some() {
                        crate::connectors::extract_tokens_for_agent(
                            &conv.agent_slug,
                            &serde_json::Value::Null,
                            &msg.content,
                            &role_s,
                        )
                    } else {
                        crate::connectors::extract_tokens_for_agent(
                            &conv.agent_slug,
                            &msg.extra_json,
                            &msg.content,
                            &role_s,
                        )
                    };

                    // Timestamp fallback chain: message, then conversation
                    // start, then 0.
                    let msg_ts = msg
                        .created_at
                        .or(conversation_effective_started_at(conv))
                        .unwrap_or(0);
                    // A non-positive timestamp falls back to the
                    // conversation's day bucket.
                    let msg_day_id = if msg_ts > 0 {
                        FrankenStorage::day_id_from_millis(msg_ts)
                    } else {
                        conv_day_id
                    };

                    let model_info = usage
                        .model_name
                        .as_deref()
                        .map(crate::connectors::normalize_model);

                    let model_family = model_info
                        .as_ref()
                        .map(|i| i.family.clone())
                        .unwrap_or_else(|| "unknown".into());
                    let model_tier = model_info
                        .as_ref()
                        .map(|i| i.tier.clone())
                        .unwrap_or_else(|| "unknown".into());
                    // The provider reported by the usage data wins over the
                    // one derived from the normalized model name.
                    let provider = usage
                        .provider
                        .clone()
                        .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
                        .unwrap_or_else(|| "unknown".into());

                    if model_family != "unknown" {
                        session_model_family = model_family.clone();
                    }

                    let estimated_cost = pricing_table.compute_cost(
                        usage.model_name.as_deref(),
                        msg_day_id,
                        usage.input_tokens,
                        usage.output_tokens,
                        usage.cache_read_tokens,
                        usage.cache_creation_tokens,
                    );
                    // Only messages that carry token data but got no price
                    // are counted as unpriced.
                    if estimated_cost.is_some() {
                        pricing_diag.record_priced();
                    } else if usage.has_token_data() {
                        pricing_diag.record_unpriced(usage.model_name.as_deref());
                    }

                    token_stats.record(
                        &conv.agent_slug,
                        &conv.source_id,
                        msg_day_id,
                        &model_family,
                        &role_s,
                        &usage,
                        msg.content.len() as i64,
                        estimated_cost.unwrap_or(0.0),
                    );

                    if usage.has_token_data() {
                        has_any_tokens = true;
                    }

                    let content_chars = msg.content.len() as i64;
                    // Token estimate: one token per four content bytes.
                    let content_tokens_est = content_chars / 4;
                    let msg_hour_id = FrankenStorage::hour_id_from_millis(msg_ts);
                    let has_plan = has_plan_for_role(&role_s, &msg.content);

                    token_entries.push(TokenUsageEntry {
                        message_id,
                        conversation_id: conv_id,
                        agent_id,
                        workspace_id,
                        source_id: conv.source_id.clone(),
                        timestamp_ms: msg_ts,
                        day_id: msg_day_id,
                        model_name: usage.model_name.clone(),
                        model_family: Some(model_family.clone()),
                        model_tier: Some(model_tier.clone()),
                        service_tier: usage.service_tier.clone(),
                        provider: Some(provider.clone()),
                        input_tokens: usage.input_tokens,
                        output_tokens: usage.output_tokens,
                        cache_read_tokens: usage.cache_read_tokens,
                        cache_creation_tokens: usage.cache_creation_tokens,
                        thinking_tokens: usage.thinking_tokens,
                        total_tokens: usage.total_tokens(),
                        estimated_cost_usd: estimated_cost,
                        role: role_s.to_string(),
                        content_chars,
                        has_tool_calls: usage.has_tool_calls,
                        tool_call_count: usage.tool_call_count,
                        data_source: usage.data_source.as_str().to_string(),
                    });

                    let mm = MessageMetricsEntry {
                        message_id,
                        created_at_ms: msg_ts,
                        hour_id: msg_hour_id,
                        day_id: msg_day_id,
                        agent_slug: conv.agent_slug.clone(),
                        // A missing workspace is stored as id 0.
                        workspace_id: workspace_id.unwrap_or(0),
                        source_id: conv.source_id.clone(),
                        role: role_s.to_string(),
                        content_chars,
                        content_tokens_est,
                        model_name: usage.model_name.clone(),
                        model_family: model_family.clone(),
                        model_tier: model_tier.clone(),
                        provider,
                        api_input_tokens: usage.input_tokens,
                        api_output_tokens: usage.output_tokens,
                        api_cache_read_tokens: usage.cache_read_tokens,
                        api_cache_creation_tokens: usage.cache_creation_tokens,
                        api_thinking_tokens: usage.thinking_tokens,
                        api_service_tier: usage.service_tier.clone(),
                        api_data_source: usage.data_source.as_str().to_string(),
                        tool_call_count: usage.tool_call_count as i64,
                        has_tool_calls: usage.has_tool_calls,
                        has_plan,
                    };
                    rollup_agg.record(&mm);
                    metrics_entries.push(mm);
                }

                // A session is recorded only for newly inserted conversations.
                if session_count_delta > 0 {
                    token_stats.record_session(
                        &conv.agent_slug,
                        &conv.source_id,
                        conv_day_id,
                        &session_model_family,
                    );
                }

                if has_any_tokens {
                    conv_ids_to_summarize.push(conv_id);
                }
            }

            outcomes.push(InsertOutcome {
                conversation_id: conv_id,
                conversation_inserted: session_count_delta > 0,
                inserted_indices,
            });
        }

        // Flush whatever FTS entries are still buffered below the thresholds.
        if !defer_lexical_updates {
            flush_pending_fts_entries(
                self,
                &tx,
                &mut fts_entries,
                &mut fts_pending_chars,
                &mut fts_inserted_total,
            )?;
        }
        if !defer_lexical_updates && fts_count_total > 0 {
            tracing::debug!(
                target: "cass::perf::fts5",
                total = fts_count_total,
                inserted = fts_inserted_total,
                conversations = conversations.len(),
                "franken_batch_fts_insert_complete"
            );
        }

        // Write each non-empty analytics accumulator inside the transaction.
        if !defer_analytics_updates && !stats.is_empty() {
            let entries = stats.expand();
            let affected = franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
            tracing::debug!(
                target: "cass::perf::daily_stats",
                raw = stats.raw_entry_count(),
                expanded = entries.len(),
                affected = affected,
                "franken_batched_stats_update_complete"
            );
        }

        if !defer_analytics_updates && !token_entries.is_empty() {
            let token_count = token_entries.len();
            let inserted = franken_insert_token_usage_batched_in_tx(&tx, &token_entries)?;
            tracing::debug!(
                target: "cass::perf::token_usage",
                total = token_count,
                inserted = inserted,
                "franken_batch_token_usage_insert_complete"
            );
        }

        if !defer_analytics_updates && !token_stats.is_empty() {
            let entries = token_stats.expand();
            let affected = franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
            tracing::debug!(
                target: "cass::perf::token_daily_stats",
                raw = token_stats.raw_entry_count(),
                expanded = entries.len(),
                affected = affected,
                "franken_batched_token_stats_update_complete"
            );
        }

        if !defer_analytics_updates && !metrics_entries.is_empty() {
            let mm_count = metrics_entries.len();
            let inserted = franken_insert_message_metrics_batched_in_tx(&tx, &metrics_entries)?;
            tracing::debug!(
                target: "cass::perf::message_metrics",
                total = mm_count,
                inserted = inserted,
                "franken_batch_message_metrics_insert_complete"
            );
        }

        if !defer_analytics_updates && !rollup_agg.is_empty() {
            let (hourly, daily, models_daily) =
                franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
            tracing::debug!(
                target: "cass::perf::usage_rollups",
                hourly_buckets = rollup_agg.hourly_entry_count(),
                daily_buckets = rollup_agg.daily_entry_count(),
                models_daily_buckets = rollup_agg.models_daily_entry_count(),
                hourly_affected = hourly,
                daily_affected = daily,
                models_daily_affected = models_daily,
                "franken_batched_usage_rollups_complete"
            );
        }

        // Refresh summaries for conversations that received a message
        // carrying token data in this batch.
        if !defer_analytics_updates {
            for conv_id in &conv_ids_to_summarize {
                franken_update_conversation_token_summaries_in_tx(&tx, *conv_id)?;
            }
        }

        tx.commit()?;

        // Logged only after a successful commit.
        pricing_diag.log_summary();

        Ok(outcomes)
    }
11234}
11235
11236fn normalized_storage_source_parts(
11237 source_id: Option<&str>,
11238 origin_kind: Option<&str>,
11239 origin_host: Option<&str>,
11240) -> (String, SourceKind, Option<String>) {
11241 let host_label = crate::search::tantivy::normalized_index_origin_host(origin_host);
11242 let source_id = crate::search::tantivy::normalized_index_source_id(
11243 source_id,
11244 origin_kind,
11245 host_label.as_deref(),
11246 );
11247
11248 if source_id == LOCAL_SOURCE_ID {
11249 (source_id, SourceKind::Local, None)
11250 } else {
11251 (source_id, SourceKind::Ssh, host_label)
11252 }
11253}
11254
11255fn normalized_source_for_conversation(conv: &Conversation) -> Source {
11256 let (id, kind, host_label) = normalized_storage_source_parts(
11257 Some(conv.source_id.as_str()),
11258 None,
11259 conv.origin_host.as_deref(),
11260 );
11261 Source {
11262 id,
11263 kind,
11264 host_label,
11265 machine_id: None,
11266 platform: None,
11267 config_json: None,
11268 created_at: None,
11269 updated_at: None,
11270 }
11271}
11272
11273fn is_bootstrap_local_source(source: &Source) -> bool {
11274 source.id == LOCAL_SOURCE_ID
11275 && matches!(source.kind, SourceKind::Local)
11276 && source.host_label.is_none()
11277 && source.machine_id.is_none()
11278 && source.platform.is_none()
11279 && source.config_json.is_none()
11280 && source.created_at.is_none()
11281 && source.updated_at.is_none()
11282}
11283
11284fn normalized_conversation_for_storage<'a>(conv: &'a Conversation) -> Cow<'a, Conversation> {
11285 let normalized_source = normalized_source_for_conversation(conv);
11286 if normalized_source.id == conv.source_id && normalized_source.host_label == conv.origin_host {
11287 Cow::Borrowed(conv)
11288 } else {
11289 let mut normalized = conv.clone();
11290 normalized.source_id = normalized_source.id;
11291 normalized.origin_host = normalized_source.host_label;
11292 Cow::Owned(normalized)
11293 }
11294}
11295
11296impl FrankenStorage {
11297 fn ensure_source_for_conversation(&self, conv: &Conversation) -> Result<()> {
11298 let source = normalized_source_for_conversation(conv);
11299 if is_bootstrap_local_source(&source) {
11300 return Ok(());
11303 }
11304 let cache_key = EnsuredConversationSourceKey::from_source(&source);
11305 if self.conversation_source_already_ensured(&cache_key) {
11306 return Ok(());
11307 }
11308 self.upsert_source(&source)?;
11309 self.mark_conversation_source_ensured(cache_key);
11310 Ok(())
11311 }
11312
11313 fn ensure_sources_for_batch(
11314 &self,
11315 conversations: &[(i64, Option<i64>, &Conversation)],
11316 ) -> Result<()> {
11317 let mut seen = HashSet::with_capacity(conversations.len());
11318 for &(_, _, conv) in conversations {
11319 let source = normalized_source_for_conversation(conv);
11320 if seen.insert(source.id.clone()) {
11321 if is_bootstrap_local_source(&source) {
11322 continue;
11323 }
11324 self.upsert_source(&source)?;
11325 self.mark_conversation_source_ensured(EnsuredConversationSourceKey::from_source(
11326 &source,
11327 ));
11328 }
11329 }
11330 Ok(())
11331 }
11332}
11333
11334fn franken_last_rowid(tx: &FrankenTransaction<'_>) -> Result<i64> {
11340 tx.last_insert_rowid()
11341 .ok()
11342 .filter(|&id| id > 0)
11343 .with_context(|| "last_insert_rowid() returned NULL or 0 after INSERT")
11344}
11345
11346fn ensure_agents_in_tx(
11352 tx: &FrankenTransaction<'_>,
11353 conversations: &[(i64, Option<i64>, &Conversation)],
11354) -> Result<()> {
11355 let mut seen = HashSet::new();
11356 let now = FrankenStorage::now_millis();
11357 for &(agent_id, _, conv) in conversations {
11358 if !seen.insert(agent_id) {
11359 continue;
11360 }
11361 let exists: i64 = tx.query_row_map(
11362 "SELECT COUNT(*) FROM agents WHERE id = ?1",
11363 fparams![agent_id],
11364 |row| row.get_typed(0),
11365 )?;
11366 if exists == 0 {
11367 tracing::debug!(
11368 target: "cass::fk_guard",
11369 agent_id,
11370 slug = %conv.agent_slug,
11371 "inserting agent row inside transaction to satisfy FK constraint"
11372 );
11373 tx.execute_compat(
11377 "INSERT OR IGNORE INTO agents(id, slug, name, kind, created_at, updated_at)
11378 VALUES(?1, ?2, ?3, 'cli', ?4, ?5)",
11379 fparams![
11380 agent_id,
11381 conv.agent_slug.as_str(),
11382 conv.agent_slug.as_str(),
11383 now,
11384 now
11385 ],
11386 )?;
11387 }
11388 }
11389 Ok(())
11390}
11391
11392fn ensure_workspaces_in_tx(
11395 tx: &FrankenTransaction<'_>,
11396 conversations: &[(i64, Option<i64>, &Conversation)],
11397) -> Result<()> {
11398 let mut seen = HashSet::new();
11399 for &(_, workspace_id, conv) in conversations {
11400 let ws_id = match workspace_id {
11401 Some(id) => id,
11402 None => continue,
11403 };
11404 if !seen.insert(ws_id) {
11405 continue;
11406 }
11407 let exists: i64 = tx.query_row_map(
11408 "SELECT COUNT(*) FROM workspaces WHERE id = ?1",
11409 fparams![ws_id],
11410 |row| row.get_typed(0),
11411 )?;
11412 if exists == 0 {
11413 let path_str = conv
11414 .workspace
11415 .as_ref()
11416 .map(|p| p.to_string_lossy().to_string())
11417 .unwrap_or_default();
11418 tracing::debug!(
11419 target: "cass::fk_guard",
11420 workspace_id = ws_id,
11421 path = %path_str,
11422 "inserting workspace row inside transaction to satisfy FK constraint"
11423 );
11424 tx.execute_compat(
11425 "INSERT OR IGNORE INTO workspaces(id, path) VALUES(?1, ?2)",
11426 fparams![ws_id, path_str.as_str()],
11427 )?;
11428 }
11429 }
11430 Ok(())
11431}
11432
11433fn ensure_sources_in_tx(
11437 tx: &FrankenTransaction<'_>,
11438 conversations: &[(i64, Option<i64>, &Conversation)],
11439) -> Result<()> {
11440 let mut seen = HashSet::new();
11441 for &(_, _, conv) in conversations {
11442 let (source_id, source_kind, host_label) = normalized_storage_source_parts(
11443 Some(conv.source_id.as_str()),
11444 None,
11445 conv.origin_host.as_deref(),
11446 );
11447 if !seen.insert(source_id.clone()) {
11448 continue;
11449 }
11450 let exists: i64 = tx.query_row_map(
11451 "SELECT COUNT(*) FROM sources WHERE id = ?1",
11452 fparams![source_id.as_str()],
11453 |row| row.get_typed(0),
11454 )?;
11455 if exists == 0 {
11456 let kind_str = source_kind.to_string();
11457 let now = FrankenStorage::now_millis();
11458 tracing::debug!(
11459 target: "cass::fk_guard",
11460 source_id = %source_id,
11461 kind = kind_str.as_str(),
11462 "inserting source row inside transaction to satisfy FK constraint"
11463 );
11464 tx.execute_compat(
11465 "INSERT OR IGNORE INTO sources(id, kind, host_label, created_at, updated_at)
11466 VALUES(?1, ?2, ?3, ?4, ?5)",
11467 fparams![
11468 source_id.as_str(),
11469 kind_str.as_str(),
11470 host_label.as_deref(),
11471 now,
11472 now
11473 ],
11474 )?;
11475 }
11476 }
11477 Ok(())
11478}
11479
11480fn env_flag_enabled(name: &str) -> bool {
11481 dotenvy::var(name)
11482 .map(|v| !(v == "0" || v.eq_ignore_ascii_case("false")))
11483 .unwrap_or(false)
11484}
11485
11486fn defer_storage_lexical_updates_enabled() -> bool {
11487 env_flag_enabled("CASS_DEFER_LEXICAL_UPDATES")
11488}
11489
11490fn defer_analytics_updates_enabled() -> bool {
11491 env_flag_enabled("CASS_DEFER_ANALYTICS_UPDATES")
11492}
11493
/// Result of trying to insert a conversation row: either a new row was
/// created or an already-stored conversation was found instead.
enum ConversationInsertStatus {
    /// A new conversation row was inserted; holds its id.
    Inserted(i64),
    /// The conversation already existed; holds the id of the stored row.
    Existing(i64),
}
11498
11499fn franken_find_external_conversation_tail_lookup(
11500 tx: &FrankenTransaction<'_>,
11501 lookup_key: &str,
11502) -> Result<Option<ExistingConversationWithTail>> {
11503 let params = [SqliteValue::from(lookup_key)];
11504 let row = tx
11505 .query_row_with_params(
11506 "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
11507 FROM conversation_external_tail_lookup
11508 WHERE lookup_key = ?1",
11509 ¶ms,
11510 )
11511 .optional()?;
11512 let Some(row) = row else {
11513 return Ok(None);
11514 };
11515 let id = row.get_typed(0)?;
11516 let ended_at = row.get_typed(1)?;
11517 let last_message_idx = row.get_typed(2)?;
11518 let last_message_created_at = row.get_typed(3)?;
11519 Ok(Some(ExistingConversationWithTail {
11520 id,
11521 tail_state: existing_conversation_tail_state_from_cached(
11522 last_message_idx,
11523 last_message_created_at,
11524 ended_at,
11525 ),
11526 }))
11527}
11528
11529fn franken_find_external_conversation_lookup(
11530 tx: &FrankenTransaction<'_>,
11531 lookup_key: &str,
11532) -> Result<Option<i64>> {
11533 Ok(franken_find_external_conversation_tail_lookup(tx, lookup_key)?.map(|existing| existing.id))
11534}
11535
11536fn franken_insert_external_conversation_tail_lookup_key(
11537 tx: &FrankenTransaction<'_>,
11538 lookup_key: &str,
11539 conversation_id: i64,
11540 ended_at: Option<i64>,
11541 last_message_idx: Option<i64>,
11542 last_message_created_at: Option<i64>,
11543) -> Result<()> {
11544 let params = [
11545 SqliteValue::from(lookup_key),
11546 SqliteValue::from(conversation_id),
11547 SqliteValue::from(ended_at),
11548 SqliteValue::from(last_message_idx),
11549 SqliteValue::from(last_message_created_at),
11550 ];
11551 tx.execute_with_params(
11552 "INSERT OR REPLACE INTO conversation_external_tail_lookup(
11553 lookup_key, conversation_id, ended_at, last_message_idx, last_message_created_at
11554 ) VALUES(?1, ?2, ?3, ?4, ?5)",
11555 ¶ms,
11556 )?;
11557 Ok(())
11558}
11559
11560fn franken_insert_external_conversation_tail_lookup(
11561 tx: &FrankenTransaction<'_>,
11562 source_id: &str,
11563 agent_id: i64,
11564 external_id: &str,
11565 existing: ExistingConversationWithTail,
11566) -> Result<()> {
11567 let lookup_key = conversation_external_lookup_key(source_id, agent_id, external_id);
11568 let ended_at = existing.tail_state.and_then(|state| state.ended_at);
11569 let last_message_idx = existing.tail_state.map(|state| state.last_message_idx);
11570 let last_message_created_at = existing
11571 .tail_state
11572 .map(|state| state.last_message_created_at);
11573 franken_insert_external_conversation_tail_lookup_key(
11574 tx,
11575 &lookup_key,
11576 existing.id,
11577 ended_at,
11578 last_message_idx,
11579 last_message_created_at,
11580 )
11581}
11582
11583fn franken_update_external_conversation_tail_lookup_key(
11584 tx: &FrankenTransaction<'_>,
11585 lookup_key: &str,
11586 ended_at_candidate: Option<i64>,
11587 last_message_idx_candidate: Option<i64>,
11588 last_message_created_at_candidate: Option<i64>,
11589) -> Result<()> {
11590 if ended_at_candidate.is_none()
11591 && last_message_idx_candidate.is_none()
11592 && last_message_created_at_candidate.is_none()
11593 {
11594 return Ok(());
11595 }
11596 tx.execute_compat(
11597 "UPDATE conversation_external_tail_lookup
11598 SET ended_at = CASE
11599 WHEN ?1 IS NULL THEN ended_at
11600 ELSE MAX(IFNULL(ended_at, 0), ?1)
11601 END,
11602 last_message_idx = CASE
11603 WHEN ?2 IS NULL THEN last_message_idx
11604 WHEN last_message_idx IS NULL OR last_message_idx < ?2 THEN ?2
11605 ELSE last_message_idx
11606 END,
11607 last_message_created_at = CASE
11608 WHEN ?3 IS NULL THEN last_message_created_at
11609 WHEN last_message_created_at IS NULL OR last_message_created_at < ?3 THEN ?3
11610 ELSE last_message_created_at
11611 END
11612 WHERE lookup_key = ?4",
11613 fparams![
11614 ended_at_candidate,
11615 last_message_idx_candidate,
11616 last_message_created_at_candidate,
11617 lookup_key
11618 ],
11619 )?;
11620 Ok(())
11621}
11622
11623fn franken_set_external_conversation_tail_lookup_after_append(
11624 tx: &FrankenTransaction<'_>,
11625 lookup_key: &str,
11626 ended_at: i64,
11627 last_message_idx: i64,
11628 last_message_created_at: i64,
11629) -> Result<()> {
11630 tx.execute_compat(
11631 "UPDATE conversation_external_tail_lookup
11632 SET ended_at = ?1,
11633 last_message_idx = ?2,
11634 last_message_created_at = ?3
11635 WHERE lookup_key = ?4",
11636 fparams![
11637 ended_at,
11638 last_message_idx,
11639 last_message_created_at,
11640 lookup_key
11641 ],
11642 )?;
11643 Ok(())
11644}
11645
11646fn franken_update_external_conversation_tail_after_append(
11647 tx: &FrankenTransaction<'_>,
11648 agent_id: i64,
11649 conv: &Conversation,
11650 used_append_tail_plan: bool,
11651 exact_append_set: bool,
11652 inserted_last_idx: Option<i64>,
11653 inserted_last_created_at: Option<i64>,
11654) -> Result<()> {
11655 let Some(lookup_key) = conversation_external_lookup_key_for_conv(agent_id, conv) else {
11656 return Ok(());
11657 };
11658
11659 if exact_append_set
11660 && let (Some(last_message_idx), Some(last_message_created_at)) =
11661 (inserted_last_idx, inserted_last_created_at)
11662 {
11663 return franken_set_external_conversation_tail_lookup_after_append(
11664 tx,
11665 &lookup_key,
11666 last_message_created_at,
11667 last_message_idx,
11668 last_message_created_at,
11669 );
11670 }
11671
11672 let ended_at_candidate = if used_append_tail_plan {
11673 inserted_last_created_at
11674 } else {
11675 conv.messages.iter().filter_map(|m| m.created_at).max()
11676 };
11677 franken_update_external_conversation_tail_lookup_key(
11678 tx,
11679 &lookup_key,
11680 ended_at_candidate,
11681 inserted_last_idx,
11682 inserted_last_created_at,
11683 )
11684}
11685
11686fn franken_find_existing_conversation_by_key(
11687 tx: &FrankenTransaction<'_>,
11688 key: &PendingConversationKey,
11689 conv: Option<&Conversation>,
11690) -> Result<Option<i64>> {
11691 franken_find_existing_conversation_by_key_impl(tx, key, conv, false)
11692}
11693
11694fn franken_find_existing_conversation_by_key_after_conflict(
11695 tx: &FrankenTransaction<'_>,
11696 key: &PendingConversationKey,
11697 conv: Option<&Conversation>,
11698) -> Result<Option<i64>> {
11699 franken_find_existing_conversation_by_key_impl(tx, key, conv, true)
11700}
11701
11702fn franken_find_existing_conversation_by_key_impl(
11703 tx: &FrankenTransaction<'_>,
11704 key: &PendingConversationKey,
11705 conv: Option<&Conversation>,
11706 allow_legacy_external_scan: bool,
11707) -> Result<Option<i64>> {
11708 match key {
11709 PendingConversationKey::External {
11710 source_id,
11711 agent_id,
11712 external_id,
11713 } => {
11714 let lookup_key = conversation_external_lookup_key(source_id, *agent_id, external_id);
11715 if let Some(existing_id) = franken_find_external_conversation_lookup(tx, &lookup_key)? {
11716 return Ok(Some(existing_id));
11717 }
11718 if !allow_legacy_external_scan {
11719 return Ok(None);
11720 }
11721
11722 let existing_id = tx
11723 .query_row_map(
11724 "SELECT id
11725 FROM conversations
11726 WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
11727 fparams![source_id.as_str(), *agent_id, external_id.as_str()],
11728 |row| row.get_typed(0),
11729 )
11730 .optional()?;
11731 if let Some(existing_id) = existing_id {
11732 let tail_state = franken_existing_conversation_append_tail_state(tx, existing_id)?;
11733 franken_insert_external_conversation_tail_lookup_key(
11734 tx,
11735 &lookup_key,
11736 existing_id,
11737 tail_state.and_then(|state| state.ended_at),
11738 tail_state.map(|state| state.last_message_idx),
11739 tail_state.map(|state| state.last_message_created_at),
11740 )?;
11741 Ok(Some(existing_id))
11742 } else {
11743 Ok(None)
11744 }
11745 }
11746 PendingConversationKey::SourcePath {
11747 source_id,
11748 agent_id,
11749 source_path,
11750 started_at,
11751 } => {
11752 let exact_match = tx
11753 .query_row_map(
11754 "SELECT c.id
11755 FROM conversations c
11756 WHERE c.source_id = ?1
11757 AND c.agent_id = ?2
11758 AND c.source_path = ?3
11759 AND ((
11760 COALESCE(
11761 c.started_at,
11762 (SELECT MIN(created_at)
11763 FROM messages
11764 WHERE conversation_id = c.id
11765 AND created_at IS NOT NULL)
11766 ) IS NULL
11767 AND ?4 IS NULL
11768 ) OR COALESCE(
11769 c.started_at,
11770 (SELECT MIN(created_at)
11771 FROM messages
11772 WHERE conversation_id = c.id
11773 AND created_at IS NOT NULL)
11774 ) = ?4)
11775 ORDER BY c.id
11776 LIMIT 1",
11777 fparams![
11778 source_id.as_str(),
11779 *agent_id,
11780 source_path.as_str(),
11781 *started_at
11782 ],
11783 |row| row.get_typed(0),
11784 )
11785 .optional()?;
11786 if exact_match.is_some() {
11787 return Ok(exact_match);
11788 }
11789
11790 let Some(conv) = conv else {
11791 return Ok(None);
11792 };
11793 let incoming_fingerprints = conversation_message_fingerprints(conv);
11794 if incoming_fingerprints.is_empty() {
11795 return Ok(None);
11796 }
11797 let incoming_replay_fingerprints = conversation_message_replay_fingerprints(conv);
11798
11799 let candidates: Vec<(i64, Option<i64>)> = tx.query_map_collect(
11800 "SELECT
11801 c.id,
11802 COALESCE(
11803 c.started_at,
11804 (SELECT MIN(created_at)
11805 FROM messages
11806 WHERE conversation_id = c.id
11807 AND created_at IS NOT NULL)
11808 ) AS effective_started_at
11809 FROM conversations c
11810 WHERE c.source_id = ?1
11811 AND c.agent_id = ?2
11812 AND c.source_path = ?3
11813 ORDER BY c.id",
11814 fparams![source_id.as_str(), *agent_id, source_path.as_str()],
11815 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
11816 )?;
11817
11818 let mut best_candidate: Option<(i64, ConversationMergeEvidence)> = None;
11819 for (candidate_id, candidate_started_at) in candidates {
11820 let existing_fingerprints =
11821 franken_existing_message_fingerprints(tx, candidate_id)?;
11822 let existing_replay_fingerprints =
11823 replay_fingerprints_from_merge_set(&existing_fingerprints);
11824 let Some(evidence) = conversation_merge_evidence(
11825 &incoming_fingerprints,
11826 &incoming_replay_fingerprints,
11827 &existing_fingerprints,
11828 &existing_replay_fingerprints,
11829 *started_at,
11830 candidate_started_at,
11831 ) else {
11832 continue;
11833 };
11834
11835 let candidate_key = (
11836 evidence.exact_overlap,
11837 evidence.replay_overlap,
11838 evidence.started_close,
11839 evidence.smaller_replay_set,
11840 std::cmp::Reverse(evidence.start_distance_ms),
11841 );
11842 let should_replace = best_candidate
11843 .as_ref()
11844 .map(|(_, best_evidence)| {
11845 candidate_key
11846 > (
11847 best_evidence.exact_overlap,
11848 best_evidence.replay_overlap,
11849 best_evidence.started_close,
11850 best_evidence.smaller_replay_set,
11851 std::cmp::Reverse(best_evidence.start_distance_ms),
11852 )
11853 })
11854 .unwrap_or(true);
11855
11856 if should_replace {
11857 best_candidate = Some((candidate_id, evidence));
11858 }
11859 }
11860
11861 Ok(best_candidate.map(|(candidate_id, _)| candidate_id))
11862 }
11863 }
11864}
11865
11866fn franken_insert_conversation_or_get_existing(
11867 tx: &FrankenTransaction<'_>,
11868 agent_id: i64,
11869 workspace_id: Option<i64>,
11870 conv: &Conversation,
11871) -> Result<ConversationInsertStatus> {
11872 let conversation_key = conversation_merge_key(agent_id, conv);
11873 if let Some(existing_id) =
11874 franken_find_existing_conversation_by_key(tx, &conversation_key, Some(conv))?
11875 {
11876 return Ok(ConversationInsertStatus::Existing(existing_id));
11877 }
11878
11879 franken_insert_conversation_or_get_existing_after_miss(
11880 tx,
11881 agent_id,
11882 workspace_id,
11883 conv,
11884 &conversation_key,
11885 )
11886}
11887
11888fn franken_insert_conversation_or_get_existing_after_miss(
11889 tx: &FrankenTransaction<'_>,
11890 agent_id: i64,
11891 workspace_id: Option<i64>,
11892 conv: &Conversation,
11893 conversation_key: &PendingConversationKey,
11894) -> Result<ConversationInsertStatus> {
11895 match franken_insert_conversation(tx, agent_id, workspace_id, conv) {
11896 Ok(Some(conv_id)) => Ok(ConversationInsertStatus::Inserted(conv_id)),
11897 Ok(None) => {
11898 let existing_id =
11901 franken_find_existing_conversation_by_key_after_conflict(
11902 tx,
11903 conversation_key,
11904 Some(conv),
11905 )?
11906 .with_context(|| {
11907 format!(
11908 "conversation INSERT produced a duplicate conflict but existing row was not found for source_id={} agent_id={} external_id={:?} source_path={}",
11909 conv.source_id,
11910 agent_id,
11911 conv.external_id,
11912 conv.source_path.display()
11913 )
11914 })?;
11915 tracing::warn!(
11916 source_id = %conv.source_id,
11917 agent_id,
11918 external_id = ?conv.external_id,
11919 existing_id,
11920 source_path = %conv.source_path.display(),
11921 "conversation INSERT: duplicate gracefully recovered, reusing existing row"
11922 );
11923 Ok(ConversationInsertStatus::Existing(existing_id))
11924 }
11925 Err(error) => {
11926 tracing::error!(
11927 source_id = %conv.source_id,
11928 agent_id,
11929 external_id = ?conv.external_id,
11930 error = %error,
11931 source_path = %conv.source_path.display(),
11932 "franken_insert_conversation failed"
11933 );
11934 Err(error)
11935 }
11936 }
11937}
11938
11939fn franken_insert_conversation(
11945 tx: &FrankenTransaction<'_>,
11946 agent_id: i64,
11947 workspace_id: Option<i64>,
11948 conv: &Conversation,
11949) -> Result<Option<i64>> {
11950 let (metadata_json_str, metadata_bin) = franken_metadata_insert_payload(&conv.metadata_json)?;
11951 let (last_message_idx, last_message_created_at) = conversation_tail_state(conv);
11952 let metadata_bin_bytes = metadata_bin.as_deref();
11953
11954 match tx.execute_compat(
11955 "INSERT INTO conversations(
11956 agent_id, workspace_id, source_id, external_id, title, source_path,
11957 started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin,
11958 last_message_idx, last_message_created_at
11959 ) VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14)",
11960 fparams![
11961 agent_id,
11962 workspace_id,
11963 conv.source_id.as_str(),
11964 conv.external_id.as_deref(),
11965 conv.title.as_deref(),
11966 path_to_string(&conv.source_path),
11967 conv.started_at,
11968 conv.ended_at,
11969 conv.approx_tokens,
11970 metadata_json_str.as_deref(),
11971 conv.origin_host.as_deref(),
11972 metadata_bin_bytes,
11973 last_message_idx,
11974 last_message_created_at
11975 ],
11976 ) {
11977 Ok(_) => {
11978 let conv_id = franken_last_rowid(tx)?;
11979 franken_insert_conversation_tail_state(
11980 tx,
11981 conv_id,
11982 conv.ended_at,
11983 last_message_idx,
11984 last_message_created_at,
11985 )?;
11986 if let Some(external_id) = conv.external_id.as_deref() {
11987 franken_insert_external_conversation_tail_lookup(
11988 tx,
11989 conv.source_id.as_str(),
11990 agent_id,
11991 external_id,
11992 ExistingConversationWithTail {
11993 id: conv_id,
11994 tail_state: existing_conversation_tail_state_from_cached(
11995 last_message_idx,
11996 last_message_created_at,
11997 conv.ended_at,
11998 ),
11999 },
12000 )?;
12001 }
12002 Ok(Some(conv_id))
12003 }
12004 Err(frankensqlite::FrankenError::UniqueViolation { .. }) => {
12005 tracing::debug!(
12006 source_id = %conv.source_id,
12007 agent_id,
12008 external_id = ?conv.external_id,
12009 source_path = %conv.source_path.display(),
12010 "conversation INSERT: duplicate provenance conflict"
12011 );
12012 Ok(None)
12013 }
12014 Err(error) => Err(error.into()),
12015 }
12016}
12017
12018type MetadataInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
12019
12020fn franken_metadata_insert_payload(value: &serde_json::Value) -> Result<MetadataInsertPayload<'_>> {
12021 if let Some(raw) = historical_raw_json(value) {
12022 Ok((Some(Cow::Borrowed(raw)), None))
12023 } else if value.is_null() {
12024 Ok((Some(Cow::Borrowed("null")), None))
12025 } else if value.as_object().is_some_and(|object| object.is_empty()) {
12026 Ok((None, None))
12027 } else if let Some(metadata_bin) = serialize_json_to_msgpack(value) {
12028 Ok((None, Some(metadata_bin)))
12029 } else {
12030 Ok((Some(Cow::Owned(serde_json::to_string(value)?)), None))
12031 }
12032}
12033
12034fn franken_insert_new_message(
12035 tx: &FrankenTransaction<'_>,
12036 conversation_id: i64,
12037 msg: &Message,
12038) -> Result<i64> {
12039 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
12040 let extra_bin_bytes = extra_bin.as_deref();
12041
12042 tx.execute_compat(
12043 "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
12044 VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
12045 fparams![
12046 conversation_id,
12047 msg.idx,
12048 role_as_str(&msg.role),
12049 msg.author.as_deref(),
12050 msg.created_at,
12051 msg.content.as_str(),
12052 extra_json_str.as_deref(),
12053 extra_bin_bytes
12054 ],
12055 )?;
12056 franken_last_rowid(tx)
12057}
12058
12059type MessageInsertPayload<'a> = (Option<Cow<'a, str>>, Option<Vec<u8>>);
12060
12061fn franken_message_insert_payload(msg: &Message) -> Result<MessageInsertPayload<'_>> {
12062 if let Some(raw) = historical_raw_json(&msg.extra_json) {
12063 Ok((Some(Cow::Borrowed(raw)), None))
12064 } else if msg.extra_json.is_null() {
12065 Ok((None, None))
12066 } else {
12067 let extra_bin = serialize_json_to_msgpack(&msg.extra_json);
12068 if extra_bin.is_some() {
12069 Ok((None, extra_bin))
12070 } else {
12071 Ok((
12072 Some(Cow::Owned(serde_json::to_string(&msg.extra_json)?)),
12073 None,
12074 ))
12075 }
12076 }
12077}
12078
/// Rows per multi-row `INSERT` used by `franken_batch_insert_new_messages`.
const MESSAGE_INSERT_BATCH_SIZE: usize = 100;

/// Rows per multi-row `INSERT` used by `franken_append_insert_new_messages`.
const APPEND_MESSAGE_INSERT_BATCH_SIZE: usize = 50;

/// Returns the cached multi-row `INSERT INTO messages` statement for `row_count` rows.
///
/// Statements for every row count up to the larger of the two batch-size
/// constants are built once, on first use. Each row contributes a group of
/// eight consecutively numbered placeholders. Index `0` holds an empty string.
///
/// # Panics
/// Panics when `row_count` exceeds the largest cached batch size.
fn message_insert_batch_sql(row_count: usize) -> &'static str {
    const COLUMNS_PER_ROW: usize = 8;
    const STATEMENT_PREFIX: &str = "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin) VALUES ";
    static STATEMENTS_BY_ROW_COUNT: std::sync::OnceLock<Vec<String>> = std::sync::OnceLock::new();

    let statements = STATEMENTS_BY_ROW_COUNT.get_or_init(|| {
        let largest_batch = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
        let mut statements = Vec::with_capacity(largest_batch + 1);
        statements.push(String::new());

        // The statement for N rows is the statement for N-1 rows plus one more
        // placeholder group, so it is grown incrementally and snapshotted per row count.
        let mut statement = String::from(STATEMENT_PREFIX);
        for rows in 1..=largest_batch {
            if rows > 1 {
                statement.push(',');
            }
            let first_param = (rows - 1) * COLUMNS_PER_ROW + 1;
            let row_group = (first_param..first_param + COLUMNS_PER_ROW)
                .map(|param| format!("?{param}"))
                .collect::<Vec<_>>()
                .join(",");
            statement.push('(');
            statement.push_str(&row_group);
            statement.push(')');
            statements.push(statement.clone());
        }
        statements
    });

    statements
        .get(row_count)
        .map(String::as_str)
        .expect("message insert batch size must be covered by the cached SQL table")
}
12129
12130fn franken_batch_insert_new_messages(
12131 tx: &FrankenTransaction<'_>,
12132 conversation_id: i64,
12133 messages: &[&Message],
12134) -> Result<Vec<i64>> {
12135 franken_batch_insert_new_messages_with_batch_size(
12136 tx,
12137 conversation_id,
12138 messages,
12139 MESSAGE_INSERT_BATCH_SIZE,
12140 )
12141}
12142
12143fn franken_append_insert_new_messages(
12144 tx: &FrankenTransaction<'_>,
12145 conversation_id: i64,
12146 messages: &[&Message],
12147) -> Result<Vec<i64>> {
12148 franken_batch_insert_new_messages_with_batch_size(
12149 tx,
12150 conversation_id,
12151 messages,
12152 APPEND_MESSAGE_INSERT_BATCH_SIZE,
12153 )
12154}
12155
12156fn franken_batch_insert_new_messages_with_batch_size(
12157 tx: &FrankenTransaction<'_>,
12158 conversation_id: i64,
12159 messages: &[&Message],
12160 batch_size: usize,
12161) -> Result<Vec<i64>> {
12162 let batch_size = batch_size.max(1);
12163 let mut inserted_ids = Vec::with_capacity(messages.len());
12164 for chunk in messages.chunks(batch_size) {
12165 if chunk.len() == 1 {
12166 inserted_ids.push(franken_insert_new_message(tx, conversation_id, chunk[0])?);
12167 continue;
12168 }
12169 let sql = message_insert_batch_sql(chunk.len());
12170
12171 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
12172 for msg in chunk {
12173 let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
12174 param_values.push(SqliteValue::from(conversation_id));
12175 param_values.push(SqliteValue::from(msg.idx));
12176 param_values.push(SqliteValue::from(role_as_str(&msg.role)));
12177 param_values.push(SqliteValue::from(msg.author.as_deref()));
12178 param_values.push(SqliteValue::from(msg.created_at));
12179 param_values.push(SqliteValue::from(msg.content.as_str()));
12180 param_values.push(SqliteValue::from(extra_json_str.as_deref()));
12181 param_values.push(SqliteValue::from(extra_bin.as_deref()));
12182 }
12183
12184 tx.execute_with_params(sql, ¶m_values)?;
12185
12186 let last_id = franken_last_rowid(tx)?;
12187 let first_id = last_id
12188 .checked_sub((chunk.len() - 1) as i64)
12189 .with_context(|| {
12190 format!(
12191 "inferring rowid range for {}-row message batch ending at {last_id}",
12192 chunk.len()
12193 )
12194 })?;
12195 inserted_ids.extend((0..chunk.len()).map(|offset| first_id + offset as i64));
12196 }
12197
12198 Ok(inserted_ids)
12199}
12200
/// Test-only twin of `franken_insert_new_message` that records timings.
///
/// Counts one single-row call and one row, then adds the time spent building
/// the payload, executing the insert and reading the rowid to the matching
/// `profile` durations.
#[cfg(test)]
fn franken_insert_new_message_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    msg: &Message,
    profile: &mut MessageInsertSubstageProfile,
) -> Result<i64> {
    profile.single_row_calls += 1;
    profile.batch_rows += 1;

    let payload_timer = Instant::now();
    let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
    profile.payload_duration += payload_timer.elapsed();
    let extra_bin_bytes = extra_bin.as_deref();

    let execute_timer = Instant::now();
    tx.execute_compat(
        "INSERT INTO messages(conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES(?1,?2,?3,?4,?5,?6,?7,?8)",
        fparams![
            conversation_id,
            msg.idx,
            role_as_str(&msg.role),
            msg.author.as_deref(),
            msg.created_at,
            msg.content.as_str(),
            extra_json_str.as_deref(),
            extra_bin_bytes
        ],
    )?;
    profile.execute_duration += execute_timer.elapsed();

    let rowid_timer = Instant::now();
    let inserted_rowid = franken_last_rowid(tx)?;
    profile.rowid_duration += rowid_timer.elapsed();
    Ok(inserted_rowid)
}
12238
/// Test-only profiled insert using chunks of `MESSAGE_INSERT_BATCH_SIZE` rows.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
12254
/// Test-only profiled insert using chunks of `APPEND_MESSAGE_INSERT_BATCH_SIZE` rows.
#[cfg(test)]
fn franken_append_insert_new_messages_with_profile(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
) -> Result<Vec<i64>> {
    let rows_per_statement = APPEND_MESSAGE_INSERT_BATCH_SIZE;
    franken_batch_insert_new_messages_with_profile_batch_size(
        tx,
        conversation_id,
        messages,
        profile,
        rows_per_statement,
    )
}
12270
/// Test-only twin of `franken_batch_insert_new_messages_with_batch_size` that
/// records per-substage timings and call/row counters in `profile`.
///
/// `batch_size` is clamped to `1..=max(MESSAGE_INSERT_BATCH_SIZE,
/// APPEND_MESSAGE_INSERT_BATCH_SIZE)`. `message_insert_batch_sql` only caches
/// statements up to that row count and panics beyond it.
///
/// A chunk holding exactly one message goes through
/// `franken_insert_new_message_with_profile`. For larger chunks the rowids
/// are inferred from the last insert rowid, on the assumption that the rows
/// of one multi-row `INSERT` received consecutive rowids.
///
/// # Errors
/// Propagates payload serialization, insert and rowid lookup failures, and
/// fails when the inferred first rowid would underflow `i64`.
#[cfg(test)]
fn franken_batch_insert_new_messages_with_profile_batch_size(
    tx: &FrankenTransaction<'_>,
    conversation_id: i64,
    messages: &[&Message],
    profile: &mut MessageInsertSubstageProfile,
    batch_size: usize,
) -> Result<Vec<i64>> {
    // Never ask for a statement larger than the cached SQL table can provide.
    let max_cached_rows = MESSAGE_INSERT_BATCH_SIZE.max(APPEND_MESSAGE_INSERT_BATCH_SIZE);
    let batch_size = batch_size.clamp(1, max_cached_rows);

    let mut inserted_ids = Vec::with_capacity(messages.len());
    for chunk in messages.chunks(batch_size) {
        if chunk.len() == 1 {
            inserted_ids.push(franken_insert_new_message_with_profile(
                tx,
                conversation_id,
                chunk[0],
                profile,
            )?);
            continue;
        }

        profile.batch_calls += 1;
        profile.batch_rows += chunk.len();

        let sql_build_start = Instant::now();
        let sql = message_insert_batch_sql(chunk.len());
        profile.sql_build_duration += sql_build_start.elapsed();

        // Eight bound values per row, in the column order of the cached statement.
        let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 8);
        for msg in chunk {
            let payload_start = Instant::now();
            let (extra_json_str, extra_bin) = franken_message_insert_payload(msg)?;
            profile.payload_duration += payload_start.elapsed();

            let param_build_start = Instant::now();
            param_values.push(SqliteValue::from(conversation_id));
            param_values.push(SqliteValue::from(msg.idx));
            param_values.push(SqliteValue::from(role_as_str(&msg.role)));
            param_values.push(SqliteValue::from(msg.author.as_deref()));
            param_values.push(SqliteValue::from(msg.created_at));
            param_values.push(SqliteValue::from(msg.content.as_str()));
            param_values.push(SqliteValue::from(extra_json_str.as_deref()));
            param_values.push(SqliteValue::from(extra_bin.as_deref()));
            profile.param_build_duration += param_build_start.elapsed();
        }

        let execute_start = Instant::now();
        tx.execute_with_params(sql, &param_values)?;
        profile.execute_duration += execute_start.elapsed();

        // `chunk.len()` is bounded by the clamp above, so the cast cannot truncate.
        let rowid_start = Instant::now();
        let last_id = franken_last_rowid(tx)?;
        let first_id = last_id
            .checked_sub((chunk.len() - 1) as i64)
            .with_context(|| {
                format!(
                    "inferring rowid range for {}-row message batch ending at {last_id}",
                    chunk.len()
                )
            })?;
        inserted_ids.extend(first_id..=last_id);
        profile.rowid_duration += rowid_start.elapsed();
    }

    Ok(inserted_ids)
}
12337
12338fn franken_insert_snippets(
12340 tx: &FrankenTransaction<'_>,
12341 message_id: i64,
12342 snippets: &[Snippet],
12343) -> Result<()> {
12344 for snip in snippets {
12345 let file_path_str = snip.file_path.as_ref().map(path_to_string);
12346 tx.execute_compat(
12347 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text)
12348 VALUES(?1,?2,?3,?4,?5,?6)",
12349 fparams![
12350 message_id,
12351 file_path_str.as_deref(),
12352 snip.start_line,
12353 snip.end_line,
12354 snip.language.as_deref(),
12355 snip.snippet_text.as_deref()
12356 ],
12357 )?;
12358 }
12359 Ok(())
12360}
12361
12362fn franken_existing_message_fingerprints(
12363 tx: &FrankenTransaction<'_>,
12364 conversation_id: i64,
12365) -> Result<HashSet<MessageMergeFingerprint>> {
12366 let rows = tx.query_params(
12367 "SELECT idx, role, author, created_at, content
12368 FROM messages
12369 WHERE conversation_id = ?1",
12370 fparams![conversation_id],
12371 )?;
12372 let mut fingerprints = HashSet::with_capacity(rows.len());
12373 for row in rows {
12374 let role: String = row.get_typed(1)?;
12375 let content: String = row.get_typed(4)?;
12376 fingerprints.insert(MessageMergeFingerprint {
12377 idx: row.get_typed(0)?,
12378 created_at: row.get_typed(3)?,
12379 role: role_from_str(&role),
12380 author: row.get_typed(2)?,
12381 content_hash: *blake3::hash(content.as_bytes()).as_bytes(),
12382 });
12383 }
12384 Ok(fingerprints)
12385}
12386
/// Fingerprints of messages already stored for one conversation.
struct ExistingMessageLookup {
    /// Merge fingerprints keyed by message index.
    by_idx: HashMap<i64, MessageMergeFingerprint>,
    /// Replay fingerprints (creation time, role, author, content hash).
    replay: HashSet<MessageReplayFingerprint>,
}
12391
12392fn franken_existing_message_lookup(
12393 tx: &FrankenTransaction<'_>,
12394 conversation_id: i64,
12395 incoming_messages: &[Message],
12396) -> Result<ExistingMessageLookup> {
12397 if incoming_messages.is_empty() {
12398 return Ok(ExistingMessageLookup {
12399 by_idx: HashMap::new(),
12400 replay: HashSet::new(),
12401 });
12402 }
12403
12404 let min_idx = incoming_messages
12405 .iter()
12406 .map(|msg| msg.idx)
12407 .min()
12408 .unwrap_or(0);
12409 let max_idx = incoming_messages
12410 .iter()
12411 .map(|msg| msg.idx)
12412 .max()
12413 .unwrap_or(min_idx);
12414 let requires_full_scan = incoming_messages.iter().any(|msg| msg.created_at.is_none());
12415 let created_bounds = incoming_messages
12416 .iter()
12417 .filter_map(|msg| msg.created_at)
12418 .fold(None, |bounds: Option<(i64, i64)>, created_at| {
12419 Some(match bounds {
12420 Some((min_created_at, max_created_at)) => (
12421 min_created_at.min(created_at),
12422 max_created_at.max(created_at),
12423 ),
12424 None => (created_at, created_at),
12425 })
12426 });
12427
12428 let mut indexed_by_idx = HashMap::with_capacity(incoming_messages.len());
12429 let mut indexed_replay = HashSet::with_capacity(incoming_messages.len());
12430 let mut exact_idx_match = true;
12431 for msg in incoming_messages {
12432 record_message_lookup_exact_idx_probe();
12433 let Some((role, author, created_at, content)) = tx
12434 .query_row_map(
12435 "SELECT role, author, created_at, content
12436 FROM messages INDEXED BY sqlite_autoindex_messages_1
12437 WHERE conversation_id = ?1 AND idx = ?2
12438 LIMIT 1",
12439 fparams![conversation_id, msg.idx],
12440 |row| {
12441 Ok((
12442 row.get_typed::<String>(0)?,
12443 row.get_typed::<Option<String>>(1)?,
12444 row.get_typed::<Option<i64>>(2)?,
12445 row.get_typed::<String>(3)?,
12446 ))
12447 },
12448 )
12449 .optional()?
12450 else {
12451 exact_idx_match = false;
12452 break;
12453 };
12454 let role = role_from_str(&role);
12455 let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
12456 let fingerprint = MessageMergeFingerprint {
12457 idx: msg.idx,
12458 created_at,
12459 role: role.clone(),
12460 author: author.clone(),
12461 content_hash,
12462 };
12463 if fingerprint != message_merge_fingerprint(msg) {
12464 exact_idx_match = false;
12465 break;
12466 }
12467 indexed_by_idx.insert(msg.idx, fingerprint);
12468 indexed_replay.insert(MessageReplayFingerprint {
12469 created_at,
12470 role,
12471 author,
12472 content_hash,
12473 });
12474 }
12475
12476 if exact_idx_match {
12477 return Ok(ExistingMessageLookup {
12478 by_idx: indexed_by_idx,
12479 replay: indexed_replay,
12480 });
12481 }
12482
12483 let (rows, replay_full_scan) = if requires_full_scan {
12484 let rows = tx.query_params(
12485 "SELECT idx, role, author, created_at, content
12486 FROM messages INDEXED BY sqlite_autoindex_messages_1
12487 WHERE conversation_id = ?1",
12488 fparams![conversation_id],
12489 )?;
12490 record_message_lookup_full_scan_query(rows.len());
12491 (rows, true)
12492 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12493 let mut rows = tx.query_params(
12494 "SELECT idx, role, author, created_at, content
12495 FROM messages INDEXED BY sqlite_autoindex_messages_1
12496 WHERE conversation_id = ?1
12497 AND idx >= ?2
12498 AND idx <= ?3",
12499 fparams![conversation_id, min_idx, max_idx],
12500 )?;
12501 rows.extend(tx.query_params(
12502 "SELECT idx, role, author, created_at, content
12503 FROM messages INDEXED BY sqlite_autoindex_messages_1
12504 WHERE conversation_id = ?1
12505 AND created_at IS NOT NULL
12506 AND created_at >= ?2
12507 AND created_at <= ?3",
12508 fparams![conversation_id, min_created_at, max_created_at],
12509 )?);
12510 record_message_lookup_bounded_queries(2, rows.len());
12511 (rows, false)
12512 } else {
12513 let rows = tx.query_params(
12514 "SELECT idx, role, author, created_at, content
12515 FROM messages INDEXED BY sqlite_autoindex_messages_1
12516 WHERE conversation_id = ?1",
12517 fparams![conversation_id],
12518 )?;
12519 record_message_lookup_full_scan_query(rows.len());
12520 (rows, true)
12521 };
12522
12523 let mut by_idx = HashMap::with_capacity(rows.len());
12524 let mut replay = HashSet::with_capacity(rows.len());
12525 for row in rows {
12526 let idx: i64 = row.get_typed(0)?;
12527 let role: String = row.get_typed(1)?;
12528 let author: Option<String> = row.get_typed(2)?;
12529 let created_at: Option<i64> = row.get_typed(3)?;
12530 let content: String = row.get_typed(4)?;
12531 let role = role_from_str(&role);
12532 let content_hash = *blake3::hash(content.as_bytes()).as_bytes();
12533
12534 if idx >= min_idx && idx <= max_idx {
12535 by_idx.insert(
12536 idx,
12537 MessageMergeFingerprint {
12538 idx,
12539 created_at,
12540 role: role.clone(),
12541 author: author.clone(),
12542 content_hash,
12543 },
12544 );
12545 }
12546
12547 let replay_matches = if replay_full_scan {
12548 true
12549 } else if let Some((min_created_at, max_created_at)) = created_bounds {
12550 created_at.is_some_and(|ts| ts >= min_created_at && ts <= max_created_at)
12551 } else {
12552 true
12553 };
12554 if replay_matches {
12555 replay.insert(MessageReplayFingerprint {
12556 created_at,
12557 role,
12558 author,
12559 content_hash,
12560 });
12561 }
12562 }
12563
12564 Ok(ExistingMessageLookup { by_idx, replay })
12565}
12566
12567fn franken_existing_message_lookup_with_pending(
12568 tx: &FrankenTransaction<'_>,
12569 conversation_id: i64,
12570 incoming_messages: &[Message],
12571 pending_message_fingerprints: &mut HashMap<i64, HashMap<i64, MessageMergeFingerprint>>,
12572 pending_message_replay_fingerprints: &mut HashMap<i64, HashSet<MessageReplayFingerprint>>,
12573) -> Result<ExistingMessageLookup> {
12574 if let (Some(by_idx), Some(replay)) = (
12575 pending_message_fingerprints.get(&conversation_id),
12576 pending_message_replay_fingerprints.get(&conversation_id),
12577 ) {
12578 if incoming_messages.iter().all(|msg| {
12579 by_idx.contains_key(&msg.idx) || replay.contains(&message_replay_fingerprint(msg))
12580 }) {
12581 return Ok(ExistingMessageLookup {
12582 by_idx: by_idx.clone(),
12583 replay: replay.clone(),
12584 });
12585 }
12586
12587 let fresh = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12588 let mut merged_by_idx = by_idx.clone();
12589 let mut merged_replay = replay.clone();
12590 merged_by_idx.extend(fresh.by_idx);
12591 merged_replay.extend(fresh.replay);
12592 pending_message_fingerprints.insert(conversation_id, merged_by_idx.clone());
12593 pending_message_replay_fingerprints.insert(conversation_id, merged_replay.clone());
12594 return Ok(ExistingMessageLookup {
12595 by_idx: merged_by_idx,
12596 replay: merged_replay,
12597 });
12598 }
12599
12600 let lookup = franken_existing_message_lookup(tx, conversation_id, incoming_messages)?;
12601 pending_message_fingerprints.insert(conversation_id, lookup.by_idx.clone());
12602 pending_message_replay_fingerprints.insert(conversation_id, lookup.replay.clone());
12603 Ok(lookup)
12604}
12605
12606fn franken_batch_insert_fts(tx: &FrankenTransaction<'_>, entries: &[FtsEntry]) -> Result<usize> {
12608 if entries.is_empty() {
12609 return Ok(0);
12610 }
12611
12612 let mut inserted = 0;
12613
12614 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12615 let placeholders: String = chunk
12616 .iter()
12617 .enumerate()
12618 .map(|(i, _)| {
12619 let base = i * 7 + 1; format!(
12621 "(?{},?{},?{},?{},?{},?{},?{})",
12622 base,
12623 base + 1,
12624 base + 2,
12625 base + 3,
12626 base + 4,
12627 base + 5,
12628 base + 6
12629 )
12630 })
12631 .collect::<Vec<_>>()
12632 .join(",");
12633
12634 let sql = format!(
12635 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12636 );
12637
12638 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12639 for entry in chunk {
12640 param_values.push(SqliteValue::from(entry.message_id));
12641 param_values.push(SqliteValue::from(entry.content.as_str()));
12642 param_values.push(SqliteValue::from(entry.title.as_str()));
12643 param_values.push(SqliteValue::from(entry.agent.as_str()));
12644 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12645 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12646 param_values.push(SqliteValue::from(entry.created_at));
12647 }
12648
12649 match tx.execute_with_params(&sql, ¶m_values) {
12650 Ok(_) => {
12651 inserted += chunk.len();
12652 }
12653 Err(err) => {
12654 tracing::warn!(
12655 error = %err,
12656 chunk_docs = chunk.len(),
12657 "frankensqlite FTS batch insert failed; skipping db-resident FTS maintenance because Tantivy is authoritative"
12658 );
12659 return Ok(inserted);
12660 }
12661 }
12662 }
12663
12664 Ok(inserted)
12665}
12666
12667fn franken_batch_insert_fts_on_connection(
12668 conn: &FrankenConnection,
12669 entries: &[FtsEntry],
12670) -> Result<usize> {
12671 if entries.is_empty() {
12672 return Ok(0);
12673 }
12674
12675 let mut inserted = 0;
12676
12677 for chunk in entries.chunks(FTS5_BATCH_SIZE) {
12678 let placeholders: String = chunk
12679 .iter()
12680 .enumerate()
12681 .map(|(i, _)| {
12682 let base = i * 7 + 1;
12683 format!(
12684 "(?{},?{},?{},?{},?{},?{},?{})",
12685 base,
12686 base + 1,
12687 base + 2,
12688 base + 3,
12689 base + 4,
12690 base + 5,
12691 base + 6
12692 )
12693 })
12694 .collect::<Vec<_>>()
12695 .join(",");
12696
12697 let sql = format!(
12698 "INSERT INTO fts_messages(rowid, content, title, agent, workspace, source_path, created_at) VALUES {placeholders}"
12699 );
12700
12701 let mut param_values: Vec<SqliteValue> = Vec::with_capacity(chunk.len() * 7);
12702 for entry in chunk {
12703 param_values.push(SqliteValue::from(entry.message_id));
12704 param_values.push(SqliteValue::from(entry.content.as_str()));
12705 param_values.push(SqliteValue::from(entry.title.as_str()));
12706 param_values.push(SqliteValue::from(entry.agent.as_str()));
12707 param_values.push(SqliteValue::from(entry.workspace.as_str()));
12708 param_values.push(SqliteValue::from(entry.source_path.as_str()));
12709 param_values.push(SqliteValue::from(entry.created_at));
12710 }
12711
12712 conn.execute_with_params(&sql, ¶m_values)
12713 .with_context(|| {
12714 format!(
12715 "inserting {} rows into fts_messages during streaming FTS maintenance",
12716 chunk.len()
12717 )
12718 })?;
12719 inserted += chunk.len();
12720 }
12721
12722 Ok(inserted)
12723}
12724
12725fn franken_update_daily_stats_in_tx(
12727 storage: &FrankenStorage,
12728 tx: &FrankenTransaction<'_>,
12729 agent_slug: &str,
12730 source_id: &str,
12731 started_at: Option<i64>,
12732 delta: StatsDelta,
12733) -> Result<()> {
12734 let day_id = started_at
12735 .map(FrankenStorage::day_id_from_millis)
12736 .unwrap_or(0);
12737 let now = FrankenStorage::now_millis();
12738
12739 let targets = [
12740 DailyStatsTarget {
12741 day_id,
12742 agent_slug,
12743 source_id,
12744 },
12745 DailyStatsTarget {
12746 day_id,
12747 agent_slug: "all",
12748 source_id,
12749 },
12750 DailyStatsTarget {
12751 day_id,
12752 agent_slug,
12753 source_id: "all",
12754 },
12755 DailyStatsTarget {
12756 day_id,
12757 agent_slug: "all",
12758 source_id: "all",
12759 },
12760 ];
12761
12762 if agent_slug != "all"
12763 && source_id != "all"
12764 && franken_update_ensured_daily_stats_targets_in_tx(storage, tx, &targets, now, delta)?
12765 {
12766 return Ok(());
12767 }
12768
12769 for target in targets {
12770 franken_apply_daily_stats_delta_in_tx(storage, tx, target, now, delta)?;
12771 }
12772
12773 Ok(())
12774}
12775
/// Address of a single `daily_stats` row: a day bucket plus an agent/source pair.
///
/// Cheap to copy: it only borrows the two strings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DailyStatsTarget<'a> {
    /// Day bucket the row belongs to.
    day_id: i64,
    /// Agent slug, or the literal `"all"` used by the roll-up targets.
    agent_slug: &'a str,
    /// Source identifier, or the literal `"all"` used by the roll-up targets.
    source_id: &'a str,
}
12782
12783fn franken_update_ensured_daily_stats_targets_in_tx(
12784 storage: &FrankenStorage,
12785 tx: &FrankenTransaction<'_>,
12786 targets: &[DailyStatsTarget<'_>; 4],
12787 now: i64,
12788 delta: StatsDelta,
12789) -> Result<bool> {
12790 let cache_keys = targets.map(|target| {
12791 EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id)
12792 });
12793 if !storage.daily_stats_keys_already_ensured(&cache_keys) {
12794 return Ok(false);
12795 }
12796
12797 let primary = targets[0];
12798 let rows_changed = tx.execute_compat(
12799 "UPDATE daily_stats
12800 SET session_count = session_count + ?4,
12801 message_count = message_count + ?5,
12802 total_chars = total_chars + ?6,
12803 last_updated = ?7
12804 WHERE day_id = ?1
12805 AND ((agent_slug = ?2 AND source_id = ?3)
12806 OR (agent_slug = 'all' AND source_id = ?3)
12807 OR (agent_slug = ?2 AND source_id = 'all')
12808 OR (agent_slug = 'all' AND source_id = 'all'))",
12809 fparams![
12810 primary.day_id,
12811 primary.agent_slug,
12812 primary.source_id,
12813 delta.session_count_delta,
12814 delta.message_count_delta,
12815 delta.total_chars_delta,
12816 now
12817 ],
12818 )?;
12819 if rows_changed == targets.len() {
12820 return Ok(true);
12821 }
12822
12823 for (target, cache_key) in targets.iter().copied().zip(cache_keys) {
12824 let exists = tx
12825 .query_row_map(
12826 "SELECT 1 FROM daily_stats
12827 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3
12828 LIMIT 1",
12829 fparams![target.day_id, target.agent_slug, target.source_id],
12830 |row| row.get_typed::<i64>(0),
12831 )
12832 .optional()?
12833 .is_some();
12834 if exists {
12835 continue;
12836 }
12837
12838 tx.execute_compat(
12839 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12840 VALUES(?1,?2,?3,?4,?5,?6,?7)",
12841 fparams![
12842 target.day_id,
12843 target.agent_slug,
12844 target.source_id,
12845 delta.session_count_delta,
12846 delta.message_count_delta,
12847 delta.total_chars_delta,
12848 now
12849 ],
12850 )?;
12851 storage.mark_daily_stats_key_ensured(cache_key);
12852 }
12853
12854 Ok(true)
12855}
12856
12857fn franken_apply_daily_stats_delta_in_tx(
12858 storage: &FrankenStorage,
12859 tx: &FrankenTransaction<'_>,
12860 target: DailyStatsTarget<'_>,
12861 now: i64,
12862 delta: StatsDelta,
12863) -> Result<()> {
12864 let cache_key = EnsuredDailyStatsKey::new(target.day_id, target.agent_slug, target.source_id);
12865 if storage.daily_stats_key_already_ensured(&cache_key) {
12866 let rows_changed = tx.execute_compat(
12867 "UPDATE daily_stats
12868 SET session_count = session_count + ?4,
12869 message_count = message_count + ?5,
12870 total_chars = total_chars + ?6,
12871 last_updated = ?7
12872 WHERE day_id = ?1 AND agent_slug = ?2 AND source_id = ?3",
12873 fparams![
12874 target.day_id,
12875 target.agent_slug,
12876 target.source_id,
12877 delta.session_count_delta,
12878 delta.message_count_delta,
12879 delta.total_chars_delta,
12880 now
12881 ],
12882 )?;
12883 if rows_changed > 0 {
12884 return Ok(());
12885 }
12886 }
12887
12888 tx.execute_compat(
12889 "INSERT INTO daily_stats(day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12890 VALUES(?1,?2,?3,?4,?5,?6,?7)
12891 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12892 session_count = session_count + excluded.session_count,
12893 message_count = message_count + excluded.message_count,
12894 total_chars = total_chars + excluded.total_chars,
12895 last_updated = excluded.last_updated",
12896 fparams![
12897 target.day_id,
12898 target.agent_slug,
12899 target.source_id,
12900 delta.session_count_delta,
12901 delta.message_count_delta,
12902 delta.total_chars_delta,
12903 now
12904 ],
12905 )?;
12906 storage.mark_daily_stats_key_ensured(cache_key);
12907 Ok(())
12908}
12909
12910fn franken_update_daily_stats_batched_in_tx(
12916 tx: &FrankenTransaction<'_>,
12917 entries: &[(i64, String, String, StatsDelta)],
12918) -> Result<usize> {
12919 if entries.is_empty() {
12920 return Ok(0);
12921 }
12922
12923 let now = FrankenStorage::now_millis();
12924 let mut total_affected = 0;
12925
12926 for (day_id, agent, source, delta) in entries {
12931 total_affected += tx.execute_compat(
12932 "INSERT INTO daily_stats (day_id, agent_slug, source_id, session_count, message_count, total_chars, last_updated)
12933 VALUES(?1,?2,?3,?4,?5,?6,?7)
12934 ON CONFLICT(day_id, agent_slug, source_id) DO UPDATE SET
12935 session_count = session_count + excluded.session_count,
12936 message_count = message_count + excluded.message_count,
12937 total_chars = total_chars + excluded.total_chars,
12938 last_updated = excluded.last_updated",
12939 fparams![
12940 *day_id,
12941 agent.as_str(),
12942 source.as_str(),
12943 delta.session_count_delta,
12944 delta.message_count_delta,
12945 delta.total_chars_delta,
12946 now
12947 ],
12948 )?;
12949 }
12950
12951 Ok(total_affected)
12952}
12953
12954fn franken_insert_token_usage_batched_in_tx(
12960 tx: &FrankenTransaction<'_>,
12961 entries: &[TokenUsageEntry],
12962) -> Result<usize> {
12963 if entries.is_empty() {
12964 return Ok(0);
12965 }
12966
12967 let mut total_inserted = 0;
12968
12969 for e in entries {
12970 let params_vec: Vec<ParamValue> = vec![
12971 ParamValue::from(e.message_id),
12972 ParamValue::from(e.conversation_id),
12973 ParamValue::from(e.agent_id),
12974 ParamValue::from(e.workspace_id),
12975 ParamValue::from(e.source_id.clone()),
12976 ParamValue::from(e.timestamp_ms),
12977 ParamValue::from(e.day_id),
12978 ParamValue::from(e.model_name.clone()),
12979 ParamValue::from(e.model_family.clone()),
12980 ParamValue::from(e.model_tier.clone()),
12981 ParamValue::from(e.service_tier.clone()),
12982 ParamValue::from(e.provider.clone()),
12983 ParamValue::from(e.input_tokens),
12984 ParamValue::from(e.output_tokens),
12985 ParamValue::from(e.cache_read_tokens),
12986 ParamValue::from(e.cache_creation_tokens),
12987 ParamValue::from(e.thinking_tokens),
12988 ParamValue::from(e.total_tokens),
12989 ParamValue::from(e.estimated_cost_usd),
12990 ParamValue::from(e.role.clone()),
12991 ParamValue::from(e.content_chars),
12992 ParamValue::from(e.has_tool_calls as i64),
12993 ParamValue::from(e.tool_call_count as i64),
12994 ParamValue::from(e.data_source.clone()),
12995 ];
12996
12997 let values = param_slice_to_values(¶ms_vec);
12998 total_inserted += tx.execute_with_params(
12999 "INSERT OR IGNORE INTO token_usage (
13000 message_id, conversation_id, agent_id, workspace_id, source_id,
13001 timestamp_ms, day_id,
13002 model_name, model_family, model_tier, service_tier, provider,
13003 input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens,
13004 thinking_tokens, total_tokens, estimated_cost_usd,
13005 role, content_chars, has_tool_calls, tool_call_count, data_source
13006 )
13007 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
13008 &values,
13009 )?;
13010 }
13011
13012 Ok(total_inserted)
13013}
13014
13015fn franken_update_token_daily_stats_batched_in_tx(
13017 tx: &FrankenTransaction<'_>,
13018 entries: &[(i64, String, String, String, TokenStatsDelta)],
13019) -> Result<usize> {
13020 if entries.is_empty() {
13021 return Ok(0);
13022 }
13023
13024 let now = FrankenStorage::now_millis();
13025 let mut total_affected = 0;
13026
13027 for (day_id, agent, source, model, delta) in entries {
13028 total_affected += tx.execute_compat(
13029 "INSERT INTO token_daily_stats (
13030 day_id, agent_slug, source_id, model_family,
13031 api_call_count, user_message_count, assistant_message_count, tool_message_count,
13032 total_input_tokens, total_output_tokens, total_cache_read_tokens,
13033 total_cache_creation_tokens, total_thinking_tokens, grand_total_tokens,
13034 total_content_chars, total_tool_calls, estimated_cost_usd, session_count,
13035 last_updated
13036 )
13037 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19)
13038 ON CONFLICT(day_id, agent_slug, source_id, model_family) DO UPDATE SET
13039 api_call_count = api_call_count + excluded.api_call_count,
13040 user_message_count = user_message_count + excluded.user_message_count,
13041 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13042 tool_message_count = tool_message_count + excluded.tool_message_count,
13043 total_input_tokens = total_input_tokens + excluded.total_input_tokens,
13044 total_output_tokens = total_output_tokens + excluded.total_output_tokens,
13045 total_cache_read_tokens = total_cache_read_tokens + excluded.total_cache_read_tokens,
13046 total_cache_creation_tokens = total_cache_creation_tokens + excluded.total_cache_creation_tokens,
13047 total_thinking_tokens = total_thinking_tokens + excluded.total_thinking_tokens,
13048 grand_total_tokens = grand_total_tokens + excluded.grand_total_tokens,
13049 total_content_chars = total_content_chars + excluded.total_content_chars,
13050 total_tool_calls = total_tool_calls + excluded.total_tool_calls,
13051 estimated_cost_usd = estimated_cost_usd + excluded.estimated_cost_usd,
13052 session_count = session_count + excluded.session_count,
13053 last_updated = excluded.last_updated",
13054 fparams![
13055 *day_id,
13056 agent.as_str(),
13057 source.as_str(),
13058 model.as_str(),
13059 delta.api_call_count,
13060 delta.user_message_count,
13061 delta.assistant_message_count,
13062 delta.tool_message_count,
13063 delta.total_input_tokens,
13064 delta.total_output_tokens,
13065 delta.total_cache_read_tokens,
13066 delta.total_cache_creation_tokens,
13067 delta.total_thinking_tokens,
13068 delta.grand_total_tokens,
13069 delta.total_content_chars,
13070 delta.total_tool_calls,
13071 delta.estimated_cost_usd,
13072 delta.session_count,
13073 now
13074 ],
13075 )?;
13076 }
13077
13078 Ok(total_affected)
13079}
13080
13081fn franken_insert_message_metrics_batched_in_tx(
13087 tx: &FrankenTransaction<'_>,
13088 entries: &[MessageMetricsEntry],
13089) -> Result<usize> {
13090 if entries.is_empty() {
13091 return Ok(0);
13092 }
13093
13094 let mut total_inserted = 0;
13095
13096 for e in entries {
13097 let params_vec: Vec<ParamValue> = vec![
13098 ParamValue::from(e.message_id),
13099 ParamValue::from(e.created_at_ms),
13100 ParamValue::from(e.hour_id),
13101 ParamValue::from(e.day_id),
13102 ParamValue::from(e.agent_slug.clone()),
13103 ParamValue::from(e.workspace_id),
13104 ParamValue::from(e.source_id.clone()),
13105 ParamValue::from(e.role.clone()),
13106 ParamValue::from(e.content_chars),
13107 ParamValue::from(e.content_tokens_est),
13108 ParamValue::from(e.model_name.clone()),
13109 ParamValue::from(e.model_family.clone()),
13110 ParamValue::from(e.model_tier.clone()),
13111 ParamValue::from(e.provider.clone()),
13112 ParamValue::from(e.api_input_tokens),
13113 ParamValue::from(e.api_output_tokens),
13114 ParamValue::from(e.api_cache_read_tokens),
13115 ParamValue::from(e.api_cache_creation_tokens),
13116 ParamValue::from(e.api_thinking_tokens),
13117 ParamValue::from(e.api_service_tier.clone()),
13118 ParamValue::from(e.api_data_source.clone()),
13119 ParamValue::from(e.tool_call_count),
13120 ParamValue::from(e.has_tool_calls as i64),
13121 ParamValue::from(e.has_plan as i64),
13122 ];
13123
13124 let values = param_slice_to_values(¶ms_vec);
13125 total_inserted += tx.execute_with_params(
13126 "INSERT OR IGNORE INTO message_metrics (
13127 message_id, created_at_ms, hour_id, day_id,
13128 agent_slug, workspace_id, source_id, role,
13129 content_chars, content_tokens_est,
13130 model_name, model_family, model_tier, provider,
13131 api_input_tokens, api_output_tokens, api_cache_read_tokens,
13132 api_cache_creation_tokens, api_thinking_tokens,
13133 api_service_tier, api_data_source,
13134 tool_call_count, has_tool_calls, has_plan
13135 )
13136 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22,?23,?24)",
13137 &values,
13138 )?;
13139 }
13140
13141 Ok(total_inserted)
13142}
13143
13144fn franken_flush_rollup_table(
13146 tx: &FrankenTransaction<'_>,
13147 table: &str,
13148 bucket_col: &str,
13149 deltas: &HashMap<(i64, String, i64, String), UsageRollupDelta>,
13150 now: i64,
13151) -> Result<usize> {
13152 if deltas.is_empty() {
13153 return Ok(0);
13154 }
13155
13156 let mut total_affected = 0;
13157
13158 for ((bucket_id, agent, workspace_id, source), d) in deltas {
13159 let sql = format!(
13160 "INSERT INTO {table} (
13161 {bucket_col}, agent_slug, workspace_id, source_id,
13162 message_count, user_message_count, assistant_message_count,
13163 tool_call_count, plan_message_count, plan_content_tokens_est_total,
13164 plan_api_tokens_total, api_coverage_message_count,
13165 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
13166 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
13167 api_cache_read_tokens_total, api_cache_creation_tokens_total,
13168 api_thinking_tokens_total, last_updated
13169 )
13170 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
13171 ON CONFLICT({bucket_col}, agent_slug, workspace_id, source_id) DO UPDATE SET
13172 message_count = message_count + excluded.message_count,
13173 user_message_count = user_message_count + excluded.user_message_count,
13174 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13175 tool_call_count = tool_call_count + excluded.tool_call_count,
13176 plan_message_count = plan_message_count + excluded.plan_message_count,
13177 plan_content_tokens_est_total = plan_content_tokens_est_total + excluded.plan_content_tokens_est_total,
13178 plan_api_tokens_total = plan_api_tokens_total + excluded.plan_api_tokens_total,
13179 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
13180 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
13181 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
13182 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
13183 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
13184 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
13185 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
13186 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
13187 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
13188 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
13189 last_updated = excluded.last_updated"
13190 );
13191
13192 total_affected += tx.execute_compat(
13193 &sql,
13194 fparams![
13195 *bucket_id,
13196 agent.as_str(),
13197 *workspace_id,
13198 source.as_str(),
13199 d.message_count,
13200 d.user_message_count,
13201 d.assistant_message_count,
13202 d.tool_call_count,
13203 d.plan_message_count,
13204 d.plan_content_tokens_est_total,
13205 d.plan_api_tokens_total,
13206 d.api_coverage_message_count,
13207 d.content_tokens_est_total,
13208 d.content_tokens_est_user,
13209 d.content_tokens_est_assistant,
13210 d.api_tokens_total,
13211 d.api_input_tokens_total,
13212 d.api_output_tokens_total,
13213 d.api_cache_read_tokens_total,
13214 d.api_cache_creation_tokens_total,
13215 d.api_thinking_tokens_total,
13216 now
13217 ],
13218 )?;
13219 }
13220
13221 Ok(total_affected)
13222}
13223
13224fn franken_flush_model_daily_rollup_table(
13226 tx: &FrankenTransaction<'_>,
13227 deltas: &HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
13228 now: i64,
13229) -> Result<usize> {
13230 if deltas.is_empty() {
13231 return Ok(0);
13232 }
13233
13234 let mut total_affected = 0;
13235
13236 for ((day_id, agent, workspace_id, source, model_family, model_tier), d) in deltas {
13237 total_affected += tx.execute_compat(
13238 "INSERT INTO usage_models_daily (
13239 day_id, agent_slug, workspace_id, source_id, model_family, model_tier,
13240 message_count, user_message_count, assistant_message_count,
13241 tool_call_count, plan_message_count, api_coverage_message_count,
13242 content_tokens_est_total, content_tokens_est_user, content_tokens_est_assistant,
13243 api_tokens_total, api_input_tokens_total, api_output_tokens_total,
13244 api_cache_read_tokens_total, api_cache_creation_tokens_total,
13245 api_thinking_tokens_total, last_updated
13246 )
13247 VALUES(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18,?19,?20,?21,?22)
13248 ON CONFLICT(day_id, agent_slug, workspace_id, source_id, model_family, model_tier) DO UPDATE SET
13249 message_count = message_count + excluded.message_count,
13250 user_message_count = user_message_count + excluded.user_message_count,
13251 assistant_message_count = assistant_message_count + excluded.assistant_message_count,
13252 tool_call_count = tool_call_count + excluded.tool_call_count,
13253 plan_message_count = plan_message_count + excluded.plan_message_count,
13254 api_coverage_message_count = api_coverage_message_count + excluded.api_coverage_message_count,
13255 content_tokens_est_total = content_tokens_est_total + excluded.content_tokens_est_total,
13256 content_tokens_est_user = content_tokens_est_user + excluded.content_tokens_est_user,
13257 content_tokens_est_assistant = content_tokens_est_assistant + excluded.content_tokens_est_assistant,
13258 api_tokens_total = api_tokens_total + excluded.api_tokens_total,
13259 api_input_tokens_total = api_input_tokens_total + excluded.api_input_tokens_total,
13260 api_output_tokens_total = api_output_tokens_total + excluded.api_output_tokens_total,
13261 api_cache_read_tokens_total = api_cache_read_tokens_total + excluded.api_cache_read_tokens_total,
13262 api_cache_creation_tokens_total = api_cache_creation_tokens_total + excluded.api_cache_creation_tokens_total,
13263 api_thinking_tokens_total = api_thinking_tokens_total + excluded.api_thinking_tokens_total,
13264 last_updated = excluded.last_updated",
13265 fparams![
13266 *day_id,
13267 agent.as_str(),
13268 *workspace_id,
13269 source.as_str(),
13270 model_family.as_str(),
13271 model_tier.as_str(),
13272 d.message_count,
13273 d.user_message_count,
13274 d.assistant_message_count,
13275 d.tool_call_count,
13276 d.plan_message_count,
13277 d.api_coverage_message_count,
13278 d.content_tokens_est_total,
13279 d.content_tokens_est_user,
13280 d.content_tokens_est_assistant,
13281 d.api_tokens_total,
13282 d.api_input_tokens_total,
13283 d.api_output_tokens_total,
13284 d.api_cache_read_tokens_total,
13285 d.api_cache_creation_tokens_total,
13286 d.api_thinking_tokens_total,
13287 now
13288 ],
13289 )?;
13290 }
13291
13292 Ok(total_affected)
13293}
13294
13295fn franken_flush_analytics_rollups_in_tx(
13297 tx: &FrankenTransaction<'_>,
13298 agg: &AnalyticsRollupAggregator,
13299) -> Result<(usize, usize, usize)> {
13300 let now = FrankenStorage::now_millis();
13301
13302 let hourly_affected =
13303 franken_flush_rollup_table(tx, "usage_hourly", "hour_id", &agg.hourly, now)?;
13304 let daily_affected = franken_flush_rollup_table(tx, "usage_daily", "day_id", &agg.daily, now)?;
13305 let models_daily_affected = franken_flush_model_daily_rollup_table(tx, &agg.models_daily, now)?;
13306
13307 Ok((hourly_affected, daily_affected, models_daily_affected))
13308}
13309
13310fn franken_update_conversation_token_summaries_in_tx(
13312 tx: &FrankenTransaction<'_>,
13313 conversation_id: i64,
13314) -> Result<()> {
13315 tx.execute_compat(
13316 "UPDATE conversations SET
13317 total_input_tokens = (SELECT SUM(input_tokens) FROM token_usage WHERE conversation_id = ?1),
13318 total_output_tokens = (SELECT SUM(output_tokens) FROM token_usage WHERE conversation_id = ?1),
13319 total_cache_read_tokens = (SELECT SUM(cache_read_tokens) FROM token_usage WHERE conversation_id = ?1),
13320 total_cache_creation_tokens = (SELECT SUM(cache_creation_tokens) FROM token_usage WHERE conversation_id = ?1),
13321 grand_total_tokens = (SELECT SUM(total_tokens) FROM token_usage WHERE conversation_id = ?1),
13322 estimated_cost_usd = (SELECT SUM(estimated_cost_usd) FROM token_usage WHERE conversation_id = ?1),
13323 primary_model = (SELECT model_name FROM token_usage WHERE conversation_id = ?1
13324 AND model_name IS NOT NULL
13325 GROUP BY model_name ORDER BY COUNT(*) DESC LIMIT 1),
13326 api_call_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13327 AND data_source = 'api'),
13328 tool_call_count = (SELECT SUM(tool_call_count) FROM token_usage WHERE conversation_id = ?1),
13329 user_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13330 AND role = 'user'),
13331 assistant_message_count = (SELECT COUNT(*) FROM token_usage WHERE conversation_id = ?1
13332 AND role IN ('assistant', 'agent'))
13333 WHERE id = ?1",
13334 fparams![conversation_id],
13335 )?;
13336 Ok(())
13337}
13338
13339impl FrankenStorage {
13340 pub fn rebuild_token_daily_stats(&self) -> Result<usize> {
13342 const CONVERSATION_BATCH_SIZE: usize = 1_000;
13343 const TOKEN_USAGE_BATCH_SIZE: usize = 10_000;
13344
13345 let total_usage_rows: i64 =
13346 self.conn
13347 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
13348 row.get_typed(0)
13349 })?;
13350 tracing::info!(
13351 target: "cass::analytics",
13352 total_usage_rows,
13353 "token_daily_stats_rebuild_start"
13354 );
13355
13356 let mut tx = self.conn.transaction()?;
13357 tx.execute("DELETE FROM token_daily_stats")?;
13358
13359 let mut last_conversation_id = 0_i64;
13360 let mut rows_created = 0_usize;
13361
13362 loop {
13363 let conversation_rows = tx.query_map_collect(
13364 "SELECT c.id, c.started_at, c.source_id,
13365 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown')
13366 FROM conversations c
13367 WHERE c.id > ?1
13368 ORDER BY c.id
13369 LIMIT ?2",
13370 fparams![last_conversation_id, CONVERSATION_BATCH_SIZE as i64],
13371 |row| {
13372 Ok((
13373 row.get_typed::<i64>(0)?,
13374 row.get_typed::<Option<i64>>(1)?,
13375 row.get_typed::<String>(2)?,
13376 row.get_typed::<String>(3)?,
13377 ))
13378 },
13379 )?;
13380 if conversation_rows.is_empty() {
13381 break;
13382 }
13383
13384 let mut aggregate = TokenStatsAggregator::new();
13385
13386 for (conversation_id, started_at, source_id, agent_slug) in conversation_rows {
13387 last_conversation_id = conversation_id;
13388 let conversation_day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13389 let mut last_token_usage_id = 0_i64;
13390 let mut session_model_family = String::from("unknown");
13391
13392 loop {
13393 let usage_rows = tx.query_map_collect(
13394 "SELECT id, day_id, role,
13395 COALESCE(model_family, 'unknown'),
13396 input_tokens, output_tokens, cache_read_tokens,
13397 cache_creation_tokens, thinking_tokens,
13398 has_tool_calls, tool_call_count,
13399 content_chars, estimated_cost_usd
13400 FROM token_usage
13401 WHERE conversation_id = ?1
13402 AND id > ?2
13403 ORDER BY id
13404 LIMIT ?3",
13405 fparams![
13406 conversation_id,
13407 last_token_usage_id,
13408 TOKEN_USAGE_BATCH_SIZE as i64
13409 ],
13410 |row| {
13411 Ok((
13412 row.get_typed::<i64>(0)?,
13413 row.get_typed::<i64>(1)?,
13414 row.get_typed::<String>(2)?,
13415 row.get_typed::<String>(3)?,
13416 row.get_typed::<Option<i64>>(4)?,
13417 row.get_typed::<Option<i64>>(5)?,
13418 row.get_typed::<Option<i64>>(6)?,
13419 row.get_typed::<Option<i64>>(7)?,
13420 row.get_typed::<Option<i64>>(8)?,
13421 row.get_typed::<i64>(9)?,
13422 row.get_typed::<i64>(10)?,
13423 row.get_typed::<i64>(11)?,
13424 row.get_typed::<Option<f64>>(12)?,
13425 ))
13426 },
13427 )?;
13428 if usage_rows.is_empty() {
13429 break;
13430 }
13431
13432 for (
13433 token_usage_id,
13434 day_id,
13435 role,
13436 model_family,
13437 input_tokens,
13438 output_tokens,
13439 cache_read_tokens,
13440 cache_creation_tokens,
13441 thinking_tokens,
13442 has_tool_calls,
13443 tool_call_count,
13444 content_chars,
13445 estimated_cost_usd,
13446 ) in usage_rows
13447 {
13448 last_token_usage_id = token_usage_id;
13449 if model_family != "unknown" {
13450 session_model_family = model_family.clone();
13451 }
13452 let usage = crate::connectors::ExtractedTokenUsage {
13453 model_name: None,
13454 provider: None,
13455 input_tokens,
13456 output_tokens,
13457 cache_read_tokens,
13458 cache_creation_tokens,
13459 thinking_tokens,
13460 service_tier: None,
13461 has_tool_calls: has_tool_calls != 0,
13462 tool_call_count: u32::try_from(tool_call_count.max(0)).unwrap_or(0),
13463 data_source: franken_agent_detection::TokenDataSource::Api,
13464 };
13465 aggregate.record(
13466 &agent_slug,
13467 &source_id,
13468 day_id,
13469 &model_family,
13470 &role,
13471 &usage,
13472 content_chars,
13473 estimated_cost_usd.unwrap_or(0.0),
13474 );
13475 }
13476 }
13477
13478 aggregate.record_session(
13479 &agent_slug,
13480 &source_id,
13481 conversation_day_id,
13482 &session_model_family,
13483 );
13484 }
13485
13486 let entries = aggregate.expand();
13487 rows_created = rows_created.saturating_add(entries.len());
13488 franken_update_token_daily_stats_batched_in_tx(&tx, &entries)?;
13489 }
13490
13491 tx.commit()?;
13492
13493 tracing::info!(
13494 target: "cass::analytics",
13495 rows_created,
13496 "token_daily_stats_rebuild_complete"
13497 );
13498
13499 Ok(rows_created)
13500 }
13501
13502 pub fn rebuild_analytics(&self) -> Result<AnalyticsRebuildResult> {
13505 let start = Instant::now();
13506
13507 let total_messages: i64 =
13508 self.conn
13509 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13510 row.get_typed(0)
13511 })?;
13512 tracing::info!(
13513 target: "cass::analytics",
13514 total_messages,
13515 "analytics_rebuild_start"
13516 );
13517
13518 let mut tx = self.conn.transaction()?;
13519
13520 tx.execute("DELETE FROM message_metrics")?;
13521 tx.execute("DELETE FROM usage_hourly")?;
13522 tx.execute("DELETE FROM usage_daily")?;
13523 tx.execute("DELETE FROM usage_models_daily")?;
13524
13525 const CHUNK_SIZE: i64 = 10_000;
13526 let mut offset: i64 = 0;
13527 let mut total_inserted: usize = 0;
13528 let mut usage_hourly_rows: usize = 0;
13529 let mut usage_daily_rows: usize = 0;
13530 let mut usage_models_daily_rows: usize = 0;
13531
13532 loop {
13533 #[allow(clippy::type_complexity)]
13534 let rows: Vec<(
13535 i64,
13536 String,
13537 String,
13538 Option<serde_json::Value>,
13539 Option<i64>,
13540 Option<i64>,
13541 String,
13542 Option<i64>,
13543 String,
13544 )> = tx.query_map_collect(
13545 "SELECT m.id, m.idx, m.role, m.content, m.extra_json, m.extra_bin,
13551 m.created_at,
13552 c.id AS conv_id, c.started_at AS conv_started_at,
13553 c.source_id, c.workspace_id,
13554 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown') AS agent_slug
13555 FROM messages m
13556 JOIN conversations c ON m.conversation_id = c.id
13557 ORDER BY m.id
13558 LIMIT ?1 OFFSET ?2",
13559 fparams![CHUNK_SIZE, offset],
13560 |row| {
13561 let msg_id: i64 = row.get_typed(0)?;
13562 let role: String = row.get_typed(2)?;
13563 let content: String = row.get_typed(3)?;
13564 let extra_json = row
13565 .get_typed::<Option<String>>(4)?
13566 .and_then(|s| serde_json::from_str(&s).ok())
13567 .or_else(|| {
13568 row.get_typed::<Option<Vec<u8>>>(5)
13569 .ok()
13570 .flatten()
13571 .and_then(|b| rmp_serde::from_slice(&b).ok())
13572 });
13573 let msg_ts: Option<i64> = row.get_typed(6)?;
13574 let conv_started_at: Option<i64> = row.get_typed(8)?;
13575 let source_id: String = row.get_typed(9)?;
13576 let workspace_id: Option<i64> = row.get_typed(10)?;
13577 let agent_slug: String = row.get_typed(11)?;
13578 let effective_ts = msg_ts.or(conv_started_at).unwrap_or(0);
13579
13580 Ok((
13581 msg_id,
13582 role,
13583 content,
13584 extra_json,
13585 Some(effective_ts),
13586 workspace_id,
13587 source_id,
13588 conv_started_at,
13589 agent_slug,
13590 ))
13591 },
13592 )?;
13593
13594 if rows.is_empty() {
13595 break;
13596 }
13597
13598 let chunk_len = rows.len();
13599 let mut entries = Vec::with_capacity(chunk_len);
13600 let mut rollup_agg = AnalyticsRollupAggregator::new();
13601
13602 for (
13603 msg_id,
13604 role,
13605 content,
13606 extra_json,
13607 effective_ts,
13608 workspace_id,
13609 source_id,
13610 _conv_started_at,
13611 agent_slug,
13612 ) in &rows
13613 {
13614 let ts = effective_ts.unwrap_or(0);
13615 let day_id = Self::day_id_from_millis(ts);
13616 let hour_id = Self::hour_id_from_millis(ts);
13617 let content_chars = content.len() as i64;
13618 let content_tokens_est = content_chars / 4;
13619 let extra = extra_json
13620 .as_ref()
13621 .cloned()
13622 .unwrap_or(serde_json::Value::Null);
13623 let usage =
13624 crate::connectors::extract_tokens_for_agent(agent_slug, &extra, content, role);
13625 let model_info = usage
13626 .model_name
13627 .as_deref()
13628 .map(crate::connectors::normalize_model);
13629 let model_family = model_info
13630 .as_ref()
13631 .map(|i| i.family.clone())
13632 .unwrap_or_else(|| "unknown".into());
13633 let model_tier = model_info
13634 .as_ref()
13635 .map(|i| i.tier.clone())
13636 .unwrap_or_else(|| "unknown".into());
13637 let provider = usage
13638 .provider
13639 .clone()
13640 .or_else(|| model_info.as_ref().map(|i| i.provider.clone()))
13641 .unwrap_or_else(|| "unknown".into());
13642
13643 let entry = MessageMetricsEntry {
13644 message_id: *msg_id,
13645 created_at_ms: ts,
13646 hour_id,
13647 day_id,
13648 agent_slug: agent_slug.clone(),
13649 workspace_id: workspace_id.unwrap_or(0),
13650 source_id: source_id.clone(),
13651 role: role.clone(),
13652 content_chars,
13653 content_tokens_est,
13654 model_name: usage.model_name.clone(),
13655 model_family,
13656 model_tier,
13657 provider,
13658 api_input_tokens: usage.input_tokens,
13659 api_output_tokens: usage.output_tokens,
13660 api_cache_read_tokens: usage.cache_read_tokens,
13661 api_cache_creation_tokens: usage.cache_creation_tokens,
13662 api_thinking_tokens: usage.thinking_tokens,
13663 api_service_tier: usage.service_tier,
13664 api_data_source: usage.data_source.as_str().to_string(),
13665 tool_call_count: usage.tool_call_count as i64,
13666 has_tool_calls: usage.has_tool_calls,
13667 has_plan: has_plan_for_role(role, content),
13668 };
13669 rollup_agg.record(&entry);
13670 entries.push(entry);
13671 }
13672
13673 total_inserted += franken_insert_message_metrics_batched_in_tx(&tx, &entries)?;
13674 let (hourly, daily, models_daily) =
13675 franken_flush_analytics_rollups_in_tx(&tx, &rollup_agg)?;
13676 usage_hourly_rows += hourly;
13677 usage_daily_rows += daily;
13678 usage_models_daily_rows += models_daily;
13679 offset += chunk_len as i64;
13680
13681 tracing::debug!(
13682 target: "cass::analytics",
13683 offset,
13684 chunk = chunk_len,
13685 inserted = entries.len(),
13686 total = total_inserted,
13687 "analytics_rebuild_chunk"
13688 );
13689
13690 if (chunk_len as i64) < CHUNK_SIZE {
13691 break;
13692 }
13693 }
13694
13695 tx.commit()?;
13696
13697 let elapsed = start.elapsed();
13698 let elapsed_ms = elapsed.as_millis() as u64;
13699 let msgs_per_sec = if elapsed_ms > 0 {
13700 (total_inserted as f64) / (elapsed_ms as f64 / 1000.0)
13701 } else {
13702 0.0
13703 };
13704
13705 tracing::info!(
13706 target: "cass::analytics",
13707 message_metrics_rows = total_inserted,
13708 usage_hourly_rows,
13709 usage_daily_rows,
13710 usage_models_daily_rows,
13711 elapsed_ms,
13712 messages_per_sec = format!("{:.0}", msgs_per_sec),
13713 "analytics_rebuild_complete"
13714 );
13715
13716 Ok(AnalyticsRebuildResult {
13717 message_metrics_rows: total_inserted,
13718 usage_hourly_rows,
13719 usage_daily_rows,
13720 usage_models_daily_rows,
13721 elapsed_ms,
13722 messages_per_sec: msgs_per_sec,
13723 })
13724 }
13725
13726 pub fn rebuild_daily_stats(&self) -> Result<DailyStatsRebuildResult> {
13728 const DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE: usize = 1_000;
13729 const DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE: usize = 10_000;
13730
13731 let mut conversation_batch_size = rebuild_batch_size_env(
13732 "CASS_DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE",
13733 DAILY_STATS_REBUILD_CONVERSATION_BATCH_SIZE,
13734 );
13735 let mut message_batch_size = rebuild_batch_size_env(
13736 "CASS_DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE",
13737 DAILY_STATS_REBUILD_MESSAGE_BATCH_SIZE,
13738 );
13739
13740 let total_messages: i64 =
13741 self.conn
13742 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
13743 row.get_typed(0)
13744 })?;
13745 let message_metrics_rows: i64 =
13746 self.conn
13747 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
13748 row.get_typed(0)
13749 })?;
13750 let use_message_metrics = total_messages > 0 && total_messages == message_metrics_rows;
13751
13752 tracing::info!(
13753 target: "cass::perf::daily_stats",
13754 total_messages,
13755 message_metrics_rows,
13756 use_message_metrics,
13757 "daily_stats rebuild selected message source"
13758 );
13759
13760 let mut tx = self.conn.transaction()?;
13761 tx.execute("DELETE FROM daily_stats")?;
13762
13763 let mut last_conversation_id = 0_i64;
13764 let mut conversation_batch_count = 0_usize;
13765 let mut conversations_processed = 0_usize;
13766 let mut messages_processed = 0_usize;
13767 let mut message_batch_count = 0_usize;
13768 let mut raw_entries_flushed = 0_usize;
13769 let mut expanded_entries_flushed = 0_usize;
13770 let message_scan_sql = if use_message_metrics {
13771 "SELECT m.idx, mm.content_chars
13772 FROM messages m
13773 JOIN message_metrics mm ON mm.message_id = m.id
13774 WHERE m.conversation_id = ?1
13775 AND m.idx > ?2
13776 ORDER BY m.conversation_id, m.idx
13777 LIMIT ?3"
13778 } else {
13779 "SELECT m.idx, COALESCE(LENGTH(CAST(m.content AS BLOB)), 0)
13780 FROM messages m
13781 WHERE m.conversation_id = ?1
13782 AND m.idx > ?2
13783 ORDER BY m.conversation_id, m.idx
13784 LIMIT ?3"
13785 };
13786
13787 loop {
13788 let conversation_rows = match self.conn.query_with_params(
13794 "SELECT c.id, c.started_at,
13795 COALESCE((SELECT a.slug FROM agents a WHERE a.id = c.agent_id), 'unknown'),
13796 c.source_id
13797 FROM conversations c
13798 WHERE c.id > ?1
13799 ORDER BY c.id
13800 LIMIT ?2",
13801 ¶ms_from_iter([
13802 ParamValue::from(last_conversation_id),
13803 ParamValue::from(conversation_batch_size as i64),
13804 ]),
13805 ) {
13806 Ok(rows) => rows,
13807 Err(err) if is_out_of_memory_error(&err) && conversation_batch_size > 1 => {
13808 let previous_batch_size = conversation_batch_size;
13809 conversation_batch_size = (conversation_batch_size / 2).max(1);
13810 tracing::warn!(
13811 previous_batch_size,
13812 conversation_batch_size,
13813 last_conversation_id,
13814 "daily_stats conversation scan ran out of memory; retrying with smaller batch"
13815 );
13816 continue;
13817 }
13818 Err(err) => return Err(err.into()),
13819 };
13820 if conversation_rows.is_empty() {
13821 break;
13822 }
13823
13824 let mut aggregate = StatsAggregator::new();
13825 let mut conversation_batch_meta: Vec<(i64, i64, String, String)> =
13826 Vec::with_capacity(conversation_rows.len());
13827 for row in &conversation_rows {
13828 let conversation_id: i64 = row.get_typed(0)?;
13829 let started_at: Option<i64> = row.get_typed(1)?;
13830 let agent_slug: String = row.get_typed(2)?;
13831 let source_id: String = row.get_typed(3)?;
13832 last_conversation_id = conversation_id;
13833 let day_id = started_at.map(Self::day_id_from_millis).unwrap_or(0);
13834 aggregate.record_delta(&agent_slug, &source_id, day_id, 1, 0, 0);
13835 conversation_batch_meta.push((conversation_id, day_id, agent_slug, source_id));
13836 conversations_processed += 1;
13837 }
13838
13839 conversation_batch_count += 1;
13840 raw_entries_flushed += aggregate.raw_entry_count();
13841 let entries = aggregate.expand();
13842 expanded_entries_flushed += entries.len();
13843 if !entries.is_empty() {
13844 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13845 }
13846 if conversation_batch_count.is_multiple_of(25) {
13847 tracing::info!(
13848 target: "cass::perf::daily_stats",
13849 conversations_processed,
13850 batches = conversation_batch_count,
13851 batch_size = conversation_batch_size,
13852 last_conversation_id,
13853 "daily_stats rebuild conversation scan progress"
13854 );
13855 }
13856 if conversation_batch_meta.is_empty() {
13857 continue;
13858 }
13859
13860 for (conversation_id, day_id, agent_slug, source_id) in conversation_batch_meta {
13861 let mut cursor_message_idx = -1_i64;
13862 loop {
13863 let message_rows = match self.conn.query_with_params(
13864 message_scan_sql,
13865 ¶ms_from_iter([
13866 ParamValue::from(conversation_id),
13867 ParamValue::from(cursor_message_idx),
13868 ParamValue::from(message_batch_size as i64),
13869 ]),
13870 ) {
13871 Ok(rows) => rows,
13872 Err(err) if is_out_of_memory_error(&err) && message_batch_size > 1 => {
13873 let previous_batch_size = message_batch_size;
13874 message_batch_size = (message_batch_size / 2).max(1);
13875 tracing::warn!(
13876 previous_batch_size,
13877 message_batch_size,
13878 conversation_id,
13879 cursor_message_idx,
13880 "daily_stats message scan ran out of memory; retrying with smaller batch"
13881 );
13882 continue;
13883 }
13884 Err(err) => return Err(err.into()),
13885 };
13886 if message_rows.is_empty() {
13887 break;
13888 }
13889
13890 let mut aggregate = StatsAggregator::new();
13891 for row in &message_rows {
13892 let message_idx: i64 = row.get_typed(0)?;
13893 let content_len: i64 = row.get_typed(1)?;
13894 cursor_message_idx = message_idx;
13895 aggregate.record_delta(&agent_slug, &source_id, day_id, 0, 1, content_len);
13896 messages_processed += 1;
13897 }
13898
13899 message_batch_count += 1;
13900 raw_entries_flushed += aggregate.raw_entry_count();
13901 let entries = aggregate.expand();
13902 expanded_entries_flushed += entries.len();
13903 if !entries.is_empty() {
13904 franken_update_daily_stats_batched_in_tx(&tx, &entries)?;
13905 }
13906 if message_batch_count.is_multiple_of(50) {
13907 tracing::info!(
13908 target: "cass::perf::daily_stats",
13909 messages_processed,
13910 batches = message_batch_count,
13911 batch_size = message_batch_size,
13912 source = if use_message_metrics {
13913 "message_metrics"
13914 } else {
13915 "messages"
13916 },
13917 conversation_id,
13918 cursor_message_idx,
13919 "daily_stats rebuild message scan progress"
13920 );
13921 }
13922 }
13923 }
13924 }
13925
13926 let rows_created: i64 =
13927 tx.query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
13928 row.get_typed(0)
13929 })?;
13930 let total_sessions: i64 = tx.query_row_map(
13931 "SELECT COALESCE(SUM(session_count), 0) FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
13932 fparams![],
13933 |row| row.get_typed(0),
13934 )?;
13935
13936 tx.commit()?;
13937
13938 tracing::info!(
13939 target: "cass::perf::daily_stats",
13940 rows_created,
13941 total_sessions,
13942 conversations_processed,
13943 conversation_batches = conversation_batch_count,
13944 conversation_batch_size,
13945 message_batches = message_batch_count,
13946 message_batch_size,
13947 messages_processed,
13948 use_message_metrics,
13949 raw_entries_flushed,
13950 expanded_entries_flushed,
13951 "Daily stats rebuilt from conversations"
13952 );
13953
13954 Ok(DailyStatsRebuildResult {
13955 rows_created,
13956 total_sessions,
13957 })
13958 }
13959}
13960
/// In-memory cache of agent and workspace ids, with hit/miss counters.
///
/// Lookups that miss are resolved through an [`IndexingCacheStorage`] and the
/// returned id is remembered for subsequent calls.
#[derive(Debug, Default)]
pub struct IndexingCache {
    /// Agent ids keyed by agent slug.
    agent_ids: HashMap<String, i64>,
    /// Workspace ids keyed by workspace path.
    workspace_ids: HashMap<PathBuf, i64>,
    /// Number of lookups answered from the cache.
    hits: u64,
    /// Number of lookups that had to be resolved through storage.
    misses: u64,
}
13994
/// Storage operations that [`IndexingCache`] calls when an id is not cached.
pub trait IndexingCacheStorage {
    /// Returns the id for `agent`.
    ///
    /// NOTE(review): presumably creates the agent row when it does not exist
    /// yet — confirm against the implementor.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64>;
    /// Returns the id for the workspace at `path`, passing along an optional
    /// display name.
    ///
    /// NOTE(review): presumably creates the workspace row when it does not
    /// exist yet — confirm against the implementor.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64>;
}
13999
/// Lets [`IndexingCache`] resolve misses directly against `FrankenStorage`.
impl IndexingCacheStorage for FrankenStorage {
    /// Delegates to `FrankenStorage::ensure_agent`.
    fn ensure_indexing_agent(&self, agent: &Agent) -> Result<i64> {
        self.ensure_agent(agent)
    }

    /// Delegates to `FrankenStorage::ensure_workspace`.
    fn ensure_indexing_workspace(&self, path: &Path, display_name: Option<&str>) -> Result<i64> {
        self.ensure_workspace(path, display_name)
    }
}
14009
14010impl IndexingCache {
14013 pub fn new() -> Self {
14015 Self {
14016 agent_ids: HashMap::new(),
14017 workspace_ids: HashMap::new(),
14018 hits: 0,
14019 misses: 0,
14020 }
14021 }
14022
14023 pub fn is_enabled() -> bool {
14026 dotenvy::var("CASS_SQLITE_CACHE")
14027 .map(|v| v != "0" && v.to_lowercase() != "false")
14028 .unwrap_or(true)
14029 }
14030
14031 pub fn get_or_insert_agent<S>(&mut self, storage: &S, agent: &Agent) -> Result<i64>
14036 where
14037 S: IndexingCacheStorage + ?Sized,
14038 {
14039 if let Some(&cached) = self.agent_ids.get(&agent.slug) {
14040 self.hits += 1;
14041 return Ok(cached);
14042 }
14043
14044 self.misses += 1;
14045 let id = storage.ensure_indexing_agent(agent)?;
14046 self.agent_ids.insert(agent.slug.clone(), id);
14047 Ok(id)
14048 }
14049
14050 pub fn get_or_insert_workspace(
14055 &mut self,
14056 storage: &(impl IndexingCacheStorage + ?Sized),
14057 path: &Path,
14058 display_name: Option<&str>,
14059 ) -> Result<i64> {
14060 if let Some(&cached) = self.workspace_ids.get(path) {
14061 self.hits += 1;
14062 return Ok(cached);
14063 }
14064
14065 self.misses += 1;
14066 let id = storage.ensure_indexing_workspace(path, display_name)?;
14067 self.workspace_ids.insert(path.to_path_buf(), id);
14068 Ok(id)
14069 }
14070
14071 pub fn stats(&self) -> (u64, u64, f64) {
14073 let total = self.hits + self.misses;
14074 let hit_rate = if total > 0 {
14075 self.hits as f64 / total as f64
14076 } else {
14077 0.0
14078 };
14079 (self.hits, self.misses, hit_rate)
14080 }
14081
14082 pub fn clear(&mut self) {
14084 self.agent_ids.clear();
14085 self.workspace_ids.clear();
14086 self.hits = 0;
14087 self.misses = 0;
14088 }
14089
14090 pub fn agent_count(&self) -> usize {
14092 self.agent_ids.len()
14093 }
14094
14095 pub fn workspace_count(&self) -> usize {
14097 self.workspace_ids.len()
14098 }
14099}
14100
/// Accumulated session, message, and character deltas for one aggregation key.
#[derive(Clone, Copy, Debug, Default)]
pub struct StatsDelta {
    /// Change in the number of sessions.
    pub session_count_delta: i64,
    /// Change in the number of messages.
    pub message_count_delta: i64,
    /// Change in the total content length.
    pub total_chars_delta: i64,
}

/// Collects [`StatsDelta`] values in memory, keyed by
/// `(day_id, agent_slug, source_id)`, so they can be written in one batch.
#[derive(Debug, Default)]
pub struct StatsAggregator {
    /// Raw (not yet expanded) deltas per `(day_id, agent_slug, source_id)`.
    deltas: HashMap<(i64, String, String), StatsDelta>,
}

impl StatsAggregator {
    /// Creates an aggregator with no recorded deltas.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one session with the given message count and content length.
    pub fn record(
        &mut self,
        agent_slug: &str,
        source_id: &str,
        day_id: i64,
        message_count: i64,
        total_chars: i64,
    ) {
        let one_session = 1;
        self.record_delta(
            agent_slug,
            source_id,
            day_id,
            one_session,
            message_count,
            total_chars,
        );
    }

    /// Adds arbitrary deltas to the `(day_id, agent_slug, source_id)` bucket.
    ///
    /// A call whose three deltas are all zero is ignored and does not create
    /// a bucket.
    pub fn record_delta(
        &mut self,
        agent_slug: &str,
        source_id: &str,
        day_id: i64,
        session_count_delta: i64,
        message_count_delta: i64,
        total_chars_delta: i64,
    ) {
        let changes_nothing = [session_count_delta, message_count_delta, total_chars_delta]
            .iter()
            .all(|&amount| amount == 0);
        if changes_nothing {
            return;
        }

        let bucket = self
            .deltas
            .entry((day_id, agent_slug.to_owned(), source_id.to_owned()))
            .or_default();
        bucket.session_count_delta += session_count_delta;
        bucket.message_count_delta += message_count_delta;
        bucket.total_chars_delta += total_chars_delta;
    }

    /// Expands every raw bucket into its rollup combinations and returns the
    /// summed result sorted by `(day_id, agent_slug, source_id)`.
    ///
    /// Each raw `(agent, source)` bucket contributes to `(agent, source)`,
    /// `("all", source)`, `(agent, "all")`, and `("all", "all")`. When the raw
    /// agent or source is itself `"all"`, coinciding combinations are counted
    /// only once.
    pub fn expand(&self) -> Vec<(i64, String, String, StatsDelta)> {
        let mut totals: HashMap<(i64, String, String), StatsDelta> = HashMap::new();

        for ((day_id, agent, source), delta) in &self.deltas {
            let combos = [
                (agent.as_str(), source.as_str()),
                ("all", source.as_str()),
                (agent.as_str(), "all"),
                ("all", "all"),
            ];

            for (pos, &(agent_key, source_key)) in combos.iter().enumerate() {
                // Skip a combination that an earlier slot already produced.
                let already_emitted = combos[..pos]
                    .iter()
                    .any(|&earlier| earlier == (agent_key, source_key));
                if already_emitted {
                    continue;
                }

                let bucket = totals
                    .entry((*day_id, agent_key.to_owned(), source_key.to_owned()))
                    .or_default();
                bucket.session_count_delta += delta.session_count_delta;
                bucket.message_count_delta += delta.message_count_delta;
                bucket.total_chars_delta += delta.total_chars_delta;
            }
        }

        let mut rows: Vec<(i64, String, String, StatsDelta)> = totals
            .into_iter()
            .map(|((day_id, agent, source), delta)| (day_id, agent, source, delta))
            .collect();
        rows.sort_by(|left, right| {
            (left.0, &left.1, &left.2).cmp(&(right.0, &right.1, &right.2))
        });
        rows
    }

    /// Returns `true` when nothing has been recorded.
    pub fn is_empty(&self) -> bool {
        self.deltas.is_empty()
    }

    /// Number of raw (unexpanded) buckets.
    pub fn raw_entry_count(&self) -> usize {
        self.deltas.len()
    }
}
14240
/// Accumulated token-usage totals for one aggregation bucket of
/// [`TokenStatsAggregator`].
#[derive(Clone, Debug, Default)]
pub struct TokenStatsDelta {
    /// Number of usage records folded into this bucket.
    pub api_call_count: i64,
    /// Records whose role was `"user"`.
    pub user_message_count: i64,
    /// Records whose role was `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    /// Records whose role was `"tool"`.
    pub tool_message_count: i64,
    /// Sum of input tokens (missing values count as 0).
    pub total_input_tokens: i64,
    /// Sum of output tokens (missing values count as 0).
    pub total_output_tokens: i64,
    /// Sum of cache-read tokens (missing values count as 0).
    pub total_cache_read_tokens: i64,
    /// Sum of cache-creation tokens (missing values count as 0).
    pub total_cache_creation_tokens: i64,
    /// Sum of thinking tokens (missing values count as 0).
    pub total_thinking_tokens: i64,
    /// Sum of each record's `total_tokens()` (missing values count as 0).
    pub grand_total_tokens: i64,
    /// Sum of the content lengths passed to `record`.
    pub total_content_chars: i64,
    /// Sum of tool-call counts.
    pub total_tool_calls: i64,
    /// Sum of estimated costs in USD.
    pub estimated_cost_usd: f64,
    /// Number of sessions attributed via `record_session`.
    pub session_count: i64,
}
14266
14267#[derive(Debug, Default)]
14273pub struct TokenStatsAggregator {
14274 deltas: HashMap<(i64, String, String, String), TokenStatsDelta>,
14276}
14277
14278impl TokenStatsAggregator {
14279 pub fn new() -> Self {
14280 Self {
14281 deltas: HashMap::new(),
14282 }
14283 }
14284
14285 #[allow(clippy::too_many_arguments)]
14287 pub fn record(
14288 &mut self,
14289 agent_slug: &str,
14290 source_id: &str,
14291 day_id: i64,
14292 model_family: &str,
14293 role: &str,
14294 usage: &crate::connectors::ExtractedTokenUsage,
14295 content_chars: i64,
14296 estimated_cost_usd: f64,
14297 ) {
14298 let key = (
14299 day_id,
14300 agent_slug.to_owned(),
14301 source_id.to_owned(),
14302 model_family.to_owned(),
14303 );
14304 let delta = self.deltas.entry(key).or_default();
14305
14306 delta.api_call_count += 1;
14307 match role {
14308 "user" => delta.user_message_count += 1,
14309 "assistant" | "agent" => delta.assistant_message_count += 1,
14310 "tool" => delta.tool_message_count += 1,
14311 _ => {}
14312 }
14313
14314 delta.total_input_tokens += usage.input_tokens.unwrap_or(0);
14315 delta.total_output_tokens += usage.output_tokens.unwrap_or(0);
14316 delta.total_cache_read_tokens += usage.cache_read_tokens.unwrap_or(0);
14317 delta.total_cache_creation_tokens += usage.cache_creation_tokens.unwrap_or(0);
14318 delta.total_thinking_tokens += usage.thinking_tokens.unwrap_or(0);
14319 delta.grand_total_tokens += usage.total_tokens().unwrap_or(0);
14320 delta.total_content_chars += content_chars;
14321 delta.total_tool_calls += usage.tool_call_count as i64;
14322 delta.estimated_cost_usd += estimated_cost_usd;
14323 }
14324
14325 pub fn record_session(
14327 &mut self,
14328 agent_slug: &str,
14329 source_id: &str,
14330 day_id: i64,
14331 model_family: &str,
14332 ) {
14333 let key = (
14334 day_id,
14335 agent_slug.to_owned(),
14336 source_id.to_owned(),
14337 model_family.to_owned(),
14338 );
14339 self.deltas.entry(key).or_default().session_count += 1;
14340 }
14341
14342 pub fn expand(&self) -> Vec<(i64, String, String, String, TokenStatsDelta)> {
14349 let mut expanded: HashMap<(i64, String, String, String), TokenStatsDelta> = HashMap::new();
14350
14351 for ((day_id, agent, source, model), delta) in &self.deltas {
14352 let permutations = [
14353 (agent.as_str(), source.as_str(), model.as_str()),
14354 ("all", source.as_str(), model.as_str()),
14355 (agent.as_str(), "all", model.as_str()),
14356 (agent.as_str(), source.as_str(), "all"),
14357 ("all", "all", "all"),
14358 ];
14359
14360 for idx in 0..permutations.len() {
14361 let (a, s, m) = permutations[idx];
14362 if permutations[..idx].contains(&(a, s, m)) {
14364 continue;
14365 }
14366 let key = (*day_id, a.to_owned(), s.to_owned(), m.to_owned());
14367 let entry = expanded.entry(key).or_default();
14368 entry.api_call_count += delta.api_call_count;
14369 entry.user_message_count += delta.user_message_count;
14370 entry.assistant_message_count += delta.assistant_message_count;
14371 entry.tool_message_count += delta.tool_message_count;
14372 entry.total_input_tokens += delta.total_input_tokens;
14373 entry.total_output_tokens += delta.total_output_tokens;
14374 entry.total_cache_read_tokens += delta.total_cache_read_tokens;
14375 entry.total_cache_creation_tokens += delta.total_cache_creation_tokens;
14376 entry.total_thinking_tokens += delta.total_thinking_tokens;
14377 entry.grand_total_tokens += delta.grand_total_tokens;
14378 entry.total_content_chars += delta.total_content_chars;
14379 entry.total_tool_calls += delta.total_tool_calls;
14380 entry.estimated_cost_usd += delta.estimated_cost_usd;
14381 entry.session_count += delta.session_count;
14382 }
14383 }
14384
14385 let mut out: Vec<(i64, String, String, String, TokenStatsDelta)> = expanded
14386 .into_iter()
14387 .map(|((d, a, s, m), delta)| (d, a, s, m, delta))
14388 .collect();
14389 out.sort_by(|(d1, a1, s1, m1, _), (d2, a2, s2, m2, _)| {
14390 d1.cmp(d2)
14391 .then_with(|| a1.cmp(a2))
14392 .then_with(|| s1.cmp(s2))
14393 .then_with(|| m1.cmp(m2))
14394 });
14395 out
14396 }
14397
14398 pub fn is_empty(&self) -> bool {
14399 self.deltas.is_empty()
14400 }
14401
14402 pub fn raw_entry_count(&self) -> usize {
14403 self.deltas.len()
14404 }
14405}
14406
/// Accumulated counters for one rollup bucket of
/// [`AnalyticsRollupAggregator`].
#[derive(Clone, Debug, Default)]
pub struct UsageRollupDelta {
    /// Messages folded into the bucket.
    pub message_count: i64,
    /// Messages whose role is `"user"`.
    pub user_message_count: i64,
    /// Messages whose role is `"assistant"` or `"agent"`.
    pub assistant_message_count: i64,
    /// Sum of per-message tool-call counts.
    pub tool_call_count: i64,
    /// Messages flagged as containing a plan.
    pub plan_message_count: i64,
    /// Estimated content tokens of plan messages.
    pub plan_content_tokens_est_total: i64,
    /// API token total of plan messages whose data source is `"api"`.
    pub plan_api_tokens_total: i64,
    /// Messages whose data source is `"api"`.
    pub api_coverage_message_count: i64,
    /// Estimated content tokens of all messages.
    pub content_tokens_est_total: i64,
    /// Estimated content tokens of user messages.
    pub content_tokens_est_user: i64,
    /// Estimated content tokens of assistant messages.
    pub content_tokens_est_assistant: i64,
    /// Sum of all API token categories (API-sourced messages only).
    pub api_tokens_total: i64,
    /// Sum of API input tokens (API-sourced messages only).
    pub api_input_tokens_total: i64,
    /// Sum of API output tokens (API-sourced messages only).
    pub api_output_tokens_total: i64,
    /// Sum of API cache-read tokens (API-sourced messages only).
    pub api_cache_read_tokens_total: i64,
    /// Sum of API cache-creation tokens (API-sourced messages only).
    pub api_cache_creation_tokens_total: i64,
    /// Sum of API thinking tokens (API-sourced messages only).
    pub api_thinking_tokens_total: i64,
}

/// Per-message metrics row, as consumed by
/// [`AnalyticsRollupAggregator::record`].
#[derive(Debug, Clone)]
pub struct MessageMetricsEntry {
    pub message_id: i64,
    pub created_at_ms: i64,
    /// Hourly rollup bucket id.
    pub hour_id: i64,
    /// Daily rollup bucket id.
    pub day_id: i64,
    pub agent_slug: String,
    pub workspace_id: i64,
    pub source_id: String,
    pub role: String,
    pub content_chars: i64,
    pub content_tokens_est: i64,
    pub model_name: Option<String>,
    pub model_family: String,
    pub model_tier: String,
    pub provider: String,
    pub api_input_tokens: Option<i64>,
    pub api_output_tokens: Option<i64>,
    pub api_cache_read_tokens: Option<i64>,
    pub api_cache_creation_tokens: Option<i64>,
    pub api_thinking_tokens: Option<i64>,
    pub api_service_tier: Option<String>,
    /// API totals are only accumulated when this equals `"api"`.
    pub api_data_source: String,
    pub tool_call_count: i64,
    pub has_tool_calls: bool,
    pub has_plan: bool,
}

/// Accumulates hourly, daily, and per-model daily rollups in memory from
/// individual [`MessageMetricsEntry`] values.
#[derive(Debug, Default)]
pub struct AnalyticsRollupAggregator {
    /// Keyed by `(hour_id, agent_slug, workspace_id, source_id)`.
    hourly: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    /// Keyed by `(day_id, agent_slug, workspace_id, source_id)`.
    daily: HashMap<(i64, String, i64, String), UsageRollupDelta>,
    /// Keyed by `(day_id, agent_slug, workspace_id, source_id, model_family, model_tier)`.
    models_daily: HashMap<(i64, String, i64, String, String, String), UsageRollupDelta>,
}

impl AnalyticsRollupAggregator {
    /// Creates an aggregator with three empty rollup maps.
    pub fn new() -> Self {
        Self::default()
    }

    /// Folds `entry` into its hourly, daily, and per-model daily buckets.
    ///
    /// The same counters are applied to all three buckets. API token totals
    /// are only added when `entry.api_data_source` is exactly `"api"`; missing
    /// token counts are treated as 0.
    pub fn record(&mut self, entry: &MessageMetricsEntry) {
        let est_tokens = entry.content_tokens_est;
        let input = entry.api_input_tokens.unwrap_or(0);
        let output = entry.api_output_tokens.unwrap_or(0);
        let cache_read = entry.api_cache_read_tokens.unwrap_or(0);
        let cache_creation = entry.api_cache_creation_tokens.unwrap_or(0);
        let thinking = entry.api_thinking_tokens.unwrap_or(0);
        let api_total = input + output + cache_read + cache_creation + thinking;

        let from_api = entry.api_data_source == "api";
        let from_user = entry.role == "user";
        let from_assistant = matches!(entry.role.as_str(), "assistant" | "agent");

        // Single definition of the per-bucket update, shared by all three maps.
        let apply = |bucket: &mut UsageRollupDelta| {
            bucket.message_count += 1;
            bucket.content_tokens_est_total += est_tokens;
            bucket.tool_call_count += entry.tool_call_count;
            if from_user {
                bucket.user_message_count += 1;
                bucket.content_tokens_est_user += est_tokens;
            }
            if from_assistant {
                bucket.assistant_message_count += 1;
                bucket.content_tokens_est_assistant += est_tokens;
            }
            if entry.has_plan {
                bucket.plan_message_count += 1;
                bucket.plan_content_tokens_est_total += est_tokens;
                if from_api {
                    bucket.plan_api_tokens_total += api_total;
                }
            }
            if from_api {
                bucket.api_coverage_message_count += 1;
                bucket.api_tokens_total += api_total;
                bucket.api_input_tokens_total += input;
                bucket.api_output_tokens_total += output;
                bucket.api_cache_read_tokens_total += cache_read;
                bucket.api_cache_creation_tokens_total += cache_creation;
                bucket.api_thinking_tokens_total += thinking;
            }
        };

        apply(
            self.hourly
                .entry((
                    entry.hour_id,
                    entry.agent_slug.clone(),
                    entry.workspace_id,
                    entry.source_id.clone(),
                ))
                .or_default(),
        );
        apply(
            self.daily
                .entry((
                    entry.day_id,
                    entry.agent_slug.clone(),
                    entry.workspace_id,
                    entry.source_id.clone(),
                ))
                .or_default(),
        );
        apply(
            self.models_daily
                .entry((
                    entry.day_id,
                    entry.agent_slug.clone(),
                    entry.workspace_id,
                    entry.source_id.clone(),
                    entry.model_family.clone(),
                    entry.model_tier.clone(),
                ))
                .or_default(),
        );
    }

    /// Returns `true` when all three rollup maps are empty.
    pub fn is_empty(&self) -> bool {
        self.hourly_entry_count() == 0
            && self.daily_entry_count() == 0
            && self.models_daily_entry_count() == 0
    }

    /// Number of hourly buckets.
    pub fn hourly_entry_count(&self) -> usize {
        self.hourly.len()
    }

    /// Number of daily buckets.
    pub fn daily_entry_count(&self) -> usize {
        self.daily.len()
    }

    /// Number of per-model daily buckets.
    pub fn models_daily_entry_count(&self) -> usize {
        self.models_daily.len()
    }
}
14587
14588fn has_plan_for_role(role: &str, content: &str) -> bool {
14592 let role = role.trim();
14593 (role.eq_ignore_ascii_case("assistant") || role.eq_ignore_ascii_case("agent"))
14594 && has_plan_heuristic(content)
14595}
14596
14597fn has_plan_heuristic(content: &str) -> bool {
14604 if content.len() < 24 {
14605 return false;
14606 }
14607
14608 let lower = content.to_lowercase();
14609
14610 let looks_like_tool_blob = lower.contains("```")
14612 || lower.contains("\"tool\"")
14613 || lower.contains("stdout:")
14614 || lower.contains("stderr:")
14615 || lower.contains("exit code:");
14616
14617 let mut lines: Vec<&str> = Vec::with_capacity(60);
14618 let mut in_fenced_code = false;
14619 for raw in lower.lines() {
14620 let line = raw.trim();
14621 if line.starts_with("```") {
14622 in_fenced_code = !in_fenced_code;
14623 continue;
14624 }
14625 if in_fenced_code || line.is_empty() {
14626 continue;
14627 }
14628 lines.push(line);
14629 if lines.len() >= 60 {
14630 break;
14631 }
14632 }
14633
14634 let header_pos = lines.iter().position(|line| {
14635 line.starts_with("## plan")
14636 || line.starts_with("# plan")
14637 || line.starts_with("plan:")
14638 || line.starts_with("implementation plan")
14639 || line.starts_with("next steps:")
14640 || line.starts_with("action plan:")
14641 });
14642 let preview_top = lines.iter().take(8).copied().collect::<Vec<_>>().join("\n");
14643 let header_near_top = header_pos.is_some_and(|idx| idx <= 6) || preview_top.contains("plan:");
14644
14645 if !header_near_top {
14646 return false;
14647 }
14648 if looks_like_tool_blob && header_pos.is_none() {
14649 return false;
14650 }
14651
14652 let numbered_steps = lines
14653 .iter()
14654 .filter(|line| is_numbered_step_line(line))
14655 .count();
14656 let bullet_steps = lines
14657 .iter()
14658 .filter(|line| {
14659 line.starts_with("- ")
14660 || line.starts_with("* ")
14661 || line.starts_with("+ ")
14662 || line.starts_with("- [ ] ")
14663 || line.starts_with("- [x] ")
14664 })
14665 .count();
14666
14667 numbered_steps >= 2 || (numbered_steps >= 1 && bullet_steps >= 1) || bullet_steps >= 3
14668}
14669
/// Returns `true` when `line` starts (after leading whitespace) with one to
/// three ASCII digits followed by `". "` or `") "`, e.g. `1. foo` or `12) bar`.
fn is_numbered_step_line(line: &str) -> bool {
    let candidate = line.trim_start();
    // ASCII digits are single bytes, so the byte count doubles as a valid
    // slice offset into `candidate`.
    let digits = candidate
        .bytes()
        .take_while(u8::is_ascii_digit)
        .count();
    if !(1..=3).contains(&digits) {
        return false;
    }
    let after_number = &candidate[digits..];
    [". ", ") "]
        .iter()
        .any(|separator| after_number.starts_with(*separator))
}
14679
/// Token-usage record for a single message.
///
/// NOTE(review): nothing in this part of the file constructs or reads this
/// struct, so the notes below are inferred from field names and types only —
/// confirm against the producer before relying on them.
#[derive(Debug, Clone)]
pub struct TokenUsageEntry {
    pub message_id: i64,
    pub conversation_id: i64,
    pub agent_id: i64,
    pub workspace_id: Option<i64>,
    pub source_id: String,
    // presumably epoch milliseconds — TODO confirm
    pub timestamp_ms: i64,
    // presumably on the same day-index scale as pricing day ids — TODO confirm
    pub day_id: i64,
    pub model_name: Option<String>,
    pub model_family: Option<String>,
    pub model_tier: Option<String>,
    pub service_tier: Option<String>,
    pub provider: Option<String>,
    // Token counters are optional; `None` presumably means "not reported".
    pub input_tokens: Option<i64>,
    pub output_tokens: Option<i64>,
    pub cache_read_tokens: Option<i64>,
    pub cache_creation_tokens: Option<i64>,
    pub thinking_tokens: Option<i64>,
    pub total_tokens: Option<i64>,
    pub estimated_cost_usd: Option<f64>,
    pub role: String,
    pub content_chars: i64,
    pub has_tool_calls: bool,
    pub tool_call_count: u32,
    pub data_source: String,
}
14708
/// One row of the `model_pricing` table, as loaded by
/// `PricingTable::franken_load`.
#[derive(Debug, Clone)]
pub struct PricingEntry {
    /// SQL `LIKE`-style pattern (`%` / `_` wildcards) matched against model names.
    pub model_pattern: String,
    pub provider: String,
    /// Cost per one million input tokens (costs are divided by 1_000_000 per token).
    pub input_cost_per_mtok: f64,
    /// Cost per one million output tokens.
    pub output_cost_per_mtok: f64,
    /// Cost per one million cache-read tokens; `None` means cache reads add no cost.
    pub cache_read_cost_per_mtok: Option<f64>,
    /// Cost per one million cache-creation tokens; `None` means they add no cost.
    pub cache_creation_cost_per_mtok: Option<f64>,
    /// `effective_date` converted to days since 2020-01-01.
    pub effective_day_id: i64,
}
14725
/// Running counters describing how many items could be priced.
#[derive(Debug, Clone, Default)]
pub struct PricingDiagnostics {
    /// Incremented by `record_priced`.
    pub priced_count: u64,
    /// Incremented by `record_unpriced`.
    pub unpriced_count: u64,
    /// Per-model tally of unpriced items; a missing model name is keyed as `"(none)"`.
    pub unknown_models: HashMap<String, u64>,
}
14734
14735impl PricingDiagnostics {
14736 fn record_priced(&mut self) {
14737 self.priced_count += 1;
14738 }
14739
14740 fn record_unpriced(&mut self, model_name: Option<&str>) {
14741 self.unpriced_count += 1;
14742 let key = model_name.unwrap_or("(none)").to_string();
14743 *self.unknown_models.entry(key).or_insert(0) += 1;
14744 }
14745
14746 pub fn log_summary(&self) {
14748 let total = self.priced_count + self.unpriced_count;
14749 if total == 0 {
14750 return;
14751 }
14752 let pct = (self.priced_count as f64 / total as f64) * 100.0;
14753 tracing::info!(
14754 target: "cass::analytics::pricing",
14755 priced = self.priced_count,
14756 unpriced = self.unpriced_count,
14757 total = total,
14758 coverage_pct = format!("{pct:.1}%"),
14759 "pricing coverage"
14760 );
14761 if !self.unknown_models.is_empty() {
14762 let mut sorted: Vec<_> = self.unknown_models.iter().collect();
14763 sorted.sort_by(|a, b| b.1.cmp(a.1));
14764 for (model, count) in sorted.iter().take(5) {
14765 tracing::debug!(
14766 target: "cass::analytics::pricing",
14767 model = model.as_str(),
14768 count = count,
14769 "unknown model (no pricing)"
14770 );
14771 }
14772 }
14773 }
14774}
14775
/// In-memory copy of the `model_pricing` table used for cost lookups.
#[derive(Debug, Clone)]
pub struct PricingTable {
    // Loaded with `ORDER BY effective_date DESC`, so newest entries come first.
    entries: Vec<PricingEntry>,
}
14781
14782impl PricingTable {
14783 pub fn load(conn: &FrankenConnection) -> Result<Self> {
14785 Self::franken_load(conn)
14786 }
14787
14788 pub fn franken_load(conn: &FrankenConnection) -> Result<Self> {
14790 let rows = conn.query(
14791 "SELECT model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
14792 cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
14793 FROM model_pricing
14794 ORDER BY effective_date DESC",
14795 )?;
14796 let mut entries = Vec::with_capacity(rows.len());
14797 for row in &rows {
14798 let effective_date: String = row.get_typed(6)?;
14799 let effective_day_id = date_str_to_day_id(&effective_date)?;
14800 entries.push(PricingEntry {
14801 model_pattern: row.get_typed(0)?,
14802 provider: row.get_typed(1)?,
14803 input_cost_per_mtok: row.get_typed(2)?,
14804 output_cost_per_mtok: row.get_typed(3)?,
14805 cache_read_cost_per_mtok: row.get_typed(4)?,
14806 cache_creation_cost_per_mtok: row.get_typed(5)?,
14807 effective_day_id,
14808 });
14809 }
14810 Ok(Self { entries })
14811 }
14812
14813 pub fn lookup(&self, model_name: &str, message_day_id: i64) -> Option<&PricingEntry> {
14821 let mut best: Option<&PricingEntry> = None;
14822
14823 for entry in &self.entries {
14824 if entry.effective_day_id > message_day_id {
14825 continue;
14826 }
14827 if !sql_like_match(model_name, &entry.model_pattern) {
14828 continue;
14829 }
14830
14831 match best {
14832 None => best = Some(entry),
14833 Some(current) => {
14834 if entry.effective_day_id > current.effective_day_id
14835 || (entry.effective_day_id == current.effective_day_id
14836 && entry.model_pattern.len() > current.model_pattern.len())
14837 {
14838 best = Some(entry);
14839 }
14840 }
14841 }
14842 }
14843
14844 best
14845 }
14846
14847 pub fn compute_cost(
14851 &self,
14852 model_name: Option<&str>,
14853 message_day_id: i64,
14854 input_tokens: Option<i64>,
14855 output_tokens: Option<i64>,
14856 cache_read_tokens: Option<i64>,
14857 cache_creation_tokens: Option<i64>,
14858 ) -> Option<f64> {
14859 let model = model_name?;
14860 let pricing = self.lookup(model, message_day_id)?;
14861
14862 if input_tokens.is_none() && output_tokens.is_none() {
14863 return None;
14864 }
14865
14866 let mut cost = 0.0;
14867 let cache_read = cache_read_tokens.unwrap_or(0);
14868 let cache_creation = cache_creation_tokens.unwrap_or(0);
14869 let non_cache_input = input_tokens
14872 .unwrap_or(0)
14873 .saturating_sub(cache_read)
14874 .saturating_sub(cache_creation)
14875 .max(0);
14876 cost += non_cache_input as f64 * pricing.input_cost_per_mtok / 1_000_000.0;
14877 cost += output_tokens.unwrap_or(0) as f64 * pricing.output_cost_per_mtok / 1_000_000.0;
14878
14879 if let Some(cache_price) = pricing.cache_read_cost_per_mtok {
14880 cost += cache_read as f64 * cache_price / 1_000_000.0;
14881 }
14882 if let Some(cache_price) = pricing.cache_creation_cost_per_mtok {
14883 cost += cache_creation as f64 * cache_price / 1_000_000.0;
14884 }
14885
14886 Some(cost)
14887 }
14888
14889 pub fn is_empty(&self) -> bool {
14891 self.entries.is_empty()
14892 }
14893}
14894
14895fn date_str_to_day_id(s: &str) -> Result<i64> {
14898 use chrono::NaiveDate;
14899 const EPOCH_2020: NaiveDate = match NaiveDate::from_ymd_opt(2020, 1, 1) {
14900 Some(d) => d,
14901 None => unreachable!(),
14902 };
14903 NaiveDate::parse_from_str(s, "%Y-%m-%d")
14904 .map(|d| (d - EPOCH_2020).num_days())
14905 .with_context(|| format!("invalid effective_date '{s}'"))
14906}
14907
/// SQL `LIKE`-style match of `value` against `pattern`, ignoring ASCII case.
///
/// `%` matches any run of characters (including none) and `_` matches exactly
/// one character. Non-ASCII characters are compared exactly.
fn sql_like_match(value: &str, pattern: &str) -> bool {
    sql_like_match_bytes(
        value.to_ascii_lowercase().as_bytes(),
        pattern.to_ascii_lowercase().as_bytes(),
    )
}

/// Length in bytes of the UTF-8 sequence introduced by lead byte `b`.
///
/// Only meaningful for lead bytes of well-formed UTF-8; continuation bytes are
/// not detected and fall into the two-byte bucket.
fn utf8_char_len(b: u8) -> usize {
    if b < 0x80 {
        1
    } else if b < 0xE0 {
        2
    } else if b < 0xF0 {
        3
    } else {
        4
    }
}

/// Case-sensitive `LIKE` matcher over UTF-8 bytes.
///
/// Both slices are expected to come from `&str` values. The matcher is
/// iterative: it remembers only the most recent `%` and, on a mismatch, lets
/// that wildcard swallow one more character before retrying. This keeps the
/// work bounded by `val.len() * pat.len()` and uses no recursion, so patterns
/// with many `%` cannot trigger combinatorial backtracking or deep stacks.
fn sql_like_match_bytes(val: &[u8], pat: &[u8]) -> bool {
    let mut v = 0usize;
    let mut p = 0usize;
    // (pattern index just after the last `%`, value index where the text
    // following that wildcard is currently assumed to start)
    let mut resume: Option<(usize, usize)> = None;

    while v < val.len() {
        match pat.get(p).copied() {
            Some(b'%') => {
                p += 1;
                resume = Some((p, v));
                continue;
            }
            Some(b'_') => {
                // `_` consumes one whole character, not one byte.
                let step = utf8_char_len(val[v]);
                if step <= val.len() - v {
                    v += step;
                    p += 1;
                    continue;
                }
            }
            Some(literal) if literal == val[v] => {
                v += 1;
                p += 1;
                continue;
            }
            _ => {}
        }

        // Mismatch or pattern exhausted: widen the last `%` by one character,
        // or give up when there is no wildcard to fall back on.
        let Some((resume_p, resume_v)) = resume else {
            return false;
        };
        let next_v = resume_v + utf8_char_len(val[resume_v]);
        resume = Some((resume_p, next_v));
        p = resume_p;
        v = next_v;
    }

    // The value is consumed; whatever is left of the pattern must be able to
    // match the empty string, i.e. consist solely of `%`.
    v == val.len() && pat[p..].iter().all(|&b| b == b'%')
}
14965
14966fn rebuild_batch_size_env(var: &str, default: usize) -> usize {
14967 dotenvy::var(var)
14968 .ok()
14969 .and_then(|raw| raw.parse::<usize>().ok())
14970 .filter(|value| *value > 0)
14971 .unwrap_or(default)
14972}
14973
14974fn is_out_of_memory_error<E: OutOfMemoryProbe + ?Sized>(err: &E) -> bool {
14984 err.is_out_of_memory()
14985}
14986
/// Implemented by error types that can tell whether they represent an
/// out-of-memory failure.
trait OutOfMemoryProbe {
    /// Returns `true` when this error denotes an out-of-memory condition.
    fn is_out_of_memory(&self) -> bool;
}
14990
14991impl OutOfMemoryProbe for anyhow::Error {
14992 fn is_out_of_memory(&self) -> bool {
14993 self.chain().any(|cause| {
14994 if cause
14995 .downcast_ref::<frankensqlite::FrankenError>()
14996 .is_some_and(|err| matches!(err, frankensqlite::FrankenError::OutOfMemory))
14997 {
14998 return true;
14999 }
15000 is_exact_out_of_memory_message(&cause.to_string())
15001 })
15002 }
15003}
15004
impl OutOfMemoryProbe for frankensqlite::FrankenError {
    /// True only for the `OutOfMemory` variant; no message inspection is done.
    fn is_out_of_memory(&self) -> bool {
        matches!(self, frankensqlite::FrankenError::OutOfMemory)
    }
}
15010
/// Returns `true` when `message`, ignoring surrounding whitespace and ASCII
/// case, is exactly `out of memory` or `not enough memory`.
///
/// Messages that merely contain one of these phrases do not match.
fn is_exact_out_of_memory_message(message: &str) -> bool {
    let trimmed = message.trim();
    ["out of memory", "not enough memory"]
        .iter()
        .any(|phrase| trimmed.eq_ignore_ascii_case(phrase))
}
15017
/// Session, message, and character totals for one day.
///
/// NOTE(review): no producer is visible in this part of the file; the meaning
/// of each counter is inferred from its name — confirm against the query that
/// fills it.
#[derive(Debug, Clone)]
pub struct DailyCount {
    pub day_id: i64,
    pub sessions: i64,
    pub messages: i64,
    pub chars: i64,
}
15030
/// Summary figures for an analytics rebuild.
///
/// NOTE(review): the code that populates this struct is outside this part of
/// the file; the field notes are inferred from names — confirm there.
#[derive(Debug, Clone)]
pub struct AnalyticsRebuildResult {
    // Row counts, presumably one per rebuilt table — TODO confirm.
    pub message_metrics_rows: usize,
    pub usage_hourly_rows: usize,
    pub usage_daily_rows: usize,
    pub usage_models_daily_rows: usize,
    pub elapsed_ms: u64,
    pub messages_per_sec: f64,
}
15041
/// Outcome of a daily-stats rebuild.
///
/// NOTE(review): populated outside this part of the file; field meanings are
/// inferred from their names — confirm there.
#[derive(Debug, Clone)]
pub struct DailyStatsRebuildResult {
    pub rows_created: i64,
    pub total_sessions: i64,
}
15048
/// Deletion counts reported by an agent-archive purge; `Default` is all zeros.
///
/// NOTE(review): the purge itself is outside this part of the file — confirm
/// exactly what each counter includes.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AgentArchivePurgeResult {
    pub conversations_deleted: usize,
    pub messages_deleted: usize,
}
15055
/// Health snapshot of the materialized daily statistics.
///
/// NOTE(review): computed outside this part of the file; the notes below are
/// inferred from field names — confirm against the health check.
#[derive(Debug, Clone)]
pub struct DailyStatsHealth {
    pub populated: bool,
    pub row_count: i64,
    // presumably epoch milliseconds of the stalest row — TODO confirm
    pub oldest_update_ms: Option<i64>,
    pub conversation_count: i64,
    pub materialized_total: i64,
    // presumably the gap between `conversation_count` and `materialized_total` — TODO confirm
    pub drift: i64,
}
15066
// Batch size (100) for FTS5 work.
// NOTE(review): no use is visible in this part of the file — confirm what is batched.
const FTS5_BATCH_SIZE: usize = 100;
15075
/// Message fields carried through an FTS rebuild.
///
/// NOTE(review): the query that fills this row is outside this part of the
/// file; how `rowid` relates to `message_id` is not shown here — confirm.
#[derive(Debug, Clone)]
struct FtsRebuildMessageRow {
    rowid: i64,
    message_id: i64,
    conversation_id: i64,
    content: String,
    created_at: Option<i64>,
}
15084
/// Conversation-level fields needed when building FTS rows.
///
/// NOTE(review): populated outside this part of the file — confirm how a
/// missing title or source path is represented.
#[derive(Debug, Clone)]
struct FtsConversationProjection {
    title: String,
    agent_id: Option<i64>,
    workspace_id: Option<i64>,
    source_path: String,
}
15092
/// One document for the full-text index, built from a message and its
/// conversation (see `FtsEntry::from_message`).
#[derive(Debug, Clone)]
pub struct FtsEntry {
    /// Message body.
    pub content: String,
    /// Conversation title; empty when the conversation has none.
    pub title: String,
    /// Agent slug of the conversation.
    pub agent: String,
    /// Workspace path rendered lossily as text; empty when there is no workspace.
    pub workspace: String,
    /// Conversation source path rendered lossily as text.
    pub source_path: String,
    /// Message timestamp, falling back to the conversation start time.
    pub created_at: Option<i64>,
    pub message_id: i64,
}
15104
15105impl FtsEntry {
15106 pub fn from_message(message_id: i64, msg: &Message, conv: &Conversation) -> Self {
15108 FtsEntry {
15109 content: msg.content.clone(),
15110 title: conv.title.clone().unwrap_or_default(),
15111 agent: conv.agent_slug.clone(),
15112 workspace: conv
15113 .workspace
15114 .as_ref()
15115 .map(|p| p.to_string_lossy().into_owned())
15116 .unwrap_or_default(),
15117 source_path: path_to_string(&conv.source_path),
15118 created_at: msg.created_at.or(conv.started_at),
15119 message_id,
15120 }
15121 }
15122}
15123
// Caps for a pending batch of FTS entries: 512 documents or 1 MiB of characters.
// NOTE(review): the code enforcing these caps is outside this part of the file — confirm.
const FTS_ENTRY_BATCH_MAX_DOCS: usize = 512;
const FTS_ENTRY_BATCH_MAX_CHARS: usize = 1024 * 1024;

// Fallback returned by `fts_rebuild_batch_size` when the
// `CASS_FTS_REBUILD_BATCH_SIZE` override is unset or invalid.
const FTS_REBUILD_BATCH_SIZE_DEFAULT: usize = 5_000;
15132
15133fn fts_rebuild_batch_size() -> usize {
15136 dotenvy::var("CASS_FTS_REBUILD_BATCH_SIZE")
15137 .ok()
15138 .and_then(|v| v.parse::<usize>().ok())
15139 .filter(|&n| n > 0)
15140 .unwrap_or(FTS_REBUILD_BATCH_SIZE_DEFAULT)
15141}
15142
15143fn flush_pending_fts_entries(
15144 storage: &FrankenStorage,
15145 tx: &FrankenTransaction<'_>,
15146 entries: &mut Vec<FtsEntry>,
15147 pending_chars: &mut usize,
15148 inserted_total: &mut usize,
15149) -> Result<()> {
15150 if entries.is_empty() {
15151 return Ok(());
15152 }
15153
15154 if storage.fts_messages_present_cached(tx) {
15155 *inserted_total += franken_batch_insert_fts(tx, entries)?;
15156 }
15157 entries.clear();
15158 *pending_chars = 0;
15159 Ok(())
15160}
15161
/// Renders a path as an owned `String`, replacing any non-UTF-8 sequences
/// with the Unicode replacement character.
fn path_to_string<P: AsRef<Path>>(p: P) -> String {
    let lossy = p.as_ref().to_string_lossy();
    String::from(lossy)
}
15165
15166fn role_str(role: &MessageRole) -> String {
15167 role_as_str(role).to_owned()
15168}
15169
/// Maps a message role to its textual form.
///
/// The built-in roles map to fixed lowercase names; `Other` yields its
/// contained string unchanged.
fn role_as_str(role: &MessageRole) -> &str {
    match role {
        MessageRole::User => "user",
        MessageRole::Agent => "agent",
        MessageRole::Tool => "tool",
        MessageRole::System => "system",
        MessageRole::Other(v) => v.as_str(),
    }
}
15179
15180fn agent_kind_str(kind: AgentKind) -> String {
15181 match kind {
15182 AgentKind::Cli => "cli".into(),
15183 AgentKind::VsCode => "vscode".into(),
15184 AgentKind::Hybrid => "hybrid".into(),
15185 }
15186}
15187
15188#[cfg(test)]
15193mod tests {
15194 use super::*;
15195 use serial_test::serial;
15196 use tempfile::TempDir;
15197
/// Restores an environment variable to its earlier state when dropped.
struct EnvGuard {
    // Name of the variable being guarded.
    key: &'static str,
    // Value to restore on drop; `None` means the variable is removed instead.
    previous: Option<String>,
}
15202
15203 impl Drop for EnvGuard {
15204 fn drop(&mut self) {
15205 if let Some(value) = &self.previous {
15206 unsafe {
15208 std::env::set_var(self.key, value);
15209 }
15210 } else {
15211 unsafe {
15213 std::env::remove_var(self.key);
15214 }
15215 }
15216 }
15217 }
15218
15219 fn set_env_var(key: &'static str, value: impl AsRef<str>) -> EnvGuard {
15220 let previous = dotenvy::var(key).ok();
15221 unsafe {
15223 std::env::set_var(key, value.as_ref());
15224 }
15225 EnvGuard { key, previous }
15226 }
15227
/// The doctor mutation lock path is derived only for `agent_search.db`; any
/// other database file name yields no lock path.
#[test]
fn doctor_mutation_open_guard_only_targets_canonical_archive_db() {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path();

    let expected_lock = root.join("doctor/locks/doctor-repair.lock");
    let canonical_lock = doctor_mutation_lock_path_for_db_open(&root.join("agent_search.db"));
    assert_eq!(canonical_lock, Some(expected_lock));

    let scratch_lock = doctor_mutation_lock_path_for_db_open(&root.join("scratch.db"));
    assert_eq!(scratch_lock, None);
}
15240
/// Lock metadata is attributed to this process only when its `pid=` line
/// carries exactly the current pid; garbage or a neighbouring pid is rejected.
#[test]
fn doctor_lock_metadata_pid_detection_is_exact() {
    let own_pid = std::process::id();

    let own_metadata = format!("schema_version=1\npid={own_pid}\nmode=safe_auto_run\n");
    assert!(doctor_lock_metadata_pid_is_current_process(&own_metadata));

    let unparsable_metadata = "schema_version=1\npid=not-a-pid\n";
    assert!(!doctor_lock_metadata_pid_is_current_process(
        unparsable_metadata
    ));

    let neighbour_metadata = format!("pid={}\n", own_pid.saturating_add(1));
    assert!(!doctor_lock_metadata_pid_is_current_process(
        &neighbour_metadata
    ));
}
15256
/// While the doctor lock file is exclusively locked and its metadata names
/// another pid (`pid=1`), a read-only open of the canonical database must fail
/// with an error mentioning the active doctor mutation lock.
#[test]
#[cfg(not(windows))]
fn doctor_storage_open_refuses_active_doctor_mutation_lock_from_other_process() {
    use std::io::Write as _;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    {
        // Create the database first, then close it before taking the lock.
        let storage = FrankenStorage::open(&db_path).unwrap();
        storage.close().unwrap();
    }

    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut lock_file = fs::OpenOptions::new()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&lock_path)
        .unwrap();
    // Hold the lock, then rewrite the metadata so it names a foreign pid.
    fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
    lock_file.set_len(0).unwrap();
    lock_file.write_all(b"schema_version=1\npid=1\n").unwrap();
    lock_file.sync_all().unwrap();

    let err =
        open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
            .expect_err("active doctor mutation lock must block canonical DB opens");
    let message = err.to_string();
    assert!(
        message.contains("doctor mutation lock") && message.contains("active"),
        "error should identify the active doctor mutation lock: {message}"
    );

    fs2::FileExt::unlock(&lock_file).unwrap();
}
15293
/// When the lock metadata names the current process, a read-only open of the
/// canonical database succeeds even though the lock file is held.
#[test]
fn doctor_storage_open_allows_current_doctor_process_probe() {
    use std::io::Write as _;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    {
        // Create the database first, then close it before taking the lock.
        let storage = FrankenStorage::open(&db_path).unwrap();
        storage.close().unwrap();
    }

    let lock_path = doctor_mutation_lock_path_for_db_open(&db_path).unwrap();
    let mut lock_file = fs::OpenOptions::new()
        .create(true)
        .truncate(false)
        .read(true)
        .write(true)
        .open(&lock_path)
        .unwrap();
    // Hold the lock and record this process's own pid in the metadata.
    fs2::FileExt::try_lock_exclusive(&lock_file).unwrap();
    lock_file.set_len(0).unwrap();
    write!(lock_file, "schema_version=1\npid={}\n", std::process::id()).unwrap();
    lock_file.sync_all().unwrap();

    // On Windows the test additionally enters the explicit open bypass.
    #[cfg(windows)]
    let _bypass = enter_doctor_mutation_db_open_bypass();

    let conn =
        open_franken_raw_readonly_connection_with_timeout(&db_path, Duration::from_millis(25))
            .expect(
                "doctor process must be able to run post-repair read probes under its own lock",
            );
    drop(conn);

    fs2::FileExt::unlock(&lock_file).unwrap();
}
15330
/// With a stub executor that rejects the `fsqlite.`-namespaced pragma and
/// accepts anything else, the canonical pragma is the one reported as
/// selected, and the attempts equal `AUTOCOMMIT_RETAIN_OFF_PRAGMAS` in order.
#[test]
fn autocommit_retain_disable_tries_compat_then_canonical_pragma() {
    let mut attempts = Vec::new();

    let selected = disable_autocommit_retain(|pragma| {
        attempts.push(pragma);
        if pragma == "PRAGMA fsqlite.autocommit_retain = OFF;" {
            Err("compat namespace unavailable")
        } else {
            Ok(())
        }
    })
    .expect("canonical pragma should disable autocommit retain");

    assert_eq!(selected, "PRAGMA autocommit_retain = OFF;");
    assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
}
15348
/// With a stub executor that rejects every pragma, the call returns an error
/// after trying all of `AUTOCOMMIT_RETAIN_OFF_PRAGMAS`, and the error text
/// names both attempted pragmas.
#[test]
fn autocommit_retain_disable_fails_closed_when_no_pragma_works() {
    let mut attempts = Vec::new();

    let err = disable_autocommit_retain(|pragma| {
        attempts.push(pragma);
        Err("unsupported pragma")
    })
    .expect_err("unsupported autocommit retain controls should fail closed");

    assert_eq!(attempts, AUTOCOMMIT_RETAIN_OFF_PRAGMAS);
    let message = err.to_string();
    assert!(
        message.contains("refusing to keep a long-lived MVCC connection"),
        "error should force callers away from unbounded snapshot retention: {message}"
    );
    assert!(
        message.contains("PRAGMA fsqlite.autocommit_retain = OFF;")
            && message.contains("PRAGMA autocommit_retain = OFF;"),
        "error should preserve attempted PRAGMAs for diagnostics: {message}"
    );
}
15371
/// Opens a `rusqlite` connection to `db_path` for test fixtures, panicking if
/// the open fails.
fn rusqlite_test_fixture_conn(db_path: &Path) -> rusqlite::Connection {
    rusqlite::Connection::open(db_path).expect("open rusqlite test fixture connection")
}
15383
/// Seeds a database at `db_path` by writing rows directly with SQL.
///
/// Creates the parent directory if needed, applies
/// `HISTORICAL_RECOVERY_CORE_SCHEMA`, inserts a single `codex` agent with id 1,
/// and then inserts every conversation and its messages. Conversation ids are
/// 1-based positions in `conversations`; message ids run sequentially across
/// all conversations. Any failure panics.
fn seed_historical_db_direct(
    db_path: &Path,
    conversations: &[crate::model::types::Conversation],
) {
    if let Some(parent) = db_path.parent() {
        fs::create_dir_all(parent).unwrap();
    }

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(HISTORICAL_RECOVERY_CORE_SCHEMA).unwrap();
    conn.execute_compat(
        "INSERT INTO agents(id, slug, name, version, kind, created_at, updated_at)
         VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
        fparams![1_i64, "codex", "Codex", "0.2.3", "cli", 0_i64, 0_i64],
    )
    .unwrap();

    let mut next_message_id = 1_i64;
    for (conv_index, conv) in conversations.iter().enumerate() {
        let conversation_id = i64::try_from(conv_index + 1).unwrap();
        // Each conversation with a workspace gets its own workspace row whose
        // id reuses the conversation id.
        let workspace_id = conv.workspace.as_ref().map(|workspace| {
            let workspace_id = conversation_id;
            let workspace_path = workspace.to_string_lossy().into_owned();
            conn.execute_compat(
                "INSERT INTO workspaces(id, path, display_name) VALUES(?1, ?2, ?3)",
                fparams![
                    workspace_id,
                    workspace_path.as_str(),
                    workspace_path.as_str()
                ],
            )
            .unwrap();
            workspace_id
        });
        let source_path = conv.source_path.to_string_lossy().into_owned();
        let metadata_json = conv.metadata_json.to_string();
        // Every conversation is attached to the single seeded agent (id 1).
        conn.execute_compat(
            "INSERT INTO conversations (
                id, agent_id, workspace_id, source_id, external_id, title, source_path,
                started_at, ended_at, approx_tokens, metadata_json, origin_host
             ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
            fparams![
                conversation_id,
                1_i64,
                workspace_id,
                conv.source_id.as_str(),
                conv.external_id.as_deref(),
                conv.title.as_deref(),
                source_path.as_str(),
                conv.started_at,
                conv.ended_at,
                conv.approx_tokens,
                metadata_json.as_str(),
                conv.origin_host.as_deref()
            ],
        )
        .unwrap();

        for msg in &conv.messages {
            let extra_json = msg.extra_json.to_string();
            let role = role_str(&msg.role);
            conn.execute_compat(
                "INSERT INTO messages(
                    id, conversation_id, idx, role, author, created_at, content, extra_json
                 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
                fparams![
                    next_message_id,
                    conversation_id,
                    msg.idx,
                    role.as_str(),
                    msg.author.as_deref(),
                    msg.created_at,
                    msg.content.as_str(),
                    extra_json.as_str()
                ],
            )
            .unwrap();
            next_message_id += 1;
        }
    }
}
15465
/// `bookmarks.db` counts as user data both with and without a directory prefix.
#[test]
fn is_user_data_file_detects_bookmarks() {
    for candidate in ["/data/bookmarks.db", "bookmarks.db"] {
        assert!(is_user_data_file(Path::new(candidate)));
    }
}
15475
/// `tui_state.json` counts as user data.
#[test]
fn is_user_data_file_detects_tui_state() {
    let tui_state = Path::new("/data/tui_state.json");
    assert!(is_user_data_file(tui_state));
}
15480
/// `sources.toml` counts as user data.
#[test]
fn is_user_data_file_detects_sources_toml() {
    let sources_config = Path::new("/config/sources.toml");
    assert!(is_user_data_file(sources_config));
}
15485
/// A bare `.env` file counts as user data.
#[test]
fn is_user_data_file_detects_env() {
    let dotenv_file = Path::new(".env");
    assert!(is_user_data_file(dotenv_file));
}
15490
/// Other database and text files are not treated as user data.
#[test]
fn is_user_data_file_rejects_other_files() {
    for candidate in ["index.db", "conversations.db", "random.txt"] {
        assert!(!is_user_data_file(Path::new(candidate)));
    }
}
15497
/// Backing up a database path that does not exist succeeds with `None`.
#[test]
fn create_backup_returns_none_for_nonexistent() {
    let tmp = TempDir::new().unwrap();
    let missing_db = tmp.path().join("nonexistent.db");

    let outcome = create_backup(&missing_db).unwrap();

    assert!(outcome.is_none());
}
15509
/// A backup of an existing file exists on disk and has `backup` in its name.
#[test]
fn create_backup_creates_named_file() {
    let tmp = TempDir::new().unwrap();
    let source_db = tmp.path().join("test.db");
    std::fs::write(&source_db, b"test data").unwrap();

    let created = create_backup(&source_db).unwrap();
    assert!(created.is_some());

    let backup = created.unwrap();
    assert!(backup.exists());

    let backup_name = backup.file_name().unwrap().to_str().unwrap();
    assert!(backup_name.contains("backup"));
}
15529
/// Two back-to-back backups of the same file get distinct paths and both
/// remain on disk.
#[test]
fn create_backup_paths_are_unique() {
    let tmp = TempDir::new().unwrap();
    let source_db = tmp.path().join("test.db");
    std::fs::write(&source_db, b"test data").unwrap();

    let earlier = create_backup(&source_db).unwrap().unwrap();
    let later = create_backup(&source_db).unwrap().unwrap();

    assert_ne!(earlier, later);
    for backup in [&earlier, &later] {
        assert!(backup.exists());
    }
}
15543
/// Inspects the `EXPLAIN` output of the per-conversation message query and
/// requires a `SeekGE` opcode and no `SorterOpen` opcode, i.e. the
/// `ORDER BY idx` must be served without a sorter.
#[test]
fn lexical_rebuild_messages_query_uses_conversation_idx_access_path() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    // Fixture: one agent and one conversation holding two messages.
    let agent = Agent {
        id: None,
        slug: "claude_code".into(),
        name: "Claude Code".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("conv-1".into()),
        title: Some("Lexical rebuild".into()),
        source_path: PathBuf::from("/tmp/conv-1.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![
            Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: Some("user".into()),
                created_at: Some(1_700_000_000_010),
                content: "first".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            },
            Message {
                id: None,
                idx: 1,
                role: MessageRole::Agent,
                author: Some("assistant".into()),
                created_at: Some(1_700_000_000_020),
                content: "second".into(),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            },
        ],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    let conversation_id = storage
        .conn
        .query_row_map(
            "SELECT id FROM conversations WHERE external_id = ?1",
            fparams!["conv-1"],
            |row| row.get_typed::<i64>(0),
        )
        .unwrap();

    // Column 1 of each EXPLAIN row is read as the opcode name.
    let opcodes: Vec<String> = storage
        .conn
        .query_map_collect(
            "EXPLAIN \
             SELECT id, idx, role, author, created_at, content \
             FROM messages \
             WHERE conversation_id = ?1 ORDER BY idx",
            fparams![conversation_id],
            |row| row.get_typed(1),
        )
        .unwrap();

    assert!(
        opcodes.iter().any(|opcode| opcode == "SeekGE"),
        "expected lexical rebuild message fetch to seek into the conversation_id/idx access path, got {opcodes:?}"
    );
    assert!(
        !opcodes.iter().any(|opcode| opcode == "SorterOpen"),
        "expected lexical rebuild message fetch to avoid sorter temp b-trees, got {opcodes:?}"
    );
}
15630
/// `Busy`, `DatabaseLocked`, `CannotOpen`, and `Io` errors from a schema check
/// must not be classified as requiring a rebuild.
#[test]
fn schema_check_rebuild_classification_ignores_transient_errors() {
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::Busy
    ));
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::DatabaseLocked {
            path: PathBuf::from("/tmp/test.db"),
        }
    ));
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::CannotOpen {
            path: PathBuf::from("/tmp/test.db"),
        }
    ));
    assert!(!schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::Io(std::io::Error::other("disk hiccup"))
    ));
}
15650
/// `DatabaseCorrupt`, `WalCorrupt`, `NotADatabase`, and `ShortRead` errors from
/// a schema check must be classified as requiring a rebuild.
#[test]
fn schema_check_rebuild_classification_keeps_corruption_errors() {
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string(),
        }
    ));
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::WalCorrupt {
            detail: "bad wal".to_string(),
        }
    ));
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::NotADatabase {
            path: PathBuf::from("/tmp/test.db"),
        }
    ));
    assert!(schema_check_error_requires_rebuild(
        &frankensqlite::FrankenError::ShortRead {
            expected: 4096,
            actual: 64,
        }
    ));
}
15675
/// Busy, lock, and conflict style VACUUM errors must be classified as
/// requiring a consistent retry, while `NotADatabase` and `DatabaseCorrupt`
/// must not.
#[test]
fn create_backup_refuses_raw_copy_after_retryable_vacuum_errors() {
    let retryable_errors = [
        frankensqlite::FrankenError::Busy,
        frankensqlite::FrankenError::BusyRecovery,
        frankensqlite::FrankenError::BusySnapshot {
            conflicting_pages: "1,2".to_string(),
        },
        frankensqlite::FrankenError::DatabaseLocked {
            path: PathBuf::from("/tmp/test.db"),
        },
        frankensqlite::FrankenError::LockFailed {
            detail: "fcntl lock still held".to_string(),
        },
        frankensqlite::FrankenError::WriteConflict { page: 7, holder: 9 },
        frankensqlite::FrankenError::SerializationFailure { page: 11 },
        // An `Internal` error whose text is "database is locked" is expected
        // to count as retryable too.
        frankensqlite::FrankenError::Internal("database is locked".to_string()),
    ];

    for err in retryable_errors {
        assert!(
            backup_vacuum_error_requires_consistent_retry(&err),
            "retryable VACUUM failure must not fall back to raw bundle copy: {err}"
        );
    }

    assert!(!backup_vacuum_error_requires_consistent_retry(
        &frankensqlite::FrankenError::NotADatabase {
            path: PathBuf::from("/tmp/test.db")
        }
    ));
    assert!(!backup_vacuum_error_requires_consistent_retry(
        &frankensqlite::FrankenError::DatabaseCorrupt {
            detail: "bad header".to_string()
        }
    ));
}
15713
/// The VACUUM staging file is a dot-file ending in `.vacuum-in-progress` and
/// is not recognised as a backup root for the `test.db.backup.` prefix.
#[test]
fn create_backup_uses_hidden_vacuum_stage_path() {
    let final_backup = PathBuf::from("/tmp/test.db.backup.123.456.0");
    let staged = vacuum_stage_backup_path(&final_backup);
    let staged_name = match staged.file_name().and_then(std::ffi::OsStr::to_str) {
        Some(name) => name,
        None => "",
    };

    assert!(staged_name.starts_with('.'));
    assert!(staged_name.ends_with(".vacuum-in-progress"));
    assert!(
        !is_backup_root_name(staged_name, "test.db.backup."),
        "incomplete VACUUM output must not be discoverable as a backup root"
    );
}
15730
/// The backup file holds exactly the bytes of the source file.
#[test]
fn create_backup_preserves_content() {
    let tmp = TempDir::new().unwrap();
    let source_db = tmp.path().join("test.db");
    let payload = b"test database content 12345";
    std::fs::write(&source_db, payload).unwrap();

    let backup = create_backup(&source_db).unwrap().unwrap();

    let copied = std::fs::read(&backup).unwrap();
    assert_eq!(copied, payload);
}
15742
/// `-wal` and `-shm` sidecars next to the database are copied next to the
/// backup with their contents intact.
#[test]
fn create_backup_copies_sidecars_when_present() {
    let sidecars = [("-wal", b"wal"), ("-shm", b"shm")];

    let tmp = TempDir::new().unwrap();
    let source_db = tmp.path().join("test.db");
    std::fs::write(&source_db, b"db").unwrap();
    for (suffix, payload) in sidecars {
        std::fs::write(database_sidecar_path(&source_db, suffix), payload).unwrap();
    }

    let backup = create_backup(&source_db).unwrap().unwrap();

    for (suffix, payload) in sidecars {
        assert_eq!(
            std::fs::read(database_sidecar_path(&backup, suffix)).unwrap(),
            payload
        );
    }
}
15762
/// When the database path is a symlink to a non-SQLite file, the backup must
/// fail with a "bundle symlink" error, leave the link target untouched, and
/// publish no `test.db.backup.*` file.
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_root_during_raw_fallback() {
    use std::os::unix::fs::symlink;

    let dir = TempDir::new().unwrap();
    let outside_db = dir.path().join("outside.db");
    let db_path = dir.path().join("test.db");
    std::fs::write(&outside_db, b"not sqlite").unwrap();
    symlink(&outside_db, &db_path).unwrap();

    let err = create_backup(&db_path).unwrap_err();

    assert!(
        err.to_string().contains("bundle symlink"),
        "unexpected error: {err:#}"
    );
    assert_eq!(std::fs::read(&outside_db).unwrap(), b"not sqlite");
    // No file carrying the backup prefix may exist in the directory.
    let backup_roots: Vec<_> = std::fs::read_dir(dir.path())
        .unwrap()
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.file_name().to_string_lossy().into_owned())
        .filter(|name| name.starts_with("test.db.backup."))
        .collect();
    assert!(
        backup_roots.is_empty(),
        "symlinked backup source must not publish backup roots: {backup_roots:?}"
    );
}
15792
/// A symlinked `-wal` sidecar must make `create_backup` fail with a "bundle symlink"
/// error, leave the link target untouched, and leave no `test.db.backup.*` entry behind.
#[test]
#[cfg(unix)]
fn create_backup_rejects_symlink_sidecar_without_partial_backup() {
    use std::os::unix::fs::symlink;

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let outside_wal = tmp.path().join("outside.wal");
    std::fs::write(&source, b"not sqlite").unwrap();
    std::fs::write(&outside_wal, b"outside wal").unwrap();
    symlink(&outside_wal, database_sidecar_path(&source, "-wal")).unwrap();

    let err = create_backup(&source).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );

    assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");

    let mut backup_roots = Vec::new();
    for entry in std::fs::read_dir(tmp.path()).unwrap().flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        if name.starts_with("test.db.backup.") {
            backup_roots.push(name);
        }
    }
    assert!(
        backup_roots.is_empty(),
        "sidecar preflight failure must not leave a partial backup root: {backup_roots:?}"
    );
}
15824
/// With five backup files present and a retention limit of three, exactly three
/// entries whose name contains "backup" must remain after cleanup.
#[test]
fn cleanup_old_backups_keeps_recent() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    (0..5).for_each(|i| {
        let backup_file = tmp.path().join(format!("test.db.backup.{}", 1000 + i));
        std::fs::write(backup_file, format!("backup {i}")).unwrap();
    });

    cleanup_old_backups(&db_path, 3).unwrap();

    let remaining = std::fs::read_dir(tmp.path())
        .unwrap()
        .flatten()
        .filter(|entry| {
            // Names that are not valid UTF-8 never count as backups.
            entry
                .file_name()
                .to_str()
                .map_or(false, |name| name.contains("backup"))
        })
        .count();

    assert_eq!(remaining, 3);
}
15851
/// Creates three backups, each with `-wal` and `-shm` sidecars, trims retention to
/// two, and checks that exactly two roots, two WAL files and two SHM files remain.
#[test]
fn cleanup_old_backups_ignores_wal_and_shm_sidecars() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    for i in 0..3 {
        let backup_path = dir.path().join(format!("test.db.backup.{}", 1000 + i));
        std::fs::write(&backup_path, format!("backup {i}")).unwrap();
        // Build sidecar paths with the shared helper instead of formatting
        // `Path::display()` output, which is lossy for non-UTF-8 paths and
        // could drift from the naming the production code uses.
        std::fs::write(database_sidecar_path(&backup_path, "-wal"), b"wal").unwrap();
        std::fs::write(database_sidecar_path(&backup_path, "-shm"), b"shm").unwrap();
        // NOTE(review): presumably spaces out file timestamps so retention can
        // order the backups — confirm against `cleanup_old_backups`.
        std::thread::sleep(std::time::Duration::from_millis(20));
    }

    cleanup_old_backups(&db_path, 2).unwrap();

    let mut roots = Vec::new();
    let mut wals = Vec::new();
    let mut shms = Vec::new();
    for entry in std::fs::read_dir(dir.path()).unwrap().flatten() {
        let name = entry.file_name().to_string_lossy().into_owned();
        // Classify sidecars first: their names also contain "backup".
        if name.ends_with("-wal") {
            wals.push(name);
        } else if name.ends_with("-shm") {
            shms.push(name);
        } else if name.contains("backup") {
            roots.push(name);
        }
    }

    assert_eq!(roots.len(), 2, "should keep two backup roots");
    assert_eq!(
        wals.len(),
        2,
        "should keep WAL sidecars only for retained backups"
    );
    assert_eq!(
        shms.len(),
        2,
        "should keep SHM sidecars only for retained backups"
    );
}
15897
/// Moving a bundle made of a database plus `-wal` and `-shm` files must report all
/// three as moved, remove every source file, and preserve the bytes at the destination.
#[test]
fn move_database_bundle_moves_database_and_sidecars() {
    const SIDECARS: [(&str, &[u8; 3]); 2] = [("-wal", b"wal"), ("-shm", b"shm")];

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("test.db.corrupt");

    std::fs::write(&source, b"db").unwrap();
    for (suffix, bytes) in SIDECARS {
        std::fs::write(database_sidecar_path(&source, suffix), bytes).unwrap();
    }

    let moved = move_database_bundle(&source, &destination).unwrap();
    let everything_moved = DatabaseBundleMoveResult {
        database: true,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, everything_moved);
    assert!(moved.moved_any());

    // Nothing may be left at the original location.
    assert!(!source.exists());
    for (suffix, _) in SIDECARS {
        assert!(!database_sidecar_path(&source, suffix).exists());
    }

    assert_eq!(std::fs::read(&destination).unwrap(), b"db");
    for (suffix, bytes) in SIDECARS {
        let relocated = std::fs::read(database_sidecar_path(&destination, suffix)).unwrap();
        assert_eq!(relocated, bytes);
    }
}
15933
/// With only `-wal` and `-shm` files present (no main database), the move must report
/// `database: false`, still relocate both sidecars, and keep their contents.
#[test]
fn move_database_bundle_preserves_orphan_sidecars_without_main_db() {
    const SIDECARS: [(&str, &[u8; 3]); 2] = [("-wal", b"wal"), ("-shm", b"shm")];

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("test.db.corrupt");

    for (suffix, bytes) in SIDECARS {
        std::fs::write(database_sidecar_path(&source, suffix), bytes).unwrap();
    }

    let moved = move_database_bundle(&source, &destination).unwrap();
    let sidecars_only = DatabaseBundleMoveResult {
        database: false,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, sidecars_only);
    assert!(moved.moved_any());

    assert!(!source.exists());
    for (suffix, _) in SIDECARS {
        assert!(!database_sidecar_path(&source, suffix).exists());
    }
    for (suffix, bytes) in SIDECARS {
        let relocated = std::fs::read(database_sidecar_path(&destination, suffix)).unwrap();
        assert_eq!(relocated, bytes);
    }
}
15965
/// A database path that is a dangling symlink must be moved as a link: the source
/// entry disappears, the destination is a symlink, and the target is never created.
#[test]
#[cfg(unix)]
fn move_database_bundle_moves_dangling_symlink_database_root() {
    use std::os::unix::fs::symlink;

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("test.db.corrupt");
    let missing_target = tmp.path().join("missing-target.db");

    symlink(&missing_target, &source).unwrap();

    let moved = move_database_bundle(&source, &destination).unwrap();
    let database_only = DatabaseBundleMoveResult {
        database: true,
        wal: false,
        shm: false,
    };
    assert_eq!(moved, database_only);

    // `symlink_metadata` does not follow links, so an error means the link itself is gone.
    assert!(std::fs::symlink_metadata(&source).is_err());

    let destination_meta = std::fs::symlink_metadata(&destination).unwrap();
    assert!(destination_meta.file_type().is_symlink());

    assert!(!missing_target.exists());
}
15997
/// Dangling-symlink `-wal` and `-shm` sidecars with no main database must be moved as
/// links: sources vanish, destinations are symlinks, and the targets are never created.
#[test]
#[cfg(unix)]
fn move_database_bundle_moves_dangling_symlink_sidecars_without_main_db() {
    use std::os::unix::fs::symlink;

    // (sidecar suffix, name of the non-existent link target)
    const SIDECAR_TARGETS: [(&str, &str); 2] = [("-wal", "missing-wal"), ("-shm", "missing-shm")];

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("test.db.corrupt");

    for (suffix, target) in SIDECAR_TARGETS {
        symlink(
            tmp.path().join(target),
            database_sidecar_path(&source, suffix),
        )
        .unwrap();
    }

    let moved = move_database_bundle(&source, &destination).unwrap();
    let sidecars_only = DatabaseBundleMoveResult {
        database: false,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, sidecars_only);

    for (suffix, _) in SIDECAR_TARGETS {
        assert!(std::fs::symlink_metadata(database_sidecar_path(&source, suffix)).is_err());
    }
    for (suffix, _) in SIDECAR_TARGETS {
        let relocated = database_sidecar_path(&destination, suffix);
        let meta = std::fs::symlink_metadata(relocated).unwrap();
        assert!(meta.file_type().is_symlink());
    }
    for (_, target) in SIDECAR_TARGETS {
        assert!(!tmp.path().join(target).exists());
    }
}
16041
/// Copying a bundle must reproduce the database and both sidecars at the destination
/// while leaving the source database in place.
#[test]
fn copy_database_bundle_copies_database_and_sidecars() {
    const SIDECARS: [(&str, &[u8; 3]); 2] = [("-wal", b"wal"), ("-shm", b"shm")];

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("copy.db");

    std::fs::write(&source, b"db").unwrap();
    for (suffix, bytes) in SIDECARS {
        std::fs::write(database_sidecar_path(&source, suffix), bytes).unwrap();
    }

    copy_database_bundle(&source, &destination).unwrap();

    assert_eq!(std::fs::read(&destination).unwrap(), b"db");
    for (suffix, bytes) in SIDECARS {
        let copied = std::fs::read(database_sidecar_path(&destination, suffix)).unwrap();
        assert_eq!(copied, bytes);
    }
    // A copy, unlike a move, keeps the original readable.
    assert_eq!(std::fs::read(&source).unwrap(), b"db");
}
16065
/// Copying into a destination whose parent directories do not exist yet must create
/// them and still copy the database and its `-wal` sidecar.
#[test]
fn copy_database_bundle_creates_destination_parent() {
    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("nested/copies/copy.db");

    std::fs::write(&source, b"db").unwrap();
    std::fs::write(database_sidecar_path(&source, "-wal"), b"wal").unwrap();

    copy_database_bundle(&source, &destination).unwrap();

    let parent = destination.parent().unwrap();
    assert!(parent.is_dir());
    assert_eq!(std::fs::read(&destination).unwrap(), b"db");

    let copied_wal = std::fs::read(database_sidecar_path(&destination, "-wal")).unwrap();
    assert_eq!(copied_wal, b"wal");
}
16084
/// A symlinked source database must be rejected with a "bundle symlink" error, with
/// no destination file created and the link target left unchanged.
#[test]
#[cfg(unix)]
fn copy_database_bundle_rejects_symlink_source_root() {
    use std::os::unix::fs::symlink;

    let tmp = TempDir::new().unwrap();
    let link_target = tmp.path().join("outside.db");
    let link = tmp.path().join("test.db");
    let destination = tmp.path().join("copy.db");

    std::fs::write(&link_target, b"outside").unwrap();
    symlink(&link_target, &link).unwrap();

    let err = copy_database_bundle(&link, &destination).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );

    assert!(!destination.exists());
    assert_eq!(std::fs::read(&link_target).unwrap(), b"outside");
}
16107
/// A symlinked `-wal` sidecar must be rejected with a "bundle symlink" error; the link
/// target stays unchanged and neither the copy nor its `-wal` sidecar is created.
#[test]
#[cfg(unix)]
fn copy_database_bundle_rejects_symlink_sidecar() {
    use std::os::unix::fs::symlink;

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("copy.db");
    let outside_wal = tmp.path().join("outside.wal");

    std::fs::write(&source, b"db").unwrap();
    std::fs::write(&outside_wal, b"outside wal").unwrap();
    symlink(&outside_wal, database_sidecar_path(&source, "-wal")).unwrap();

    let err = copy_database_bundle(&source, &destination).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("bundle symlink"),
        "unexpected error: {err:#}"
    );

    assert_eq!(std::fs::read(&outside_wal).unwrap(), b"outside wal");
    assert!(!destination.exists());
    let destination_wal = database_sidecar_path(&destination, "-wal");
    assert!(!destination_wal.exists());
}
16133
/// Moving a full bundle into a not-yet-existing nested directory must create the
/// directory and relocate the database together with both sidecars.
#[test]
fn move_database_bundle_creates_destination_parent_and_moves_sidecars() {
    const SIDECARS: [(&str, &[u8; 3]); 2] = [("-wal", b"wal"), ("-shm", b"shm")];

    let tmp = TempDir::new().unwrap();
    let source = tmp.path().join("test.db");
    let destination = tmp.path().join("nested/backups/test.db.corrupt");

    std::fs::write(&source, b"db").unwrap();
    for (suffix, bytes) in SIDECARS {
        std::fs::write(database_sidecar_path(&source, suffix), bytes).unwrap();
    }

    let moved = move_database_bundle(&source, &destination).unwrap();
    let everything_moved = DatabaseBundleMoveResult {
        database: true,
        wal: true,
        shm: true,
    };
    assert_eq!(moved, everything_moved);

    let parent = destination.parent().unwrap();
    assert!(parent.is_dir());
    assert_eq!(std::fs::read(&destination).unwrap(), b"db");
    for (suffix, bytes) in SIDECARS {
        let relocated = std::fs::read(database_sidecar_path(&destination, suffix)).unwrap();
        assert_eq!(relocated, bytes);
    }
}
16164
/// `remove_database_files` must succeed and delete `-wal` / `-shm` files even when the
/// main database file was never created.
#[test]
fn remove_database_files_removes_orphan_sidecars_without_main_db() {
    const SUFFIXES: [&str; 2] = ["-wal", "-shm"];

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    std::fs::write(database_sidecar_path(&db_path, "-wal"), b"wal").unwrap();
    std::fs::write(database_sidecar_path(&db_path, "-shm"), b"shm").unwrap();

    remove_database_files(&db_path).unwrap();

    assert!(!db_path.exists());
    for suffix in SUFFIXES {
        assert!(!database_sidecar_path(&db_path, suffix).exists());
    }
}
16179
/// A directory named like a backup must neither count toward the retention limit nor
/// be removed: with three backup files, one such directory and a limit of two, two
/// files and the directory must remain.
#[test]
fn cleanup_old_backups_ignores_backup_named_directories() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    (0..3).for_each(|i| {
        let backup_file = tmp.path().join(format!("test.db.backup.{}", 1000 + i));
        std::fs::write(backup_file, format!("backup {i}")).unwrap();
    });
    std::fs::create_dir(tmp.path().join("test.db.backup.directory")).unwrap();

    cleanup_old_backups(&db_path, 2).unwrap();

    // Split every `test.db.backup.*` entry into directories and everything else.
    let (backup_dirs, backup_files): (Vec<_>, Vec<_>) = std::fs::read_dir(tmp.path())
        .unwrap()
        .flatten()
        .filter(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .starts_with("test.db.backup.")
        })
        .partition(|entry| entry.path().is_dir());

    assert_eq!(
        backup_files.len(),
        2,
        "only real backup files count toward retention"
    );
    assert_eq!(
        backup_dirs.len(),
        1,
        "backup-named directories should be ignored"
    );
}
16221
/// `SqliteStorage::open` on a path with no file must create the database file, and
/// the resulting handle must close cleanly.
#[test]
fn open_creates_new_database() {
    let tmp = TempDir::new().unwrap();
    let fresh_db = tmp.path().join("new.db");
    assert!(!fresh_db.exists());

    let handle = SqliteStorage::open(&fresh_db).unwrap();

    assert!(fresh_db.exists());
    handle.close().unwrap();
}
16236
/// Opening a path that does not exist in read-only mode must return an error.
#[test]
fn open_readonly_fails_for_nonexistent() {
    let tmp = TempDir::new().unwrap();
    let missing_db = tmp.path().join("nonexistent.db");

    assert!(SqliteStorage::open_readonly(&missing_db).is_err());
}
16244
/// After a database has been created with `open` and that handle dropped, a read-only
/// open must succeed and be able to report the schema version.
#[test]
fn open_readonly_succeeds_for_existing() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    // Create the database, then release the handle before reopening read-only.
    let creator = SqliteStorage::open(&db_path).unwrap();
    drop(creator);

    let reader = SqliteStorage::open_readonly(&db_path).unwrap();
    assert!(reader.schema_version().is_ok());
}
16258
/// Opening a database that is already at `CURRENT_SCHEMA_VERSION` a second time must
/// succeed and still report `CURRENT_SCHEMA_VERSION`.
#[test]
fn reopen_existing_current_schema_is_idempotent() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    let first_open = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(first_open.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    // Release the first handle before opening the same file again.
    drop(first_open);

    let reopened = SqliteStorage::open(&db_path).unwrap();
    let version_after_reopen = reopened.schema_version().unwrap();
    assert_eq!(
        version_after_reopen, CURRENT_SCHEMA_VERSION,
        "reopening current schema DB should be idempotent"
    );
}
16278
/// `open_or_rebuild` on a database already at `CURRENT_SCHEMA_VERSION` must open it
/// successfully and report the same version.
#[test]
fn open_or_rebuild_current_schema_does_not_trigger_rebuild() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("existing.db");

    let first_open = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(first_open.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    // Release the first handle before going through the rebuild-capable entry point.
    drop(first_open);

    let reopened = SqliteStorage::open_or_rebuild(&db_path)
        .expect("current schema DB should open without rebuild");
    assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
}
16295
/// Pointing `open_or_rebuild` at a directory must yield a `MigrationError::Database`
/// or `MigrationError::Io` error and leave that directory in place.
#[test]
fn open_or_rebuild_does_not_treat_non_database_paths_as_corruption() {
    let tmp = TempDir::new().unwrap();
    let directory_path = tmp.path().join("db_dir");
    std::fs::create_dir(&directory_path).unwrap();

    let result = SqliteStorage::open_or_rebuild(&directory_path);

    let reported_open_error = matches!(
        result,
        Err(MigrationError::Database(_) | MigrationError::Io(_))
    );
    assert!(
        reported_open_error,
        "non-database path should report the underlying open error without rebuild"
    );

    assert!(
        directory_path.is_dir(),
        "non-database directory must be left in place"
    );
}
16317
/// A freshly opened database must report a schema version of at least 5.
#[test]
fn schema_version_returns_current() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let version = storage.schema_version().unwrap();

    assert!(version >= 5, "Schema version should be at least 5");
}
16330
/// Opens a fresh database and verifies the analytics schema: the schema version equals
/// `CURRENT_SCHEMA_VERSION`, the four analytics tables expose the expected columns and
/// indexes, `conversations` has the tail columns, no `fts_messages` object exists, and
/// `PRAGMA integrity_check` reports "ok".
#[test]
fn migration_v13_creates_analytics_tables() {
    /// Runs `PRAGMA <pragma>(<table>)` and collects column 1 of every row as a string.
    fn pragma_names(conn: &FrankenConnection, pragma: &str, table: &str) -> Vec<String> {
        conn.query_map_collect(
            &format!("PRAGMA {pragma}({table})"),
            fparams![],
            |row: &FrankenRow| row.get_typed(1),
        )
        .unwrap()
    }

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Schema version must match CURRENT_SCHEMA_VERSION after migration"
    );

    let conn = storage.raw();

    // Table name -> columns that must be present on it.
    let expected_columns: [(&str, &[&str]); 4] = [
        (
            "message_metrics",
            &[
                "message_id",
                "hour_id",
                "day_id",
                "content_tokens_est",
                "model_name",
                "model_family",
                "model_tier",
                "provider",
                "api_input_tokens",
                "has_plan",
                "agent_slug",
                "role",
                "api_data_source",
            ],
        ),
        (
            "usage_hourly",
            &[
                "hour_id",
                "plan_message_count",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_coverage_message_count",
                "content_tokens_est_user",
                "api_thinking_tokens_total",
            ],
        ),
        (
            "usage_daily",
            &[
                "day_id",
                "plan_content_tokens_est_total",
                "plan_api_tokens_total",
                "api_thinking_tokens_total",
                "content_tokens_est_assistant",
                "message_count",
            ],
        ),
        (
            "usage_models_daily",
            &[
                "day_id",
                "model_family",
                "model_tier",
                "message_count",
                "api_tokens_total",
                "api_coverage_message_count",
            ],
        ),
    ];
    for (table, columns) in expected_columns {
        let present = pragma_names(conn, "table_info", table);
        for column in columns {
            assert!(
                present.iter().any(|name| name.as_str() == *column),
                "{table} missing column: {column}"
            );
        }
    }

    // Table name -> (index-name fragment, failure message) pairs.
    let expected_indexes: [(&str, &[(&str, &str)]); 4] = [
        (
            "message_metrics",
            &[
                ("idx_mm_hour", "message_metrics must have hour index"),
                (
                    "idx_mm_agent_day",
                    "message_metrics must have agent+day index",
                ),
                (
                    "idx_mm_model_family_day",
                    "message_metrics must have model_family+day index",
                ),
            ],
        ),
        (
            "usage_hourly",
            &[("idx_uh_agent", "usage_hourly must have agent index")],
        ),
        (
            "usage_daily",
            &[("idx_ud_agent", "usage_daily must have agent index")],
        ),
        (
            "usage_models_daily",
            &[(
                "idx_umd_model_day",
                "usage_models_daily must have model+day index",
            )],
        ),
    ];
    for (table, indexes) in expected_indexes {
        let present = pragma_names(conn, "index_list", table);
        for (fragment, failure) in indexes {
            assert!(
                present.iter().any(|name| name.contains(*fragment)),
                "{failure}"
            );
        }
    }

    let conversation_cols = pragma_names(conn, "table_info", "conversations");
    let has_tail_columns = ["last_message_idx", "last_message_created_at"]
        .iter()
        .all(|wanted| conversation_cols.iter().any(|name| name.as_str() == *wanted));
    assert!(
        has_tail_columns,
        "fresh schema must include V15 tail columns without ALTER TABLE on conversations"
    );

    let fts_schema_rows: i64 = conn
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row: &FrankenRow| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        fts_schema_rows, 0,
        "fresh schema should not create and immediately drop derived fts_messages"
    );

    let integrity: Vec<String> = conn
        .query_map_collect("PRAGMA integrity_check;", fparams![], |row: &FrankenRow| {
            row.get_typed(0)
        })
        .unwrap();
    let healthy = vec!["ok".to_string()];
    assert_eq!(
        integrity, healthy,
        "fresh schema must pass SQLite integrity_check"
    );
}
16508
/// For a fixed timestamp, `hour_id / 24` must equal the day id, and converting the
/// hour id back to milliseconds must land at or before the timestamp, less than one
/// hour away.
#[test]
fn hour_id_round_trip() {
    const MILLIS_PER_HOUR: i64 = 3_600_000;
    let ts_ms: i64 = 1_770_508_800_000;

    let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
    let day_id = SqliteStorage::day_id_from_millis(ts_ms);
    assert_eq!(hour_id / 24, day_id, "hour_id/24 should equal day_id");

    let back = SqliteStorage::millis_from_hour_id(hour_id);
    assert!(
        back <= ts_ms && ts_ms - back < MILLIS_PER_HOUR,
        "Round-trip should land within the same hour"
    );
}
16526
/// A timestamp of -1 ms must be treated as second -1 (floored, not truncated to 0)
/// when deriving day and hour ids relative to the 1_577_836_800 s epoch offset.
#[test]
fn day_and_hour_ids_floor_negative_millis() {
    let ts_ms: i64 = -1;
    let expected_secs: i64 = -1;
    let epoch_2020_secs: i64 = 1_577_836_800;
    // Seconds relative to the id epoch; negative here, so `div_euclid` rounds down.
    let secs_since_epoch = expected_secs - epoch_2020_secs;

    let day_id = SqliteStorage::day_id_from_millis(ts_ms);
    assert_eq!(day_id, secs_since_epoch.div_euclid(86_400));

    let hour_id = SqliteStorage::hour_id_from_millis(ts_ms);
    assert_eq!(hour_id, secs_since_epoch.div_euclid(3_600));
}
16544
/// Hand-builds a database stamped as schema version 10 (migrations V1, V2 and V4–V10
/// applied directly), then checks that `SqliteStorage::open` brings it to
/// `CURRENT_SCHEMA_VERSION` and that all four analytics tables exist afterwards.
#[test]
fn migration_v13_from_v10() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch("PRAGMA journal_mode=WAL;").unwrap();
    conn.execute_batch("CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT);")
        .unwrap();
    conn.execute("INSERT OR REPLACE INTO meta(key, value) VALUES('schema_version', '10')")
        .unwrap();
    // Apply the migration scripts in ascending order; V3 is not part of this sequence.
    for migration in [
        MIGRATION_V1,
        MIGRATION_V2,
        MIGRATION_V4,
        MIGRATION_V5,
        MIGRATION_V6,
        MIGRATION_V7,
        MIGRATION_V8,
        MIGRATION_V9,
        MIGRATION_V10,
    ] {
        conn.execute_batch(migration).unwrap();
    }
    // Re-stamp the version after the scripts have run.
    conn.execute("UPDATE meta SET value = '10' WHERE key = 'schema_version'")
        .unwrap();
    // Release the hand-built connection before anything else touches the file.
    drop(conn);

    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    let storage = SqliteStorage::open(&db_path).unwrap();
    assert_eq!(
        storage.schema_version().unwrap(),
        CURRENT_SCHEMA_VERSION,
        "Should have migrated from v10 to the current schema"
    );

    let analytics_tables: i64 = storage
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('message_metrics', 'usage_hourly', 'usage_daily', 'usage_models_daily')",
            &[],
            |row: &FrankenRow| row.get_typed::<i64>(0),
        )
        .unwrap();
    assert_eq!(analytics_tables, 4, "All 4 analytics tables should exist");
}
16597
16598 #[test]
16603 fn analytics_ingest_populates_metrics_and_rollups() {
16604 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
16605 use std::path::PathBuf;
16606
16607 let dir = TempDir::new().unwrap();
16608 let db_path = dir.path().join("test.db");
16609 let storage = SqliteStorage::open(&db_path).unwrap();
16610
16611 let agent = Agent {
16613 id: None,
16614 slug: "claude_code".into(),
16615 name: "Claude Code".into(),
16616 version: Some("1.0".into()),
16617 kind: AgentKind::Cli,
16618 };
16619 let agent_id = storage.ensure_agent(&agent).unwrap();
16620
16621 let ts_ms = 1_770_551_400_000_i64;
16624 let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
16625 let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);
16626
16627 let usage_json = serde_json::json!({
16629 "message": {
16630 "model": "claude-opus-4-6",
16631 "usage": {
16632 "input_tokens": 100,
16633 "output_tokens": 50,
16634 "cache_read_input_tokens": 200,
16635 "cache_creation_input_tokens": 30,
16636 "service_tier": "standard"
16637 }
16638 }
16639 });
16640
16641 let conv = Conversation {
16642 id: None,
16643 agent_slug: "claude_code".into(),
16644 workspace: None,
16645 external_id: Some("test-conv-1".into()),
16646 title: Some("Test conversation".into()),
16647 source_path: PathBuf::from("/tmp/test.jsonl"),
16648 started_at: Some(ts_ms),
16649 ended_at: Some(ts_ms + 60_000),
16650 approx_tokens: None,
16651 metadata_json: serde_json::Value::Null,
16652 messages: vec![
16653 Message {
16654 id: None,
16655 idx: 0,
16656 role: MessageRole::User,
16657 author: None,
16658 created_at: Some(ts_ms),
16659 content: "Hello, can you help me with a plan?".into(),
16660 extra_json: serde_json::Value::Null,
16661 snippets: vec![],
16662 },
16663 Message {
16664 id: None,
16665 idx: 1,
16666 role: MessageRole::Agent,
16667 author: None,
16668 created_at: Some(ts_ms + 30_000),
16669 content: "## Plan\n\n1. First step\n2. Second step\n3. Third step".into(),
16670 extra_json: usage_json,
16671 snippets: vec![],
16672 },
16673 Message {
16674 id: None,
16675 idx: 2,
16676 role: MessageRole::User,
16677 author: None,
16678 created_at: Some(ts_ms + 60_000),
16679 content: "Great, let's proceed!".into(),
16680 extra_json: serde_json::Value::Null,
16681 snippets: vec![],
16682 },
16683 ],
16684 source_id: "local".into(),
16685 origin_host: None,
16686 };
16687
16688 let outcomes = storage
16689 .insert_conversations_batched(&[(agent_id, None, &conv)])
16690 .unwrap();
16691 assert_eq!(outcomes.len(), 1);
16692 assert_eq!(outcomes[0].inserted_indices.len(), 3);
16693
16694 let conn = storage.raw();
16695
16696 let mm_count: i64 = conn
16698 .query_row_map("SELECT COUNT(*) FROM message_metrics", &[], |row| {
16699 row.get_typed::<i64>(0)
16700 })
16701 .unwrap();
16702 assert_eq!(mm_count, 3, "Should have 3 message_metrics rows");
16703
16704 #[allow(clippy::type_complexity)]
16706 let rows: Vec<(i64, i64, String, i64, i64, String, String, String, String)> = conn
16707 .query_map_collect(
16708 "SELECT hour_id, day_id, role, content_tokens_est, has_plan, api_data_source, model_family, model_tier, provider FROM message_metrics ORDER BY message_id",
16709 fparams![],
16710 |row: &FrankenRow| {
16711 Ok((
16712 row.get_typed(0)?,
16713 row.get_typed(1)?,
16714 row.get_typed(2)?,
16715 row.get_typed(3)?,
16716 row.get_typed(4)?,
16717 row.get_typed(5)?,
16718 row.get_typed(6)?,
16719 row.get_typed(7)?,
16720 row.get_typed(8)?,
16721 ))
16722 },
16723 )
16724 .unwrap();
16725
16726 assert_eq!(rows.len(), 3);
16727 assert_eq!(rows[0].0, expected_hour);
16729 assert_eq!(rows[0].1, expected_day);
16730 assert_eq!(rows[0].2, "user");
16732 assert_eq!(
16734 rows[1].4, 1,
16735 "Assistant message with plan should have has_plan=1"
16736 );
16737 assert_eq!(
16739 rows[1].5, "api",
16740 "Claude Code assistant message should have api data source"
16741 );
16742 assert_eq!(rows[0].5, "estimated");
16744 assert_eq!(rows[2].5, "estimated");
16745 assert_eq!(rows[1].6, "claude");
16746 assert_eq!(rows[1].7, "opus");
16747 assert_eq!(rows[1].8, "anthropic");
16748 assert_eq!(rows[0].6, "unknown");
16749 let user_chars = "Hello, can you help me with a plan?".len() as i64;
16751 assert_eq!(rows[0].3, user_chars / 4);
16752
16753 let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api, uh_api_cov): (
16755 i64,
16756 i64,
16757 i64,
16758 i64,
16759 i64,
16760 i64,
16761 i64,
16762 ) = conn
16763 .query_row_map(
16764 "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
16765 plan_content_tokens_est_total, plan_api_tokens_total, api_coverage_message_count
16766 FROM usage_hourly WHERE hour_id = ?",
16767 fparams![expected_hour],
16768 |row: &FrankenRow| {
16769 Ok((
16770 row.get_typed(0)?,
16771 row.get_typed(1)?,
16772 row.get_typed(2)?,
16773 row.get_typed(3)?,
16774 row.get_typed(4)?,
16775 row.get_typed(5)?,
16776 row.get_typed(6)?,
16777 ))
16778 },
16779 )
16780 .unwrap();
16781 assert_eq!(uh_msg, 3, "Hourly rollup should have 3 messages");
16782 assert_eq!(uh_user, 2, "Hourly rollup should have 2 user messages");
16783 assert_eq!(uh_asst, 1, "Hourly rollup should have 1 assistant message");
16784 assert_eq!(uh_plan, 1, "Hourly rollup should have 1 plan message");
16785 assert!(
16786 uh_plan_content > 0,
16787 "Hourly rollup should include plan content tokens"
16788 );
16789 assert!(
16790 uh_plan_api > 0,
16791 "Hourly rollup should include plan API tokens"
16792 );
16793 assert_eq!(
16794 uh_api_cov, 1,
16795 "Hourly rollup should have 1 API-covered message"
16796 );
16797
16798 let (ud_msg, ud_api_cov): (i64, i64) = conn
16800 .query_row_map(
16801 "SELECT message_count, api_coverage_message_count FROM usage_daily WHERE day_id = ?",
16802 fparams![expected_day],
16803 |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
16804 )
16805 .unwrap();
16806 assert_eq!(ud_msg, 3, "Daily rollup should match hourly");
16807 assert_eq!(
16808 ud_api_cov, 1,
16809 "Daily api_coverage should be 1 (only assistant msg has real API data)"
16810 );
16811
16812 let api_only_input: i64 = conn
16814 .query_row_map(
16815 "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE day_id = ? AND api_data_source = 'api'",
16816 fparams![expected_day],
16817 |row: &FrankenRow| row.get_typed::<i64>(0),
16818 )
16819 .unwrap();
16820 assert_eq!(
16821 api_only_input, 100,
16822 "Only API-sourced input tokens should be 100"
16823 );
16824
16825 let mm_total_content_est: i64 = conn
16827 .query_row_map(
16828 "SELECT SUM(content_tokens_est) FROM message_metrics WHERE day_id = ?",
16829 fparams![expected_day],
16830 |row| row.get_typed::<i64>(0),
16831 )
16832 .unwrap();
16833 let mm_plan_content_est: i64 = conn
16834 .query_row_map(
16835 "SELECT COALESCE(SUM(content_tokens_est), 0) FROM message_metrics WHERE day_id = ? AND has_plan = 1",
16836 fparams![expected_day],
16837 |row: &FrankenRow| row.get_typed::<i64>(0),
16838 )
16839 .unwrap();
16840 let mm_plan_api_total: i64 = conn
16841 .query_row_map(
16842 "SELECT COALESCE(SUM(COALESCE(api_input_tokens, 0) + COALESCE(api_output_tokens, 0) + COALESCE(api_cache_read_tokens, 0) + COALESCE(api_cache_creation_tokens, 0) + COALESCE(api_thinking_tokens, 0)), 0)
16843 FROM message_metrics WHERE day_id = ? AND has_plan = 1 AND api_data_source = 'api'",
16844 fparams![expected_day],
16845 |row: &FrankenRow| row.get_typed::<i64>(0),
16846 )
16847 .unwrap();
16848 let ud_content_est: i64 = conn
16849 .query_row_map(
16850 "SELECT content_tokens_est_total FROM usage_daily WHERE day_id = ?",
16851 fparams![expected_day],
16852 |row| row.get_typed::<i64>(0),
16853 )
16854 .unwrap();
16855 let (ud_plan_content_est, ud_plan_api_total): (i64, i64) = conn
16856 .query_row_map(
16857 "SELECT plan_content_tokens_est_total, plan_api_tokens_total FROM usage_daily WHERE day_id = ?",
16858 fparams![expected_day],
16859 |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?)),
16860 )
16861 .unwrap();
16862 assert_eq!(
16863 mm_total_content_est, ud_content_est,
16864 "Daily rollup content_tokens_est_total must equal SUM of message_metrics"
16865 );
16866 assert_eq!(
16867 mm_plan_content_est, ud_plan_content_est,
16868 "Daily rollup plan_content_tokens_est_total must equal planned message_metrics content sum"
16869 );
16870 assert_eq!(
16871 mm_plan_api_total, ud_plan_api_total,
16872 "Daily rollup plan_api_tokens_total must equal planned message_metrics API token sum"
16873 );
16874
16875 let (claude_msg, claude_user, claude_asst, claude_api_total, claude_api_cov): (
16877 i64,
16878 i64,
16879 i64,
16880 i64,
16881 i64,
16882 ) = conn
16883 .query_row_map(
16884 "SELECT message_count, user_message_count, assistant_message_count, api_tokens_total, api_coverage_message_count
16885 FROM usage_models_daily
16886 WHERE day_id = ? AND model_family = 'claude' AND model_tier = 'opus'",
16887 fparams![expected_day],
16888 |row: &FrankenRow| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?, row.get_typed(3)?, row.get_typed(4)?)),
16889 )
16890 .unwrap();
16891 assert_eq!(claude_msg, 1);
16892 assert_eq!(claude_user, 0);
16893 assert_eq!(claude_asst, 1);
16894 assert_eq!(claude_api_total, 380);
16895 assert_eq!(claude_api_cov, 1);
16896
16897 let unknown_msg: i64 = conn
16898 .query_row_map(
16899 "SELECT message_count FROM usage_models_daily
16900 WHERE day_id = ? AND model_family = 'unknown' AND model_tier = 'unknown'",
16901 fparams![expected_day],
16902 |row| row.get_typed(0),
16903 )
16904 .unwrap();
16905 assert_eq!(
16906 unknown_msg, 2,
16907 "user messages should land in unknown model bucket"
16908 );
16909 }
16910
/// Spot-checks `has_plan_heuristic` on four messages that must be flagged as
/// plans and four that must not.
#[test]
fn has_plan_heuristic_detects_plans() {
    let plan_like = [
        "## Plan\n\n1. First step\n2. Second step",
        "# Plan\nHere is what we will do:\n1. Step one\n2. Step two",
        "Plan:\n- Gather baseline\n- Implement changes\n- Validate with tests",
        "Next steps:\n1. Update schema\n2. Rebuild rollups",
    ];
    for text in plan_like {
        assert!(has_plan_heuristic(text), "expected a plan: {text:?}");
    }

    // The last sample has numbered lines, but only inside a fenced JSON blob.
    let not_plans = [
        "Hello world",
        "Short",
        "This is a regular message without plans",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. install\\n2. run\"}\n```",
    ];
    for text in not_plans {
        assert!(!has_plan_heuristic(text), "unexpected plan: {text:?}");
    }
}
16934
/// The same plan-shaped text is accepted for the "assistant", "agent" and
/// "Assistant" roles and rejected for "user" and "tool".
#[test]
fn has_plan_for_role_only_counts_assistant_messages() {
    let plan_text = "## Plan\n1. First\n2. Second";

    for role in ["assistant", "agent", "Assistant"] {
        assert!(
            has_plan_for_role(role, plan_text),
            "role {role:?} should count as a plan author"
        );
    }
    for role in ["user", "tool"] {
        assert!(
            !has_plan_for_role(role, plan_text),
            "role {role:?} should not count as a plan author"
        );
    }
}
16944
/// Records two plan messages under identical rollup keys, one tagged
/// "estimated" and one tagged "api", and checks that only the "api" entry
/// contributes to the API token totals while both count as plan messages.
#[test]
fn api_rollups_require_api_data_source() {
    // The two entries differ only in id, content size, token counts and the
    // API data source, so they are stamped out from a single template.
    let plan_entry = |message_id,
                      content_chars,
                      content_tokens_est,
                      input_tokens,
                      output_tokens,
                      data_source: &str| MessageMetricsEntry {
        message_id,
        created_at_ms: 0,
        hour_id: 1,
        day_id: 1,
        agent_slug: "codex".into(),
        workspace_id: 0,
        source_id: "local".into(),
        role: "assistant".into(),
        content_chars,
        content_tokens_est,
        model_name: None,
        model_family: "unknown".into(),
        model_tier: "unknown".into(),
        provider: "unknown".into(),
        api_input_tokens: Some(input_tokens),
        api_output_tokens: Some(output_tokens),
        api_cache_read_tokens: Some(0),
        api_cache_creation_tokens: Some(0),
        api_thinking_tokens: Some(0),
        api_service_tier: None,
        api_data_source: data_source.into(),
        tool_call_count: 0,
        has_tool_calls: false,
        has_plan: true,
    };

    let mut agg = AnalyticsRollupAggregator::new();
    agg.record(&plan_entry(1, 120, 30, 100, 50, "estimated"));
    agg.record(&plan_entry(2, 80, 20, 40, 10, "api"));

    // Hourly and daily maps are looked up with the same tuple because
    // hour_id and day_id are both 1 in the entries above.
    let key = (1_i64, String::from("codex"), 0_i64, String::from("local"));
    let hourly = agg.hourly.get(&key).expect("hourly rollup key must exist");
    let daily = agg.daily.get(&key).expect("daily rollup key must exist");

    let model_key = (
        key.0,
        key.1.clone(),
        key.2,
        key.3.clone(),
        String::from("unknown"),
        String::from("unknown"),
    );
    let models_daily = agg
        .models_daily
        .get(&model_key)
        .expect("model rollup key must exist");

    // Plan counters include both messages (content estimate 30 + 20).
    assert_eq!(hourly.plan_message_count, 2);
    assert_eq!(hourly.plan_content_tokens_est_total, 50);

    // Plan API totals only see the "api" entry (40 + 10).
    assert_eq!(hourly.plan_api_tokens_total, 50);
    assert_eq!(daily.plan_api_tokens_total, 50);
    assert_eq!(models_daily.plan_api_tokens_total, 50);

    // Overall API totals likewise exclude the "estimated" entry.
    assert_eq!(hourly.api_tokens_total, 50);
    assert_eq!(hourly.api_input_tokens_total, 40);
    assert_eq!(hourly.api_output_tokens_total, 10);
    assert_eq!(hourly.api_coverage_message_count, 1);
    assert_eq!(daily.api_tokens_total, 50);
    assert_eq!(models_daily.api_tokens_total, 50);
}
17036
/// Runs `has_plan_heuristic` over a small curated corpus and requires at
/// least 80% recall on the positives and at most 20% false positives on the
/// negatives.
#[test]
fn has_plan_heuristic_curated_corpus_thresholds() {
    const MIN_RECALL: f64 = 0.80;
    const MAX_FALSE_POSITIVE_RATE: f64 = 0.20;

    let positives = [
        "## Plan\n1. Inspect current schema\n2. Add migration\n3. Verify rebuild",
        "Plan:\n1) Reproduce\n2) Patch\n3) Add tests",
        "Implementation plan:\n- Parse inputs\n- Update rollups\n- Run checks",
        "Next steps:\n1. Reserve file\n2. Implement\n3. Report status",
        "# Plan\n1. Gather requirements\n2. Ship changes",
        "Action plan:\n- Identify root cause\n- Fix it\n- Validate",
    ];

    let negatives = [
        "The plan is to move fast and fix things later.",
        "```json\n{\"tool\":\"shell\",\"stdout\":\"1. ls\\n2. cat\"}\n```",
        "stdout:\n1. Build started\n2. Build finished\nexit code: 0",
        "I can help with that request. Let me know if you want details.",
        "Here is a list:\n- apples\n- oranges",
        "Status update: completed tasks and blockers below.",
    ];

    // Fraction of a corpus that the heuristic flags as containing a plan.
    let detection_rate = |corpus: &[&str]| -> f64 {
        let flagged = corpus
            .iter()
            .filter(|msg| has_plan_heuristic(msg))
            .count();
        flagged as f64 / corpus.len() as f64
    };

    let recall = detection_rate(&positives[..]);
    let false_positive_rate = detection_rate(&negatives[..]);

    assert!(
        recall >= MIN_RECALL,
        "plan heuristic recall too low: got {recall:.2}"
    );
    assert!(
        false_positive_rate <= MAX_FALSE_POSITIVE_RATE,
        "plan heuristic false-positive rate too high: got {false_positive_rate:.2}"
    );
}
17080
/// Inserts a three-message conversation, wipes the four analytics tables by
/// hand, and verifies that `rebuild_analytics` regenerates the same row
/// counts, API input total, and hourly/daily rollup values.
#[test]
fn rebuild_analytics_repopulates_from_messages() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Tables that the test clears and expects the rebuild to repopulate.
    const ANALYTICS_TABLES: [&str; 4] = [
        "message_metrics",
        "usage_hourly",
        "usage_daily",
        "usage_models_daily",
    ];

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.0".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let ts_ms = 1_770_551_400_000_i64;
    let expected_day = SqliteStorage::day_id_from_millis(ts_ms);
    let expected_hour = SqliteStorage::hour_id_from_millis(ts_ms);

    // user -> assistant (plan text + usage payload) -> user
    let user_prompt = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(ts_ms),
        content: "Hello, can you help me with a plan?".into(),
        extra_json: serde_json::Value::Null,
        snippets: vec![],
    };
    let assistant_plan = Message {
        id: None,
        idx: 1,
        role: MessageRole::Agent,
        author: None,
        created_at: Some(ts_ms + 30_000),
        content: "## Plan\n\n1. First step\n2. Second step\n3. Third step".into(),
        extra_json: serde_json::json!({
            "message": {
                "model": "claude-opus-4-6",
                "usage": {
                    "input_tokens": 100,
                    "output_tokens": 50,
                    "cache_read_input_tokens": 200,
                    "cache_creation_input_tokens": 30,
                    "service_tier": "standard"
                }
            }
        }),
        snippets: vec![],
    };
    let user_followup = Message {
        id: None,
        idx: 2,
        role: MessageRole::User,
        author: None,
        created_at: Some(ts_ms + 60_000),
        content: "Great, let's proceed!".into(),
        extra_json: serde_json::Value::Null,
        snippets: vec![],
    };

    let conv = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: None,
        external_id: Some("test-rebuild-1".into()),
        title: Some("Test conversation".into()),
        source_path: PathBuf::from("/tmp/test.jsonl"),
        started_at: Some(ts_ms),
        ended_at: Some(ts_ms + 60_000),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![user_prompt, assistant_plan, user_followup],
        source_id: "local".into(),
        origin_host: None,
    };

    storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();

    // `SELECT COUNT(*)` over one table.
    let count_rows = |table: &str| -> i64 {
        let sql = format!("SELECT COUNT(*) FROM {table}");
        storage
            .raw()
            .query_row_map(sql.as_str(), fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    // Sum of api_input_tokens over rows whose data source is 'api'.
    let api_input_total = || -> i64 {
        storage
            .raw()
            .query_row_map(
                "SELECT COALESCE(SUM(api_input_tokens), 0) FROM message_metrics WHERE api_data_source = 'api'",
                fparams![],
                |row: &FrankenRow| row.get_typed(0),
            )
            .unwrap()
    };

    // Baseline captured right after the insert.
    let orig_mm = count_rows("message_metrics");
    let orig_hourly = count_rows("usage_hourly");
    let orig_daily = count_rows("usage_daily");
    let orig_models_daily = count_rows("usage_models_daily");
    let orig_api_input = api_input_total();

    assert_eq!(orig_mm, 3);
    assert!(orig_hourly > 0);
    assert!(orig_daily > 0);
    assert!(orig_models_daily > 0);

    // Drop all derived analytics so the rebuild has to start from nothing.
    for table in ANALYTICS_TABLES {
        let sql = format!("DELETE FROM {table}");
        storage.raw().execute(sql.as_str()).unwrap();
    }
    assert_eq!(count_rows("message_metrics"), 0);

    let result = storage.rebuild_analytics().unwrap();

    assert_eq!(result.message_metrics_rows, 3);
    assert!(result.usage_hourly_rows > 0);
    assert!(result.usage_daily_rows > 0);
    assert!(result.usage_models_daily_rows > 0);
    assert!(
        result.elapsed_ms < 10_000,
        "Rebuild should be fast for 3 msgs"
    );

    // Row counts and API totals must match the pre-wipe baseline.
    assert_eq!(
        count_rows("message_metrics"),
        orig_mm,
        "Rebuilt message_metrics count should match"
    );
    assert_eq!(
        count_rows("usage_hourly"),
        orig_hourly,
        "Rebuilt hourly rows should match"
    );
    assert_eq!(
        count_rows("usage_daily"),
        orig_daily,
        "Rebuilt daily rows should match"
    );
    assert_eq!(
        count_rows("usage_models_daily"),
        orig_models_daily,
        "Rebuilt model rollup rows should match"
    );
    assert_eq!(
        api_input_total(),
        orig_api_input,
        "Rebuilt API input tokens should match original"
    );

    // Columns in order: message, user, assistant, plan message counts, then
    // plan content-estimate tokens and plan API tokens.
    let hourly: (i64, i64, i64, i64, i64, i64) = storage
        .raw()
        .query_row_map(
            "SELECT message_count, user_message_count, assistant_message_count, plan_message_count,
 plan_content_tokens_est_total, plan_api_tokens_total
 FROM usage_hourly WHERE hour_id = ?",
            fparams![expected_hour],
            |row: &FrankenRow| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                    row.get_typed(4)?,
                    row.get_typed(5)?,
                ))
            },
        )
        .unwrap();
    let (uh_msg, uh_user, uh_asst, uh_plan, uh_plan_content, uh_plan_api) = hourly;
    assert_eq!(uh_msg, 3);
    assert_eq!(uh_user, 2);
    assert_eq!(uh_asst, 1);
    assert_eq!(uh_plan, 1);
    assert!(uh_plan_content > 0);
    assert!(uh_plan_api > 0);

    let ud_msg: i64 = storage
        .raw()
        .query_row_map(
            "SELECT message_count FROM usage_daily WHERE day_id = ?",
            fparams![expected_day],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(ud_msg, 3);
}
17324
/// Inserts two messages whose bodies are each just over half of
/// `FTS_ENTRY_BATCH_MAX_CHARS` and checks that both rows land in `messages`
/// and in `fts_messages`.
#[test]
fn insert_conversations_batched_flushes_large_fts_batches() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    storage
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency before insert");

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Each body exceeds half the batch limit, so the pair cannot fit under
    // the limit together.
    let filler = "y".repeat((FTS_ENTRY_BATCH_MAX_CHARS / 2) + 1);
    let mut messages = Vec::with_capacity(2);
    for i in 0_i64..2 {
        messages.push(Message {
            id: None,
            idx: i,
            role: MessageRole::Agent,
            author: None,
            created_at: Some(1_700_000_000_000 + i),
            content: format!("{i}-{filler}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("fts-large-batch".into()),
        title: Some("FTS Large Batch".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };
    let expected_rows = conv.messages.len();

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv)])
        .unwrap();
    assert_eq!(outcomes.len(), 1);
    assert_eq!(outcomes[0].inserted_indices.len(), expected_rows);

    // Runs a parameterless single-value count query.
    let count = |sql: &str| -> i64 {
        storage
            .conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let message_count = count("SELECT COUNT(*) FROM messages");
    let fts_count = count("SELECT COUNT(*) FROM fts_messages");

    assert_eq!(message_count, expected_rows as i64);
    assert_eq!(fts_count, expected_rows as i64);
}
17401
17402 fn make_profiled_storage_remote_conversation(
17403 external_id: i64,
17404 msg_count: usize,
17405 ) -> Conversation {
17406 Conversation {
17407 id: None,
17408 agent_slug: "codex".into(),
17409 workspace: Some(PathBuf::from("/ws/profiled-storage-remote")),
17410 external_id: Some(format!("profiled-storage-remote-{external_id}")),
17411 title: Some(format!(
17412 "Profiled storage remote conversation {external_id}"
17413 )),
17414 source_path: PathBuf::from(format!("/log/profiled-storage-remote-{external_id}.jsonl")),
17415 started_at: Some(10_000 + external_id * 100),
17416 ended_at: Some(10_000 + external_id * 100 + msg_count as i64),
17417 approx_tokens: Some(msg_count as i64 * 32),
17418 metadata_json: serde_json::json!({ "bench": true }),
17419 messages: (0..msg_count)
17420 .map(|idx| Message {
17421 id: None,
17422 idx: idx as i64,
17423 role: if idx % 2 == 0 {
17424 MessageRole::User
17425 } else {
17426 MessageRole::Agent
17427 },
17428 author: Some("tester".into()),
17429 created_at: Some(20_000 + external_id * 100 + idx as i64),
17430 content: format!(
17431 "profiled storage remote content ext={external_id} idx={idx} {}",
17432 "x".repeat(64)
17433 ),
17434 extra_json: serde_json::json!({ "idx": idx }),
17435 snippets: Vec::new(),
17436 })
17437 .collect(),
17438 source_id: "profiled-storage-remote-source".into(),
17439 origin_host: Some("builder-profile".into()),
17440 }
17441 }
17442
17443 fn make_profiled_append_remote_merge_conversation(
17444 external_id: i64,
17445 msg_count: usize,
17446 ) -> Conversation {
17447 let base_ts = 100_000 + external_id * 1_000;
17448 Conversation {
17449 id: None,
17450 agent_slug: "codex".into(),
17451 workspace: Some(PathBuf::from("/ws/profiled-append-remote")),
17452 external_id: Some(format!("profiled-append-remote-{external_id}")),
17453 title: Some(format!("Profiled append remote conversation {external_id}")),
17454 source_path: PathBuf::from(format!("/log/profiled-append-remote-{external_id}.jsonl")),
17455 started_at: Some(base_ts),
17456 ended_at: Some(base_ts + msg_count as i64),
17457 approx_tokens: Some(msg_count as i64 * 50),
17458 metadata_json: serde_json::json!({ "bench": true }),
17459 messages: (0..msg_count)
17460 .map(|idx| Message {
17461 id: None,
17462 idx: idx as i64,
17463 role: if idx % 2 == 0 {
17464 MessageRole::User
17465 } else {
17466 MessageRole::Agent
17467 },
17468 author: Some(format!("model-{}", external_id % 5)),
17469 created_at: Some(base_ts + idx as i64),
17470 content: format!(
17471 "Profiled append remote conversation {} message {}: Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
17472 external_id, idx
17473 ),
17474 extra_json: serde_json::json!({ "bench": true }),
17475 snippets: Vec::new(),
17476 })
17477 .collect(),
17478 source_id: "profiled-append-remote-source".into(),
17479 origin_host: Some("builder-profile".into()),
17480 }
17481 }
17482
/// Inserts a fresh five-message conversation with one snippet per message and
/// checks that every snippet row joins back to a message of that conversation.
#[test]
fn insert_conversation_tree_batched_new_message_ids_match_snippet_rows() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("batched-message-ids.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    let mut conv = make_profiled_storage_remote_conversation(42, 5);
    conv.messages
        .iter_mut()
        .enumerate()
        .for_each(|(idx, msg)| {
            msg.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/file_{idx}.rs"))),
                start_line: Some((idx + 1) as i64),
                end_line: Some((idx + 2) as i64),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn snippet_{idx}() {{}}")),
            });
        });
    let expected_rows = conv.messages.len() as i64;

    let outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
        .unwrap();

    let message_count: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
            fparams![outcome.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    // A snippet only shows up here if its message_id points at a real
    // message row belonging to this conversation.
    let joined_snippet_count: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*)
 FROM snippets s
 JOIN messages m ON s.message_id = m.id
 WHERE m.conversation_id = ?1",
            fparams![outcome.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();

    assert_eq!(message_count, expected_rows);
    assert_eq!(joined_snippet_count, expected_rows);
}
17538
/// Inserts a two-message conversation, then re-inserts it grown to five
/// messages, and checks that the three appended messages get their own
/// snippets while the first two keep the snippets from the initial insert.
#[test]
fn insert_conversation_tree_batched_appended_message_ids_match_snippet_rows() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("batched-append-message-ids.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // Gives every message one snippet named `<label>_<idx>` whose line range
    // is `[idx + first_line, idx + first_line + 1]`.
    let attach_snippets = |conv: &mut Conversation, label: &str, first_line: usize| {
        for (idx, msg) in conv.messages.iter_mut().enumerate() {
            msg.snippets.push(Snippet {
                id: None,
                file_path: Some(PathBuf::from(format!("src/{label}_{idx}.rs"))),
                start_line: Some((idx + first_line) as i64),
                end_line: Some((idx + first_line + 1) as i64),
                language: Some("rust".into()),
                snippet_text: Some(format!("fn {label}_{idx}() {{}}")),
            });
        }
    };

    let mut initial = make_profiled_storage_remote_conversation(77, 2);
    attach_snippets(&mut initial, "append_initial", 1);
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();
    assert_eq!(first.inserted_indices, vec![0, 1]);

    // Same external id (77) with more messages and differently named snippets.
    let mut appended = make_profiled_storage_remote_conversation(77, 5);
    attach_snippets(&mut appended, "append_full", 10);
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3, 4]);

    let message_count: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*) FROM messages WHERE conversation_id = ?1",
            fparams![first.conversation_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    let joined_snippets: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT m.idx, s.file_path
 FROM snippets s
 JOIN messages m ON s.message_id = m.id
 WHERE m.conversation_id = ?1
 ORDER BY m.idx, s.id",
            fparams![first.conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();

    let expected_snippets: Vec<(i64, String)> = vec![
        (0, "src/append_initial_0.rs".to_string()),
        (1, "src/append_initial_1.rs".to_string()),
        (2, "src/append_full_2.rs".to_string()),
        (3, "src/append_full_3.rs".to_string()),
        (4, "src/append_full_4.rs".to_string()),
    ];
    assert_eq!(message_count, 5);
    assert_eq!(joined_snippets, expected_snippets);
}
17623
/// Deletes a conversation's row from `conversation_external_tail_lookup`,
/// appends to the conversation, and checks that the insert reuses the
/// existing conversation and writes the lookup row back in agreement with
/// `conversation_tail_state`.
#[test]
fn insert_conversation_tree_rehydrates_external_lookup_after_manual_clear() {
    const LOOKUP_CONVERSATION_ID_SQL: &str = "SELECT conversation_id
 FROM conversation_external_tail_lookup
 WHERE lookup_key = ?1";

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("external-lookup-rehydrate.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    let initial = make_profiled_storage_remote_conversation(88, 2);
    let first = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &initial)
        .unwrap();
    let external_id = initial.external_id.as_deref().unwrap();
    let lookup_key = conversation_external_lookup_key(&initial.source_id, agent_id, external_id);

    // The first insert must have registered the lookup row.
    let lookup_id: i64 = storage
        .conn
        .query_row_map(
            LOOKUP_CONVERSATION_ID_SQL,
            fparams![lookup_key.as_str()],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(lookup_id, first.conversation_id);

    // Remove the lookup row by hand before the second insert.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_external_tail_lookup WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
        )
        .unwrap();

    let appended = make_profiled_storage_remote_conversation(88, 4);
    let second = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &appended)
        .unwrap();
    assert_eq!(second.conversation_id, first.conversation_id);
    assert_eq!(second.inserted_indices, vec![2, 3]);

    let conversation_count: i64 = storage
        .conn
        .query_row_map(
            "SELECT COUNT(*)
 FROM conversations
 WHERE source_id = ?1 AND agent_id = ?2 AND external_id = ?3",
            fparams![initial.source_id.as_str(), agent_id, external_id],
            |row| row.get_typed(0),
        )
        .unwrap();
    let restored_lookup: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(
            "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
 FROM conversation_external_tail_lookup
 WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
            |row| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                ))
            },
        )
        .unwrap();
    let tail_state: (Option<i64>, Option<i64>, Option<i64>) = storage
        .conn
        .query_row_map(
            "SELECT ended_at, last_message_idx, last_message_created_at
 FROM conversation_tail_state
 WHERE conversation_id = ?1",
            fparams![first.conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
        )
        .unwrap();

    // No duplicate conversation row was created for the same external id.
    assert_eq!(conversation_count, 1);

    // The restored lookup row mirrors the tail state of the conversation.
    let (tail_ended_at, tail_last_idx, tail_last_created_at) = tail_state;
    assert_eq!(
        restored_lookup,
        (
            first.conversation_id,
            tail_ended_at,
            tail_last_idx,
            tail_last_created_at
        )
    );

    // The tail itself points at the fourth (index 3) appended message.
    let final_message = &appended.messages[3];
    assert_eq!(
        tail_state,
        (
            final_message.created_at,
            Some(3),
            final_message.created_at
        )
    );
}
17732
/// Empties `daily_stats` after a first insert and checks that a second insert
/// creates the rows again, counting only the second conversation.
#[test]
fn insert_conversation_tree_recreates_daily_stats_after_manual_clear() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace = PathBuf::from("/ws/profiled-storage-remote");
    let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

    // First conversation (3 messages); its stats are wiped right afterwards.
    let seed = make_profiled_storage_remote_conversation(0, 3);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &seed)
        .unwrap();
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    // Second conversation (2 messages) is inserted against the empty table.
    let follow_up = make_profiled_storage_remote_conversation(1, 2);
    storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &follow_up)
        .unwrap();

    let row_count: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    let totals: (i64, i64) = storage
        .conn
        .query_row_map(
            "SELECT session_count, message_count
 FROM daily_stats
 WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let (session_count, message_count) = totals;

    assert_eq!(row_count, 4);
    assert_eq!(session_count, 1);
    assert_eq!(message_count, 2);
}
17788
/// For three (message count, iteration count) scenarios, inserts one
/// unprofiled seed conversation and then profiles `iterations` further
/// inserts, checking the profile's counters and that the per-stage durations
/// sum to no more than the recorded total.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_steady_state_remote_reuse() {
    // Guard is held for the whole test; the env var is set to "0" meanwhile.
    let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    let scenarios: [(usize, usize); 3] = [(5, 80), (20, 50), (50, 24)];
    for (msg_count, iterations) in scenarios {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join(format!("profile-{msg_count}.db"));
        let storage = SqliteStorage::open(&db_path).unwrap();

        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace = PathBuf::from("/ws/profiled-storage-remote");
        let workspace_id = storage.ensure_workspace(&workspace, None).unwrap();

        // Seed insert (external id 0) happens outside the profile.
        let seed = make_profiled_storage_remote_conversation(0, msg_count);
        storage
            .insert_conversation_tree(agent_id, Some(workspace_id), &seed)
            .unwrap();

        let mut profile = InsertConversationTreePerfProfile::default();
        for external_id in 1..=iterations {
            let conv = make_profiled_storage_remote_conversation(external_id as i64, msg_count);
            storage
                .insert_conversation_tree_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &conv,
                    &mut profile,
                )
                .unwrap();
        }

        let expected_messages = iterations * msg_count;
        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, expected_messages);
        assert_eq!(profile.inserted_messages, expected_messages);

        let accounted_duration = profile.source_duration
            + profile.tx_open_duration
            + profile.existing_lookup_duration
            + profile.conversation_row_duration
            + profile.message_insert_duration
            + profile.snippet_insert_duration
            + profile.fts_entry_duration
            + profile.fts_flush_duration
            + profile.analytics_duration
            + profile.commit_duration;
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted stage durations cannot exceed total duration"
        );

        profile.log_summary(&format!("remote_reuse_{msg_count}_msgs"));
    }
}
17851
/// Profiles `append_existing_conversation_with_profile` against conversations
/// that were already stored: each external id is first inserted with
/// `msg_count` messages and then replayed with `msg_count * 2`, and the test
/// asserts that only `msg_count` messages per conversation are newly inserted.
#[test]
#[serial]
fn insert_conversation_tree_stage_profile_tracks_append_remote_source_merge() {
    // Pin CASS_DEFER_LEXICAL_UPDATES to "0"; the guard binding stays alive until
    // this function returns.
    let _defer_guard = set_env_var("CASS_DEFER_LEXICAL_UPDATES", "0");

    // Each scenario is (initial messages per conversation, number of conversations).
    let scenarios: [(usize, usize); 3] = [(5, 80), (20, 50), (50, 24)];

    for (msg_count, iterations) in scenarios {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join(format!("append-profile-{msg_count}.db"));
        let storage = SqliteStorage::open(&db_path).unwrap();

        let codex = Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        };
        let agent_id = storage.ensure_agent(&codex).unwrap();
        let workspace_id = storage
            .ensure_workspace(&PathBuf::from("/ws/profiled-append-remote"), None)
            .unwrap();

        // Seed phase: store every conversation with its initial message count.
        (0..iterations).for_each(|external_id| {
            let seeded =
                make_profiled_append_remote_merge_conversation(external_id as i64, msg_count);
            storage
                .insert_conversation_tree(agent_id, Some(workspace_id), &seeded)
                .unwrap();
        });

        // Append phase: replay the same external ids with twice as many messages.
        let mut profile = InsertConversationTreePerfProfile::default();
        (0..iterations).for_each(|external_id| {
            let grown = make_profiled_append_remote_merge_conversation(
                external_id as i64,
                msg_count * 2,
            );
            storage
                .append_existing_conversation_with_profile(
                    agent_id,
                    Some(workspace_id),
                    &grown,
                    &mut profile,
                )
                .unwrap();
        });

        // Left-to-right sum of every individually tracked stage.
        let accounted_duration = [
            profile.source_duration,
            profile.tx_open_duration,
            profile.existing_lookup_duration,
            profile.existing_idx_lookup_duration,
            profile.existing_replay_lookup_duration,
            profile.dedupe_filter_duration,
            profile.conversation_row_duration,
            profile.message_insert_duration,
            profile.snippet_insert_duration,
            profile.fts_entry_duration,
            profile.fts_flush_duration,
            profile.analytics_duration,
            profile.commit_duration,
        ]
        .into_iter()
        .reduce(|acc, stage| acc + stage)
        .expect("stage list is non-empty");

        assert_eq!(profile.invocations, iterations);
        assert_eq!(profile.messages, iterations * msg_count * 2);
        assert_eq!(profile.inserted_messages, iterations * msg_count);
        assert!(
            profile.total_duration >= accounted_duration,
            "accounted append stage durations cannot exceed total duration"
        );

        profile.log_summary(&format!("append_remote_merge_{msg_count}_msgs"));
    }
}
17925
/// Seeds two agents, two conversations, three messages and their
/// `message_metrics` rows with raw SQL, wipes `daily_stats`, and checks that
/// `rebuild_daily_stats` reports two sessions, zero drift, and three messages
/// in the `all`/`all` rollup row.
#[test]
fn rebuild_daily_stats_recomputes_materialized_totals_without_monolithic_group_by() {
    const INSERT_AGENT_SQL: &str =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    const INSERT_CONVERSATION_SQL: &str = "INSERT INTO conversations (
            id, agent_id, workspace_id, source_id, external_id, title, source_path,
            started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, ?8, NULL, ?9, NULL, NULL)";
    const INSERT_MESSAGE_SQL: &str = "INSERT INTO messages (
            id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    const INSERT_METRICS_SQL: &str = "INSERT INTO message_metrics (
            message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
            role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
            api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
            api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
            model_name, model_family, model_tier, provider
         ) VALUES (
            ?1, ?2, ?3, ?4, ?5, ?6, ?7,
            ?8, ?9, ?10, ?11, ?12,
            ?13, ?14, ?15,
            ?16, ?17, ?18, ?19, ?20,
            ?21, ?22, ?23, ?24
         )";

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let started_at = 1_700_000_000_000_i64;
    let day_id = FrankenStorage::day_id_from_millis(started_at);
    let hour_id = FrankenStorage::hour_id_from_millis(started_at);

    // Agents: (id, slug, display name).
    for (id, slug, name) in [(1_i64, "codex", "Codex"), (2_i64, "claude", "Claude")] {
        storage
            .conn
            .execute_compat(INSERT_AGENT_SQL, fparams![id, slug, name, "cli"])
            .unwrap();
    }

    // Conversations: (id, agent id, external id, title, source path, ended_at).
    for (id, owner_agent_id, external_id, title, source_path, ended_at) in [
        (
            1_i64,
            1_i64,
            "daily-a",
            "Daily A",
            "/tmp/daily-a.jsonl",
            started_at + 200,
        ),
        (
            2_i64,
            2_i64,
            "daily-b",
            "Daily B",
            "/tmp/daily-b.jsonl",
            started_at + 300,
        ),
    ] {
        storage
            .conn
            .execute_compat(
                INSERT_CONVERSATION_SQL,
                fparams![
                    id,
                    owner_agent_id,
                    LOCAL_SOURCE_ID,
                    external_id,
                    title,
                    source_path,
                    started_at,
                    ended_at,
                    "{}"
                ],
            )
            .unwrap();
    }

    // Messages: (id, conversation id, idx, role, created_at, content).
    for (id, conversation_id, idx, role, created_at, content) in [
        (1_i64, 1_i64, 0_i64, "user", started_at, "hello"),
        (2_i64, 1_i64, 1_i64, "assistant", started_at + 100, "response"),
        (3_i64, 2_i64, 0_i64, "user", started_at + 50, "abc"),
    ] {
        storage
            .conn
            .execute_compat(
                INSERT_MESSAGE_SQL,
                fparams![id, conversation_id, idx, role, created_at, content],
            )
            .unwrap();
    }

    // Metrics rows: (message id, agent slug, role, content length).
    for (message_id, agent_slug, role, content_len) in [
        (1_i64, "codex", "user", 5_i64),
        (2_i64, "codex", "assistant", 8_i64),
        (3_i64, "claude", "user", 3_i64),
    ] {
        storage
            .conn
            .execute_compat(
                INSERT_METRICS_SQL,
                fparams![
                    message_id,
                    started_at,
                    hour_id,
                    day_id,
                    agent_slug,
                    0_i64,
                    LOCAL_SOURCE_ID,
                    role,
                    content_len,
                    content_len / 4,
                    0_i64,
                    0_i64,
                    0_i64,
                    0_i64,
                    0_i64,
                    "",
                    "estimated",
                    0_i64,
                    0_i64,
                    0_i64,
                    "",
                    "unknown",
                    "unknown",
                    "unknown"
                ],
            )
            .unwrap();
    }

    // Start the rebuild from an empty materialized table.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 2);

    let health = storage.daily_stats_health().unwrap();
    assert_eq!(health.conversation_count, 2);
    assert_eq!(health.materialized_total, 2);
    assert_eq!(health.drift, 0);

    let total_messages: i64 = storage
        .conn
        .query_row_map(
            "SELECT message_count FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(total_messages, 3);
}
18092
/// Stores one message whose content mixes ASCII and multi-byte characters
/// together with a matching `message_metrics` row, and checks that the
/// `all`/`all` `total_chars` value equals the content's byte length both after
/// the inline stats update and after `rebuild_daily_stats`.
#[test]
fn rebuild_daily_stats_preserves_byte_counts_with_message_metrics() {
    const INSERT_AGENT_SQL: &str =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    const INSERT_CONVERSATION_SQL: &str = "INSERT INTO conversations (
            id, agent_id, workspace_id, source_id, external_id, title, source_path,
            started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    const INSERT_MESSAGE_SQL: &str = "INSERT INTO messages (
            id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";
    const INSERT_METRICS_SQL: &str = "INSERT INTO message_metrics (
            message_id, created_at_ms, hour_id, day_id, agent_slug, workspace_id, source_id,
            role, content_chars, content_tokens_est, api_input_tokens, api_output_tokens,
            api_cache_read_tokens, api_cache_creation_tokens, api_thinking_tokens,
            api_service_tier, api_data_source, tool_call_count, has_tool_calls, has_plan,
            model_name, model_family, model_tier, provider
         ) VALUES (
            ?1, ?2, ?3, ?4, ?5, ?6, ?7,
            ?8, ?9, ?10, ?11, ?12,
            ?13, ?14, ?15,
            ?16, ?17, ?18, ?19, ?20,
            ?21, ?22, ?23, ?24
         )";

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    // `str::len` is a byte count, so this is larger than the character count here.
    let content = "ASCII🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;
    let day_id = FrankenStorage::day_id_from_millis(started_at);
    let hour_id = FrankenStorage::hour_id_from_millis(started_at);

    // Reads the rollup row's total_chars; used after both the inline update and the rebuild.
    let read_rollup_total_chars = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
                fparams![],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    storage
        .conn
        .execute_compat(INSERT_AGENT_SQL, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();
    storage
        .conn
        .execute_compat(
            INSERT_CONVERSATION_SQL,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-metrics",
                "Unicode Metrics",
                "/tmp/unicode-metrics.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            INSERT_MESSAGE_SQL,
            fparams![1_i64, 1_i64, 0_i64, "user", started_at, content],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            INSERT_METRICS_SQL,
            fparams![
                1_i64,
                started_at,
                hour_id,
                day_id,
                "tester",
                0_i64,
                LOCAL_SOURCE_ID,
                "user",
                expected_bytes,
                expected_bytes / 4,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                0_i64,
                "",
                "estimated",
                0_i64,
                0_i64,
                0_i64,
                "",
                "unknown",
                "unknown",
                "unknown"
            ],
        )
        .unwrap();

    // Inline path: apply the delta inside an explicit transaction.
    let delta = StatsDelta {
        session_count_delta: 1,
        message_count_delta: 1,
        total_chars_delta: expected_bytes,
    };
    let mut tx = storage.conn.transaction().unwrap();
    franken_update_daily_stats_in_tx(
        &storage,
        &tx,
        "tester",
        LOCAL_SOURCE_ID,
        Some(started_at),
        delta,
    )
    .unwrap();
    tx.commit().unwrap();

    let inline_total = read_rollup_total_chars();
    assert_eq!(inline_total, expected_bytes);

    // Rebuild path: wipe the materialized rows and recompute them.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let rebuilt_total = read_rollup_total_chars();
    assert_eq!(rebuilt_total, expected_bytes);
}
18227
/// Same byte-count check as the metrics-backed variant, but no
/// `message_metrics` row is inserted: after wiping `daily_stats`,
/// `rebuild_daily_stats` must still yield a `total_chars` equal to the
/// content's byte length.
#[test]
fn rebuild_daily_stats_raw_fallback_preserves_byte_counts() {
    const INSERT_AGENT_SQL: &str =
        "INSERT INTO agents (id, slug, name, version, kind, created_at, updated_at)
         VALUES (?1, ?2, ?3, NULL, ?4, 0, 0)";
    const INSERT_CONVERSATION_SQL: &str = "INSERT INTO conversations (
            id, agent_id, workspace_id, source_id, external_id, title, source_path,
            started_at, ended_at, approx_tokens, metadata_json, origin_host, metadata_bin
         ) VALUES (?1, ?2, NULL, ?3, ?4, ?5, ?6, ?7, NULL, NULL, ?8, NULL, NULL)";
    const INSERT_MESSAGE_SQL: &str = "INSERT INTO messages (
            id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin
         ) VALUES (?1, ?2, ?3, ?4, NULL, ?5, ?6, NULL, NULL)";

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    // `str::len` is a byte count, so this is larger than the character count here.
    let content = "fallback🙂é漢字";
    let expected_bytes = content.len() as i64;
    let started_at = 1_704_067_200_000_i64;

    storage
        .conn
        .execute_compat(INSERT_AGENT_SQL, fparams![1_i64, "tester", "Tester", "cli"])
        .unwrap();
    storage
        .conn
        .execute_compat(
            INSERT_CONVERSATION_SQL,
            fparams![
                1_i64,
                1_i64,
                LOCAL_SOURCE_ID,
                "unicode-fallback",
                "Unicode Fallback",
                "/tmp/unicode-fallback.jsonl",
                started_at,
                "{}"
            ],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            INSERT_MESSAGE_SQL,
            fparams![1_i64, 1_i64, 0_i64, "assistant", started_at, content],
        )
        .unwrap();

    // Apply the inline delta first, exactly as the metrics-backed test does.
    let delta = StatsDelta {
        session_count_delta: 1,
        message_count_delta: 1,
        total_chars_delta: expected_bytes,
    };
    let mut tx = storage.conn.transaction().unwrap();
    franken_update_daily_stats_in_tx(
        &storage,
        &tx,
        "tester",
        LOCAL_SOURCE_ID,
        Some(started_at),
        delta,
    )
    .unwrap();
    tx.commit().unwrap();

    // Wipe the materialized rows and recompute them.
    storage.conn.execute("DELETE FROM daily_stats").unwrap();

    let rebuilt = storage.rebuild_daily_stats().unwrap();
    assert_eq!(rebuilt.total_sessions, 1);

    let rebuilt_total: i64 = storage
        .conn
        .query_row_map(
            "SELECT total_chars FROM daily_stats WHERE agent_slug = 'all' AND source_id = 'all'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(rebuilt_total, expected_bytes);
}
18305
/// Submits two conversations sharing one external id in a single batch, where
/// the second extends the first by two messages, and checks that they resolve
/// to one conversation row with four messages. The row count and ids are read
/// through the default plan, `NOT INDEXED`, and `idx_conversations_source_id`,
/// both on the original handle and on a freshly reopened one.
#[test]
fn insert_conversations_batched_appends_duplicate_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message `idx` is stamped 100 ms after message `idx - 1`.
    let message = |idx: i64, role: MessageRole, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(1_700_000_000_000 + idx * 100),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Both conversations differ only in their message list.
    let shared_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session".into()),
        title: Some("Shared Session".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let conv_a = shared_session(vec![
        message(0, MessageRole::User, "first"),
        message(1, MessageRole::Agent, "second"),
    ]);
    let conv_b = shared_session(vec![
        message(0, MessageRole::User, "first"),
        message(1, MessageRole::Agent, "second"),
        message(2, MessageRole::User, "third"),
        message(3, MessageRole::Agent, "fourth"),
    ]);

    let outcomes = storage
        .insert_conversations_batched(&[(agent_id, None, &conv_a), (agent_id, None, &conv_b)])
        .unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[1].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    // Single-value and id-list readers shared by all the queries below.
    let count_rows = |db: &SqliteStorage, sql: &str| -> i64 {
        db.conn
            .query_row_map(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };
    let list_ids = |db: &SqliteStorage, sql: &str| -> Vec<i64> {
        db.conn
            .query_map_collect(sql, fparams![], |row| row.get_typed(0))
            .unwrap()
    };

    // Counts on the handle that performed the insert.
    let conversation_count = count_rows(&storage, "SELECT COUNT(*) FROM conversations");
    let conversation_count_not_indexed =
        count_rows(&storage, "SELECT COUNT(*) FROM conversations NOT INDEXED");
    let conversation_count_source_index = count_rows(
        &storage,
        "SELECT COUNT(*) FROM conversations INDEXED BY idx_conversations_source_id",
    );
    let message_count = count_rows(&storage, "SELECT COUNT(*) FROM messages");

    // Same checks through a second handle opened on the same file.
    let reopened_storage = SqliteStorage::open(&db_path).unwrap();
    let reopened_conversation_count =
        count_rows(&reopened_storage, "SELECT COUNT(*) FROM conversations");
    let reopened_conversation_count_not_indexed = count_rows(
        &reopened_storage,
        "SELECT COUNT(*) FROM conversations NOT INDEXED",
    );
    let reopened_conversation_ids = list_ids(
        &reopened_storage,
        "SELECT id FROM conversations ORDER BY id",
    );
    let reopened_conversation_ids_not_indexed = list_ids(
        &reopened_storage,
        "SELECT id FROM conversations NOT INDEXED ORDER BY id",
    );
    let reopened_conversation_ids_source_index = list_ids(
        &reopened_storage,
        "SELECT id FROM conversations INDEXED BY idx_conversations_source_id ORDER BY id",
    );

    let only_conversation = vec![outcomes[0].conversation_id];
    assert_eq!(reopened_conversation_ids, only_conversation);
    assert_eq!(reopened_conversation_ids_not_indexed, only_conversation);
    assert_eq!(reopened_conversation_ids_source_index, only_conversation);
    assert_eq!(reopened_conversation_count, 1);
    assert_eq!(reopened_conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_not_indexed, 1);
    assert_eq!(conversation_count_source_index, 1);
    assert_eq!(conversation_count, 1);
    assert_eq!(message_count, 4);
}
18497
/// Inserts a conversation inside a transaction and then, in the same
/// transaction, calls `franken_insert_conversation_or_get_existing_after_miss`
/// for the identical conversation. The call must resolve to
/// `ConversationInsertStatus::Existing` with the first insert's id, and only
/// one conversation row may exist.
#[test]
fn franken_insert_conversation_or_get_existing_recovers_unique_conflict() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("recover-duplicate".into()),
        title: Some("Recover Duplicate".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        source_id: "local".into(),
        origin_host: None,
    };

    // Everything below runs in one transaction that is never committed.
    let tx = storage.conn.transaction().unwrap();
    let inserted_id = franken_insert_conversation(&tx, agent_id, None, &conv)
        .unwrap()
        .expect("first insert should succeed");

    // Second attempt for the same merge key.
    let conversation_key = conversation_merge_key(agent_id, &conv);
    let resolved = franken_insert_conversation_or_get_existing_after_miss(
        &tx,
        agent_id,
        None,
        &conv,
        &conversation_key,
    )
    .unwrap();

    let resolved_to_first_insert = matches!(
        resolved,
        ConversationInsertStatus::Existing(existing_id)
            if existing_id.cmp(&inserted_id).is_eq()
    );
    assert!(
        resolved_to_first_insert,
        "expected existing conversation id {inserted_id}"
    );

    let conversation_count: i64 = tx
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversation_count, 1);
}
18572
/// Submits two conversations with the same external id in one batch: the first
/// carries messages 2 and 3, the second carries 0, 1 and 3. The test asserts
/// that the second contributes only indices 0 and 1 and that the stored
/// indices end up as 0..=3.
#[test]
fn insert_conversations_batched_merges_duplicate_external_id_with_gaps() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Message `idx` is stamped 100 ms after message `idx - 1`.
    let message = |idx: i64, role: MessageRole, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(1_700_000_000_000 + idx * 100),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Both conversations differ only in their message list.
    let gapped_session = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("shared-session-gap".into()),
        title: Some("Shared Session Gap".into()),
        source_path: PathBuf::from("/tmp/rollout.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // Tail first, then the head plus an overlapping idx 3.
    let conv_a = gapped_session(vec![
        message(2, MessageRole::User, "third"),
        message(3, MessageRole::Agent, "fourth"),
    ]);
    let conv_b = gapped_session(vec![
        message(0, MessageRole::User, "first"),
        message(1, MessageRole::Agent, "second"),
        message(3, MessageRole::Agent, "fourth"),
    ]);

    let batch = [(agent_id, None, &conv_a), (agent_id, None, &conv_b)];
    let outcomes = storage.insert_conversations_batched(&batch).unwrap();
    assert_eq!(outcomes.len(), 2);
    assert_eq!(outcomes[0].inserted_indices, vec![2, 3]);
    assert_eq!(outcomes[1].inserted_indices, vec![0, 1]);
    assert_eq!(outcomes[0].conversation_id, outcomes[1].conversation_id);

    let stored_indices: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_indices, vec![0, 1, 2, 3]);
}
18678
/// Stores a conversation with messages at idx 0 and idx 20, then batches two
/// follow-ups for the same external id: an exact copy of idx 0 and a different
/// body for idx 20. Neither may insert anything, and the originally stored
/// rows must remain unchanged.
#[test]
fn insert_conversations_batched_refreshes_partial_pending_message_lookup() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // idx 0 is the user turn; every other idx is an agent turn.
    let make_message = |idx: i64, content: &str| {
        let role = match idx {
            0 => MessageRole::User,
            _ => MessageRole::Agent,
        };
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    };

    // All three conversations differ only in their message list.
    let session_with = |messages: Vec<Message>| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("partial-cache-session".into()),
        title: Some("Partial cache session".into()),
        source_path: PathBuf::from("/tmp/partial-cache.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // Baseline rows written outside the batch.
    let canonical = session_with(vec![
        make_message(0, "canonical zero"),
        make_message(20, "canonical twenty"),
    ]);
    storage
        .insert_conversation_tree(agent_id, None, &canonical)
        .unwrap();

    let exact_prefix = session_with(vec![make_message(0, "canonical zero")]);
    let conflicting_tail = session_with(vec![make_message(20, "conflicting twenty")]);

    let batch = [
        (agent_id, None, &exact_prefix),
        (agent_id, None, &conflicting_tail),
    ];
    let outcomes = storage.insert_conversations_batched(&batch).unwrap();

    assert_eq!(outcomes.len(), 2);
    assert!(outcomes[0].inserted_indices.is_empty());
    assert!(
        outcomes[1].inserted_indices.is_empty(),
        "the second batch item must refresh the partial pending lookup and retain the canonical idx=20 row"
    );

    let stored_messages: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let expected_messages = vec![
        (0, String::from("canonical zero")),
        (20, String::from("canonical twenty")),
    ];
    assert_eq!(stored_messages, expected_messages);
}
18769
/// Runs the same 64-message conversation through `insert_conversations_batched`
/// twice. The first pass must insert every message, the second must insert
/// none, and the database must end up with one conversation and 64 messages.
#[test]
fn insert_conversations_batched_reprocessing_conversation_is_idempotent() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    const MESSAGE_COUNT: i64 = 64;

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    let storage = SqliteStorage::open(&db_path).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Alternating user/agent turns, one millisecond apart.
    let mut messages: Vec<Message> = Vec::with_capacity(MESSAGE_COUNT as usize);
    for idx in 0..MESSAGE_COUNT {
        let role = if idx % 2 == 0 {
            MessageRole::User
        } else {
            MessageRole::Agent
        };
        messages.push(Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("large-reprocess-session".into()),
        title: Some("Large Reprocess Session".into()),
        source_path: PathBuf::from("/tmp/large-reprocess-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_000 + MESSAGE_COUNT - 1),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    // Two identical single-item batches, back to back.
    let batch = [(agent_id, None, &conversation)];
    let first = storage.insert_conversations_batched(&batch).unwrap();
    let second = storage.insert_conversations_batched(&batch).unwrap();

    assert_eq!(first.len(), 1);
    assert_eq!(second.len(), 1);
    assert_eq!(first[0].inserted_indices.len(), MESSAGE_COUNT as usize);
    assert!(
        second[0].inserted_indices.is_empty(),
        "full reprocessing of a large conversation must not attempt duplicate idx inserts"
    );
    assert_eq!(first[0].conversation_id, second[0].conversation_id);

    let conversation_count: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    let message_count: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();

    assert_eq!(conversation_count, 1);
    assert_eq!(message_count, MESSAGE_COUNT);
}
18855
#[test]
fn parallel_insert_conversation_tree_keeps_unique_external_ids_distinct() {
    // Inserts ten conversations with distinct external ids from several rayon
    // workers, each using its own writer connection, and checks that every
    // conversation ends up as its own row with all three messages inserted.
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use frankensqlite::compat::{ConnectionExt, RowExt};
    use rand::RngExt;
    use rayon::prelude::*;

    // Returns true when `err` (or, failing that, its root cause) is a
    // `FrankenError` of one of the busy / conflict / serialization variants
    // listed below. Any other error is treated as non-retryable.
    fn retryable_franken_error(err: &anyhow::Error) -> bool {
        err.downcast_ref::<frankensqlite::FrankenError>()
            .or_else(|| {
                err.root_cause()
                    .downcast_ref::<frankensqlite::FrankenError>()
            })
            .is_some_and(|inner| {
                matches!(
                    inner,
                    frankensqlite::FrankenError::Busy
                        | frankensqlite::FrankenError::BusyRecovery
                        | frankensqlite::FrankenError::BusySnapshot { .. }
                        | frankensqlite::FrankenError::WriteConflict { .. }
                        | frankensqlite::FrankenError::SerializationFailure { .. }
                )
            })
    }

    // Runs `f` up to 25 times (attempts 0..=24). A retryable error on any
    // attempt but the last sleeps for `backoff_ms` plus a random jitter in
    // `0..=backoff_ms`, then doubles the backoff (capped at 512 ms). A
    // non-retryable error, or a retryable one on the last attempt, is returned.
    fn with_retry<F, T>(mut f: F) -> anyhow::Result<T>
    where
        F: FnMut() -> anyhow::Result<T>,
    {
        let mut rng = rand::rng();
        let mut backoff_ms = 4_u64;
        for attempt in 0..=24 {
            match f() {
                Ok(value) => return Ok(value),
                Err(err) if attempt < 24 && retryable_franken_error(&err) => {
                    let sleep_ms = backoff_ms + rng.random_range(0..=backoff_ms);
                    std::thread::sleep(Duration::from_millis(sleep_ms));
                    backoff_ms = (backoff_ms * 2).min(512);
                }
                Err(err) => return Err(err),
            }
        }
        // Every iteration either returns or retries, and the final attempt
        // cannot take the retry arm because of the `attempt < 24` guard.
        unreachable!("retry loop must return on success or final failure")
    }

    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("parallel_insert_conversation_tree.db");
    // Open and immediately drop a handle before the parallel writers start.
    // NOTE(review): presumably this creates the database/schema up front so
    // the workers do not race on initialization — confirm against
    // `FrankenStorage::open`.
    let seed = FrankenStorage::open(&db_path).unwrap();
    drop(seed);

    // Ten conversations with unique external ids ("conv-0".."conv-9"), spread
    // over three agent slugs, each with its own workspace and three messages.
    let conversations: Vec<NormalizedConversation> = (0..10)
        .map(|i| NormalizedConversation {
            agent_slug: format!("agent-{}", i % 3),
            external_id: Some(format!("conv-{i}")),
            title: Some(format!("Conversation {i}")),
            workspace: Some(PathBuf::from(format!("/ws/{i}"))),
            source_path: PathBuf::from(format!("/log/{i}.jsonl")),
            started_at: Some(1_000 + i * 100),
            ended_at: Some(1_000 + i * 100 + 50),
            metadata: serde_json::json!({}),
            messages: (0..3)
                .map(|j| NormalizedMessage {
                    idx: j,
                    role: if j % 2 == 0 { "user" } else { "assistant" }.to_string(),
                    author: Some("tester".into()),
                    created_at: Some(1_000 + i * 100 + j * 10),
                    content: format!("parallel-distinct-test conv={i} msg={j}"),
                    extra: serde_json::json!({}),
                    snippets: vec![],
                    invocations: Vec::new(),
                })
                .collect(),
        })
        .collect();

    // Each chunk of up to three conversations is handled by one rayon task
    // with its own writer connection and its own agent/workspace id caches.
    // Outcomes are collected as (external_id, conversation_id, inserted_indices).
    let mut outcomes: Vec<(String, i64, Vec<i64>)> = conversations
        .par_chunks(3)
        .map(|chunk| {
            let storage = FrankenStorage::open_writer(&db_path).unwrap();
            let mut agent_cache: HashMap<String, i64> = HashMap::new();
            let mut workspace_cache: HashMap<PathBuf, i64> = HashMap::new();
            let mut chunk_outcomes = Vec::with_capacity(chunk.len());

            for conv in chunk {
                let agent_slug = conv.agent_slug.clone();
                let workspace = conv.workspace.clone();
                let external_id = conv.external_id.clone().expect("external id");
                let internal = map_to_internal(conv);
                // The whole ensure-agent / ensure-workspace / insert sequence
                // is retried as one unit. Ids cached by an earlier attempt are
                // reused on the next one.
                let outcome = with_retry(|| {
                    let agent_id = if let Some(id) = agent_cache.get(&agent_slug) {
                        *id
                    } else {
                        let agent = Agent {
                            id: None,
                            slug: agent_slug.clone(),
                            name: agent_slug.clone(),
                            version: None,
                            kind: AgentKind::Cli,
                        };
                        let id = storage.ensure_agent(&agent)?;
                        agent_cache.insert(agent_slug.clone(), id);
                        id
                    };
                    let workspace_id = if let Some(path) = &workspace {
                        if let Some(id) = workspace_cache.get(path) {
                            Some(*id)
                        } else {
                            let id = storage.ensure_workspace(path, None)?;
                            workspace_cache.insert(path.clone(), id);
                            Some(id)
                        }
                    } else {
                        None
                    };
                    storage.insert_conversation_tree(agent_id, workspace_id, &internal)
                })
                .unwrap();
                chunk_outcomes.push((
                    external_id,
                    outcome.conversation_id,
                    outcome.inserted_indices,
                ));
            }

            storage.close().unwrap();
            chunk_outcomes
        })
        .flatten()
        .collect();
    // Sort by external id so failure output is stable regardless of which
    // worker finished first.
    outcomes.sort_by(|left, right| left.0.cmp(&right.0));

    // Every conversation must report all three of its message indices as
    // newly inserted.
    assert!(
        outcomes
            .iter()
            .all(|(_, _, inserted_indices)| inserted_indices == &vec![0, 1, 2]),
        "unique external ids must not be routed through the existing-conversation merge path: {outcomes:?}"
    );

    // No two conversations may share a conversation id.
    let distinct_ids: HashSet<i64> = outcomes
        .iter()
        .map(|(_, conversation_id, _)| *conversation_id)
        .collect();
    assert_eq!(
        distinct_ids.len(),
        conversations.len(),
        "unique external ids must produce distinct conversation ids: {outcomes:?}"
    );

    // Re-open the database with a fresh handle and confirm the rows are
    // visible, both via COUNT(*) and via the materialized row list.
    let reader = FrankenStorage::open(&db_path).unwrap();
    let stored_rows: Vec<(i64, String)> = reader
        .raw()
        .query_map_collect(
            "SELECT id, external_id FROM conversations ORDER BY id",
            &[],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    let stored_count: i64 = reader
        .raw()
        .query_row_map("SELECT COUNT(*) FROM conversations", &[], |row| {
            row.get_typed(0)
        })
        .unwrap();

    assert_eq!(
        stored_count as usize,
        conversations.len(),
        "parallel distinct inserts must persist one row per external id; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
    assert_eq!(
        stored_rows.len(),
        conversations.len(),
        "parallel distinct inserts must remain visible after reopening; rows={stored_rows:?}; outcomes={outcomes:?}"
    );
}
19033
#[test]
fn insert_conversation_tree_merges_duplicate_external_id_with_gaps() {
    use crate::connectors::{NormalizedConversation, NormalizedMessage};
    use crate::indexer::persist::map_to_internal;
    use crate::model::types::{Agent, AgentKind};
    use std::path::PathBuf;

    // Two imports of the same external id arrive with partially overlapping
    // message indices (first 2 and 3, then 0, 1 and 3). The second import must
    // land in the same conversation and add only the indices still missing.

    // Wraps a message list in the shared "tree-gap-session" conversation.
    fn gap_session(messages: Vec<NormalizedMessage>) -> NormalizedConversation {
        NormalizedConversation {
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("tree-gap-session".into()),
            title: Some("Tree Gap Session".into()),
            source_path: PathBuf::from("/tmp/tree.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            metadata: serde_json::Value::Null,
            messages,
        }
    }

    // Builds one normalized message; everything not under test stays empty.
    let message = |idx, role: &str, created_at, content: &str| NormalizedMessage {
        idx,
        role: role.into(),
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra: serde_json::Value::Null,
        snippets: Vec::new(),
        invocations: Vec::new(),
    };

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let later_half = map_to_internal(&gap_session(vec![
        message(2, "user", 1_700_000_000_200, "third"),
        message(3, "assistant", 1_700_000_000_300, "fourth"),
    ]));
    let earlier_half_with_overlap = map_to_internal(&gap_session(vec![
        message(0, "user", 1_700_000_000_000, "first"),
        message(1, "assistant", 1_700_000_000_100, "second"),
        message(3, "assistant", 1_700_000_000_300, "fourth"),
    ]));

    let first = storage
        .insert_conversation_tree(agent_id, None, &later_half)
        .unwrap();
    let second = storage
        .insert_conversation_tree(agent_id, None, &earlier_half_with_overlap)
        .unwrap();

    assert_eq!(first.inserted_indices, vec![2, 3]);
    assert_eq!(second.inserted_indices, vec![0, 1]);
    assert_eq!(first.conversation_id, second.conversation_id);

    let stored_indices: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |r| {
            r.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_indices, vec![0, 1, 2, 3]);
}
19140
#[test]
fn insert_conversation_tree_skips_duplicate_message_indices_for_new_conversation() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // A brand-new conversation whose payload repeats idx 0: only the first
    // message seen for that index may be stored.

    // Builds a message with no id, author, extra JSON or snippets.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let messages = vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first canonical"),
        message(
            0,
            MessageRole::User,
            1_700_000_000_001,
            "duplicate idx should be skipped",
        ),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ];
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("duplicate-new-session".into()),
        title: Some("Duplicate New Session".into()),
        source_path: PathBuf::from("/tmp/duplicate-new-session.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages,
        source_id: "local".into(),
        origin_host: None,
    };

    let outcome = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    assert_eq!(outcome.inserted_indices, vec![0, 1]);

    let stored_messages: Vec<(i64, String)> = storage
        .conn
        .query_map_collect(
            "SELECT idx, content FROM messages ORDER BY idx",
            fparams![],
            |r| Ok((r.get_typed(0)?, r.get_typed(1)?)),
        )
        .unwrap();
    let expected = vec![
        (0, "first canonical".to_string()),
        (1, "second".to_string()),
    ];
    assert_eq!(stored_messages, expected);
}
19228
#[test]
fn insert_conversation_tree_merges_duplicate_source_path_without_external_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Two imports without an external id share a source path and start time
    // and overlap on idx 1. They must collapse into one conversation, with
    // the second import contributing only idx 2.

    // Builds a message with no id, author, extra JSON or snippets.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Wraps a message list in the shared-source-path conversation shell.
    fn shared_session(messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Source Path Merge".into()),
            source_path: PathBuf::from("/tmp/shared-session.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let initial_import = shared_session(vec![
        message(0, MessageRole::User, 1_700_000_000_000, "first"),
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
    ]);
    let first = storage
        .insert_conversation_tree(agent_id, None, &initial_import)
        .unwrap();

    let overlapping_import = shared_session(vec![
        message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        message(2, MessageRole::User, 1_700_000_000_200, "third"),
    ]);
    let second = storage
        .insert_conversation_tree(agent_id, None, &overlapping_import)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(first.inserted_indices, vec![0, 1]);
    assert_eq!(second.inserted_indices, vec![2]);

    let stored_indices: Vec<i64> = storage
        .conn
        .query_map_collect("SELECT idx FROM messages ORDER BY idx", fparams![], |r| {
            r.get_typed(0)
        })
        .unwrap();
    assert_eq!(stored_indices, vec![0, 1, 2]);
}
19333
#[test]
fn insert_conversation_tree_merges_source_path_duplicates_with_start_drift() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Same source path and no external id, but the second import reports a
    // start time 4 seconds later. The shared idx-1 message must still route
    // it into the first conversation, adding only idx 2.

    // Builds a message with no id, author, extra JSON or snippets.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation shell whose only varying parts are start time and messages.
    fn drift_session(started_at: Option<i64>, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Drift Merge".into()),
            source_path: PathBuf::from("/tmp/drift-session.jsonl"),
            started_at,
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let original_import = drift_session(
        Some(1_700_000_000_000),
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original_import)
        .unwrap();

    let drifted_import = drift_session(
        Some(1_700_000_004_000),
        vec![
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(2, MessageRole::User, 1_700_000_004_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &drifted_import)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![2]);
}
19435
#[test]
fn insert_conversation_tree_keeps_single_message_overlap_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Two sessions reuse one source path and open with the same text, but
    // start far apart and the second has only that single message. The test
    // expects them to stay two separate conversations.

    // Builds a message with no id, author, extra JSON or snippets.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // One-message session on the reused path; the message is stamped with
    // the session start time.
    fn single_message_session(started_at: i64, idx: i64, content: &str) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Partial overlap".into()),
            source_path: PathBuf::from("/tmp/reused-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![message(idx, MessageRole::User, started_at, content)],
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // The first session reuses the shell but swaps in its own two messages.
    let mut first_session = single_message_session(1_700_000_000_000, 0, "unused");
    first_session.messages = vec![
        message(0, MessageRole::User, 1_700_000_000_000, "shared opener"),
        message(
            1,
            MessageRole::Agent,
            1_700_000_000_100,
            "first session unique",
        ),
    ];
    storage
        .insert_conversation_tree(agent_id, None, &first_session)
        .unwrap();

    let second_session = single_message_session(1_700_000_900_000, 0, "shared opener");
    storage
        .insert_conversation_tree(agent_id, None, &second_session)
        .unwrap();

    let conversation_count: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
            r.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversation_count, 2);
}
19526
#[test]
fn insert_conversation_tree_keeps_distinct_source_path_sessions_separate() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Two unrelated sessions written to the same source path, with different
    // start times and different content, must remain two conversations.

    // One-message session on the reused path.
    fn session(started_at: i64, created_at: i64, content: &str) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Same Path Different Session".into()),
            source_path: PathBuf::from("/tmp/reused-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // In both sessions the message timestamp equals the session start.
    let sessions = [
        (1_700_000_000_000, "first session"),
        (1_700_000_900_000, "second session"),
    ];
    for (started_at, content) in sessions {
        storage
            .insert_conversation_tree(agent_id, None, &session(started_at, started_at, content))
            .unwrap();
    }

    let conversation_count: i64 = storage
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
            r.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversation_count, 2);
}
19593
#[test]
fn insert_conversation_tree_merges_replay_equivalent_messages_with_shifted_idx() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // A replay of the same session arrives with every idx shifted by ten and
    // a later start time. Messages matching already-stored ones must be
    // recognised, so only the new message (idx 12) is inserted, into the
    // original conversation.

    // Builds a message with no id, author, extra JSON or snippets.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation shell on the replayed source path.
    fn replay_session(started_at: i64, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Shifted replay".into()),
            source_path: PathBuf::from("/tmp/replay-session.jsonl"),
            started_at: Some(started_at),
            ended_at: Some(started_at + 500),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let database_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&database_file).unwrap();

    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let original = replay_session(
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let first = storage
        .insert_conversation_tree(agent_id, None, &original)
        .unwrap();

    let shifted_replay = replay_session(
        1_700_000_900_000,
        vec![
            message(10, MessageRole::User, 1_700_000_000_000, "first"),
            message(11, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(12, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );
    let second = storage
        .insert_conversation_tree(agent_id, None, &shifted_replay)
        .unwrap();

    assert_eq!(first.conversation_id, second.conversation_id);
    assert_eq!(second.inserted_indices, vec![12]);

    let stored_indices: Vec<i64> = storage
        .conn
        .query_map_collect(
            "SELECT idx FROM messages WHERE conversation_id = ?1 ORDER BY idx",
            fparams![first.conversation_id],
            |r| r.get_typed(0),
        )
        .unwrap();
    assert_eq!(stored_indices, vec![0, 1, 12]);
}
19715
#[test]
fn salvage_historical_databases_imports_backups_once_and_merges_overlap() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Seeds two historical database files next to the canonical one: a
    // backup holding one half of a shared conversation, and a "corrupt" copy
    // holding the overlapping other half plus an unrelated conversation. The
    // first salvage must import both bundles and merge the overlap; a second
    // salvage must import nothing.

    // Builds a message with no id, author, extra JSON or snippets.
    fn message(idx: i64, role: MessageRole, created_at: i64, content: &str) -> Message {
        Message {
            id: None,
            idx,
            role,
            author: None,
            created_at: Some(created_at),
            content: content.into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }
    }

    // Conversation shell shared by all recovered fixtures.
    fn recovered(source_path: &str, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: None,
            title: Some("Recovered".into()),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: "local".into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let shared_first_half = recovered(
        "/tmp/shared-history.jsonl",
        vec![
            message(0, MessageRole::User, 1_700_000_000_000, "first"),
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
        ],
    );
    let shared_second_half = recovered(
        "/tmp/shared-history.jsonl",
        vec![
            message(1, MessageRole::Agent, 1_700_000_000_100, "second"),
            message(2, MessageRole::User, 1_700_000_000_200, "third"),
        ],
    );
    // The unrelated conversation keeps the shell but has its own time window.
    let mut unrelated = recovered(
        "/tmp/unique-history.jsonl",
        vec![message(0, MessageRole::User, 1_700_000_001_000, "unique")],
    );
    unrelated.started_at = Some(1_700_000_001_000);
    unrelated.ended_at = Some(1_700_000_001_100);

    let backup_file = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&backup_file, std::slice::from_ref(&shared_first_half));

    let corrupt_file = tmp.path().join("agent_search.corrupt.20260324_212907");
    seed_historical_db_direct(&corrupt_file, &[shared_second_half, unrelated]);

    let first_pass = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(first_pass.bundles_considered, 2);
    assert_eq!(first_pass.bundles_imported, 2);
    assert_eq!(first_pass.messages_imported, 4);

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations.len(), 2);

    let shared_id = conversations
        .iter()
        .find(|conv| conv.source_path == std::path::Path::new("/tmp/shared-history.jsonl"))
        .and_then(|conv| conv.id)
        .unwrap();
    let shared_messages = storage.fetch_messages(shared_id).unwrap();
    let shared_indices: Vec<i64> = shared_messages.into_iter().map(|m| m.idx).collect();
    assert_eq!(shared_indices, vec![0, 1, 2]);

    let second_pass = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(second_pass.bundles_imported, 0);
    assert_eq!(second_pass.messages_imported, 0);
}
19845
#[test]
fn salvage_historical_databases_normalizes_host_only_remote_provenance() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // A historical backup contains a conversation whose source id is blank
    // while its origin host is "builder-5". After salvage, the canonical
    // database is expected to list "builder-5" as the only source id and to
    // attribute the conversation to it.

    const INSERT_BLANK_SOURCE_SQL: &str = "INSERT INTO sources(id, kind, host_label, created_at, updated_at) VALUES(?1, ?2, ?3, ?4, ?5)";
    const BLANK_OUT_CONVERSATION_SOURCE_SQL: &str =
        "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE source_path = ?3";
    const DELETE_SOURCE_SQL: &str = "DELETE FROM sources WHERE id = ?1";

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "host-only remote".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let host_only_remote = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: None,
        title: Some("Recovered Host Only Remote".into()),
        source_path: PathBuf::from("/tmp/host-only-history.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_999),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![only_message],
        source_id: " ".into(),
        origin_host: Some("builder-5".into()),
    };

    let historical_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    seed_historical_db_direct(&historical_db, std::slice::from_ref(&host_only_remote));

    // Rewrite the seeded backup in place, in this order: add a blank-id
    // source row, point the conversation at it, then delete any "builder-5"
    // source row.
    let backup_conn =
        FrankenConnection::open(historical_db.to_string_lossy().into_owned()).unwrap();
    backup_conn
        .execute_compat(
            INSERT_BLANK_SOURCE_SQL,
            fparams![" ", "ssh", "builder-5", 0_i64, 0_i64],
        )
        .unwrap();
    backup_conn
        .execute_compat(
            BLANK_OUT_CONVERSATION_SOURCE_SQL,
            fparams![" ", "builder-5", "/tmp/host-only-history.jsonl"],
        )
        .unwrap();
    backup_conn
        .execute_compat(DELETE_SOURCE_SQL, fparams!["builder-5"])
        .unwrap();
    drop(backup_conn);

    let salvage = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(salvage.bundles_imported, 1);
    assert_eq!(salvage.messages_imported, 1);

    let source_ids = storage.get_source_ids().unwrap();
    assert_eq!(source_ids, vec!["builder-5".to_string()]);

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations.len(), 1);
    assert_eq!(conversations[0].source_id, "builder-5");
    assert_eq!(conversations[0].origin_host.as_deref(), Some("builder-5"));
}
19916
#[test]
fn historical_salvage_retry_splits_single_conversation_until_it_fits() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Drives the retry helper with a fake importer that fails whenever a
    // batch carries more than one message. The helper must keep splitting
    // the single four-message conversation until every attempt carries one
    // message, and still report all four as imported.

    let mut four_messages: Vec<Message> = Vec::with_capacity(4);
    for idx in 0..4 {
        four_messages.push(Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + idx),
            content: format!("message-{idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        });
    }

    let entry = HistoricalBatchEntry {
        source_row_id: 77,
        agent_id: 1,
        workspace_id: None,
        conversation: Conversation {
            id: None,
            agent_slug: "gemini".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("conv-77".into()),
            title: Some("Large recovered conversation".into()),
            source_path: PathBuf::from("/tmp/history.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_999),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: four_messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        },
    };

    // Per-attempt record of how many messages each batch entry carried.
    let mut attempts: Vec<Vec<usize>> = Vec::new();

    let totals = SqliteStorage::import_historical_batch_with_retry(
        std::slice::from_ref(&entry),
        &mut |batch| {
            let sizes: Vec<usize> = batch
                .iter()
                .map(|item| item.conversation.messages.len())
                .collect();
            let total_messages: usize = sizes.iter().sum();
            attempts.push(sizes);

            // Any batch with more than one message fails with an error.
            if total_messages <= 1 {
                Ok(HistoricalBatchImportTotals {
                    inserted_source_rows: batch.len(),
                    inserted_messages: total_messages,
                })
            } else {
                Err(anyhow!("out of memory"))
            }
        },
    )
    .unwrap();

    let expected_totals = HistoricalBatchImportTotals {
        inserted_source_rows: 1,
        inserted_messages: 4,
    };
    assert_eq!(totals, expected_totals);

    // The very first attempt is the whole conversation.
    assert_eq!(attempts.first().cloned(), Some(vec![4]));

    let single_message_attempts = attempts
        .iter()
        .filter(|sizes| sizes.as_slice() == [1_usize])
        .count();
    assert!(
        single_message_attempts >= 4,
        "expected recursive fallback to reach one-message slices"
    );
}
19993
/// Seeds a three-conversation backup, inserts the first of those conversations
/// into the canonical database, and records a bundle progress checkpoint at that
/// conversation's backup row id with counters 50 and 99.
///
/// The salvage run is then expected to report 52 conversations / 101 messages
/// (i.e. the checkpoint counters plus the two remaining conversations), leave
/// three conversations in the canonical database, remove the progress entry
/// from `meta`, and import nothing on a second run.
#[test]
fn salvage_historical_databases_resumes_from_progress_checkpoint() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    // Builds a single-message conversation whose ids/timestamps are derived from `seed`.
    let recovered = |source_path: &str, seed: i64| Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some(format!("conv-{seed}")),
        title: Some(format!("Recovered {seed}")),
        source_path: PathBuf::from(source_path),
        started_at: Some(1_700_000_000_000 + seed),
        ended_at: Some(1_700_000_000_100 + seed),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + seed),
            content: format!("message-{seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let seeded = [
        recovered("/tmp/one.jsonl", 1),
        recovered("/tmp/two.jsonl", 2),
        recovered("/tmp/three.jsonl", 3),
    ];
    seed_historical_db_direct(&backup_db, &seeded);

    // The canonical database already holds the first backup conversation.
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    storage
        .insert_conversation_tree(agent_id, None, &seeded[0])
        .unwrap();

    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|candidate| candidate.root_path == backup_db)
        .unwrap();

    // Checkpoint at the backup row id of the conversation that was already imported.
    let backup_db_location = backup_db.to_string_lossy().into_owned();
    let first_row_id: i64 = FrankenConnection::open(backup_db_location)
        .unwrap()
        .query_row_map(
            "SELECT id FROM conversations WHERE source_path = ?1",
            fparams!["/tmp/one.jsonl"],
            |row| row.get_typed(0),
        )
        .unwrap();
    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", first_row_id, 50, 99)
        .unwrap();

    let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(
        (
            outcome.bundles_imported,
            outcome.conversations_imported,
            outcome.messages_imported,
        ),
        (1, 52, 101)
    );
    assert_eq!(storage.list_conversations(10, 0).unwrap().len(), 3);

    let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let leftover_progress: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![progress_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        leftover_progress.is_none(),
        "completed salvage should clear bundle progress"
    );

    // A repeat run has nothing left to import.
    let rerun = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!((rerun.bundles_imported, rerun.messages_imported), (0, 0));
}
20093
/// Records a progress checkpoint equal to the backup's `MAX(id)` before running
/// salvage against an otherwise empty canonical database.
///
/// Expectations taken from the assertions: nothing is imported, the bundle is
/// nevertheless reported by `historical_bundle_already_imported`, and the
/// progress entry is removed from `meta`.
#[test]
fn salvage_historical_databases_skips_bundle_when_checkpoint_covers_backup() {
    use crate::model::types::{Conversation, Message, MessageRole};
    use std::path::PathBuf;

    /// Single-message conversation with ids/timestamps derived from `seed`.
    fn backup_conversation(source_path: &str, seed: i64) -> Conversation {
        let only_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000 + seed),
            content: format!("message-{seed}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(format!("conv-{seed}")),
            title: Some(format!("Recovered {seed}")),
            source_path: PathBuf::from(source_path),
            started_at: Some(1_700_000_000_000 + seed),
            ended_at: Some(1_700_000_000_100 + seed),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![only_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    }

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let backup_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");
    let storage = SqliteStorage::open(&canonical_db).unwrap();

    let seeded = [
        backup_conversation("/tmp/one.jsonl", 1),
        backup_conversation("/tmp/two.jsonl", 2),
        backup_conversation("/tmp/three.jsonl", 3),
    ];
    seed_historical_db_direct(&backup_db, &seeded);

    let bundle = discover_historical_database_bundles(&canonical_db)
        .into_iter()
        .find(|candidate| candidate.root_path == backup_db)
        .unwrap();

    // Highest conversation id present in the backup; the checkpoint is placed exactly there.
    let backup_db_location = backup_db.to_string_lossy().into_owned();
    let backup_max_id: i64 = FrankenConnection::open(backup_db_location)
        .unwrap()
        .query_row_map(
            "SELECT COALESCE(MAX(id), 0) FROM conversations",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert!(backup_max_id > 0, "seeded backup should have conversations");

    storage
        .record_historical_bundle_progress(&bundle, "direct-readonly", backup_max_id, 3, 3)
        .unwrap();

    let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
    assert_eq!(
        outcome.bundles_imported, 0,
        "fully-checkpointed bundle must not be re-scanned"
    );
    assert_eq!(outcome.conversations_imported, 0);
    assert_eq!(outcome.messages_imported, 0);

    let canonical_conversation_count = storage.list_conversations(10, 0).unwrap().len();
    assert_eq!(
        canonical_conversation_count, 0,
        "skip path must not import anything"
    );

    let ledgered = storage.historical_bundle_already_imported(&bundle).unwrap();
    assert!(
        ledgered,
        "skipped bundle must be ledgered as salvaged so future runs short-circuit"
    );

    let progress_key = SqliteStorage::historical_bundle_progress_key(&bundle);
    let leftover_progress: Option<String> = storage
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![progress_key.as_str()],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(
        leftover_progress.is_none(),
        "skip path must clear the bundle progress checkpoint"
    );
}
20197
/// Inserts conversations a, b, c (in that order) with `started_at` values
/// 3000, 1000, 2000 and compares two listings.
///
/// `list_conversations` is expected to yield a, c, b, whereas the lexical
/// rebuild listing is expected to yield a, b, c — i.e. insertion/id order —
/// and to page correctly when the last id of a page is fed back as the cursor.
/// The `_through_id` variant is checked with the first page's last id as the
/// upper bound.
#[test]
fn list_conversations_for_lexical_rebuild_uses_stable_id_order() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Insertion order (a, b, c) deliberately disagrees with started_at order.
    for (source_path, started_at) in [
        ("/tmp/a.jsonl", 3_000_i64),
        ("/tmp/b.jsonl", 1_000_i64),
        ("/tmp/c.jsonl", 2_000_i64),
    ] {
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(source_path.to_string()),
            title: Some(source_path.to_string()),
            source_path: PathBuf::from(source_path),
            started_at: Some(started_at),
            ended_at: Some(started_at + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: None,
                created_at: Some(started_at),
                content: format!("message for {source_path}"),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    let path_a = PathBuf::from("/tmp/a.jsonl");
    let path_b = PathBuf::from("/tmp/b.jsonl");
    let path_c = PathBuf::from("/tmp/c.jsonl");

    // User-facing listing.
    let user_order: Vec<PathBuf> = storage
        .list_conversations(10, 0)
        .unwrap()
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(
        user_order,
        vec![path_a.clone(), path_c.clone(), path_b.clone()]
    );

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Rebuild listing, everything in one page.
    let rebuild_order: Vec<PathBuf> = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap()
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(
        rebuild_order,
        vec![path_a.clone(), path_b.clone(), path_c.clone()]
    );

    // Rebuild listing, two rows per page.
    let first_page = storage
        .list_conversations_for_lexical_rebuild_after_id(2, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    let first_page_paths: Vec<PathBuf> = first_page
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(first_page_paths, vec![path_a.clone(), path_b.clone()]);

    // The last id of the first page serves both as the next cursor and as the upper bound below.
    let first_page_last_id = first_page
        .last()
        .and_then(|conv| conv.id)
        .expect("first page should include an id");

    let second_page_paths: Vec<PathBuf> = storage
        .list_conversations_for_lexical_rebuild_after_id(
            2,
            first_page_last_id,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap()
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(second_page_paths, vec![path_c]);

    let bounded_paths: Vec<PathBuf> = storage
        .list_conversations_for_lexical_rebuild_after_id_through_id(
            10,
            0,
            first_page_last_id,
            &agent_slugs,
            &workspace_paths,
        )
        .unwrap()
        .iter()
        .map(|conv| conv.source_path.clone())
        .collect();
    assert_eq!(bounded_paths, vec![path_a, path_b]);
}
20335
/// Inserts six conversations, deletes the rows with ids 2 and 4 (and their
/// messages) with foreign keys temporarily disabled, and then pages through
/// the lexical rebuild listing two rows at a time.
///
/// Each page's last id is used as the cursor for the next request; the pages
/// are expected to be `[1, 3]`, `[5, 6]` and finally empty.
#[test]
fn keyset_traversal_handles_sparse_holey_conversation_ids() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    for seq in 0..6_i64 {
        let label = format!("conv-{seq}");
        let ts = 1000 + seq;
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: None,
                created_at: Some(ts),
                content: format!("msg for {label}"),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Punch holes into the id sequence; FK enforcement is switched off around the deletes.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    storage
        .conn
        .execute_compat("DELETE FROM conversations WHERE id IN (2, 4)", fparams![])
        .unwrap();
    storage
        .conn
        .execute_compat(
            "DELETE FROM messages WHERE conversation_id IN (2, 4)",
            fparams![],
        )
        .unwrap();
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Request three consecutive pages of size two, advancing the cursor to each page's last id.
    let mut cursor: i64 = 0;
    let mut pages: Vec<Vec<i64>> = Vec::new();
    for _ in 0..3 {
        let page = storage
            .list_conversations_for_lexical_rebuild_after_id(
                2,
                cursor,
                &agent_slugs,
                &workspace_paths,
            )
            .unwrap();
        let page_ids: Vec<i64> = page.iter().map(|conv| conv.id.unwrap()).collect();
        if let Some(&last_id) = page_ids.last() {
            cursor = last_id;
        }
        pages.push(page_ids);
    }

    assert_eq!(pages[0], vec![1, 3]);
    assert_eq!(pages[1], vec![5, 6]);
    assert!(pages[2].is_empty());

    // Stitched together, the non-empty pages cover exactly the surviving ids.
    let all_ids: Vec<i64> = [pages[0].as_slice(), pages[1].as_slice()].concat();
    assert_eq!(all_ids, vec![1, 3, 5, 6]);
}
20436
/// Inserts ten conversations, deletes ids 3, 5, 7 and 8 (plus their messages),
/// and queries the `_after_id_through_id` lexical rebuild listing over three
/// id windows.
///
/// Expected ids per the assertions: window (0, 5] gives `[1, 2, 4]`,
/// window (4, 10] gives `[6, 9, 10]`, and window (10, 20] gives nothing.
#[test]
fn keyset_traversal_through_id_with_sparse_ranges() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    for seq in 0..10_i64 {
        let label = format!("conv-{seq}");
        let ts = 1000 + seq;
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(label.to_string()),
            title: Some(label.to_string()),
            source_path: PathBuf::from(format!("/tmp/{label}.jsonl")),
            started_at: Some(ts),
            ended_at: Some(ts + 1),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![Message {
                id: None,
                idx: 0,
                role: MessageRole::User,
                author: None,
                created_at: Some(ts),
                content: format!("msg for {label}"),
                extra_json: serde_json::Value::Null,
                snippets: Vec::new(),
            }],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap();
    }

    // Remove several rows so the id space has gaps; FK enforcement is off during the deletes.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversations WHERE id IN (3, 5, 7, 8)",
            fparams![],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            "DELETE FROM messages WHERE conversation_id IN (3, 5, 7, 8)",
            fparams![],
        )
        .unwrap();
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();

    // Ids returned for the window described by (after_id, through_id), with a page limit of 100.
    let ids_in_window = |after_id: i64, through_id: i64| -> Vec<i64> {
        storage
            .list_conversations_for_lexical_rebuild_after_id_through_id(
                100,
                after_id,
                through_id,
                &agent_slugs,
                &workspace_paths,
            )
            .unwrap()
            .iter()
            .map(|conv| conv.id.unwrap())
            .collect()
    };

    assert_eq!(ids_in_window(0, 5), vec![1, 2, 4]);
    assert_eq!(ids_in_window(4, 10), vec![6, 9, 10]);
    assert!(ids_in_window(10, 20).is_empty());
}
20543
/// Inserts four conversations — two ASCII messages, no messages, one
/// multi-byte message (with its `conversation_tail_state` row deleted
/// afterwards), and a single message at `idx` 10 — and checks the rows
/// returned by `list_conversation_footprints_for_lexical_rebuild`.
///
/// Per the expected rows: the empty conversation is still listed with zero
/// counts, the `idx` 10 conversation is reported with `message_count` 11, and
/// `message_bytes` is always `message_count` times
/// `LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE`.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_estimates_bytes_and_keeps_empty_conversations()
{
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Shorthand for a message with no id, author, extra JSON or snippets.
    let message = |idx: i64, role: MessageRole, created_at: i64, content: &str| Message {
        id: None,
        idx,
        role,
        author: None,
        created_at: Some(created_at),
        content: content.into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };

    // Stores a conversation named after `external_id` and hands back its database id.
    let store = |external_id: &str, base_ts: i64, messages: Vec<Message>| {
        let conversation = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some(external_id.to_string()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages,
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        storage
            .insert_conversation_tree(agent_id, None, &conversation)
            .unwrap()
            .conversation_id
    };

    let ascii_id = store(
        "footprint-ascii",
        1_700_000_000_000,
        vec![
            message(0, MessageRole::User, 1_700_000_000_001, "abc"),
            message(1, MessageRole::Agent, 1_700_000_000_002, "defg"),
        ],
    );
    let empty_id = store("footprint-empty", 1_700_000_001_000, Vec::new());
    let utf8_id = store(
        "footprint-utf8",
        1_700_000_002_000,
        vec![message(0, MessageRole::Tool, 1_700_000_002_001, "hé🙂")],
    );
    let sparse_id = store(
        "footprint-sparse",
        1_700_000_003_000,
        vec![message(10, MessageRole::User, 1_700_000_003_010, "sparse")],
    );

    // Drop the tail-state row of the multi-byte conversation before listing.
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![utf8_id],
        )
        .unwrap();

    let expected = vec![
        LexicalRebuildConversationFootprintRow {
            conversation_id: ascii_id,
            message_count: 2,
            message_bytes: 2 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: empty_id,
            message_count: 0,
            message_bytes: 0,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: utf8_id,
            message_count: 1,
            message_bytes: LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
        LexicalRebuildConversationFootprintRow {
            conversation_id: sparse_id,
            message_count: 11,
            message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
        },
    ];

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();
    assert_eq!(footprints, expected);
}
20679
/// Stores one conversation whose only message sits at `idx` 10, then nulls
/// the `last_message_*` columns on `conversations` and deletes the matching
/// `conversation_tail_state` row.
///
/// The footprint listing is still expected to report 11 messages for that
/// conversation rather than treating it as empty.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_falls_back_for_missing_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail".to_string()),
        title: Some("footprint-missing-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Wipe both places where the tail metadata is stored for this conversation.
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = NULL, last_message_created_at = NULL
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert_eq!(
        footprints, expected,
        "missing tail-cache metadata should fall back to messages MAX(idx) instead of treating legacy conversations as empty"
    );
}
20759
/// Stores a three-message conversation (`idx` 0..=2) and then rewrites the
/// cached tail metadata in both `conversations` and `conversation_tail_state`
/// to claim the last message index is 0.
///
/// The footprint listing is expected to report three messages regardless of
/// the lowered cache values.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_raises_stale_low_tail_cache() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let three_messages: Vec<Message> = (0..3)
        .map(|idx| Message {
            id: None,
            idx,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_010 + idx),
            content: format!("message {idx}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        })
        .collect();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-stale-tail".to_string()),
        title: Some("footprint-stale-tail".to_string()),
        source_path: PathBuf::from("/tmp/footprint-stale-tail.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: three_messages,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Make both cached copies of the tail position lag behind the real messages.
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = 0, last_message_created_at = 1700000000010
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    storage
        .conn
        .execute_compat(
            "UPDATE conversation_tail_state
             SET last_message_idx = 0, last_message_created_at = 1700000000010
             WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 3,
        message_bytes: 3 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert_eq!(
        footprints, expected,
        "stale-low tail caches must not under-plan lexical shards and trip doc>plan invariants"
    );
}
20843
/// Stores one conversation whose only message sits at `idx` 10, nulls the
/// `last_message_*` columns on `conversations`, and drops the
/// `conversation_tail_state` table altogether.
///
/// The footprint listing must still succeed and report 11 messages for the
/// conversation.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_missing_tail_state_table() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let sparse_message = Message {
        id: None,
        idx: 10,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_010),
        content: "legacy sparse tail without hot table".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("footprint-missing-tail-table".to_string()),
        title: Some("footprint-missing-tail-table".to_string()),
        source_path: PathBuf::from("/tmp/footprint-missing-tail-table.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![sparse_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Clear the cached tail columns, then remove the tail-state table itself.
    storage
        .conn
        .execute_compat(
            "UPDATE conversations
             SET last_message_idx = NULL, last_message_created_at = NULL
             WHERE id = ?1",
            fparams![conversation_id],
        )
        .unwrap();
    storage
        .conn
        .execute_compat("DROP TABLE conversation_tail_state", fparams![])
        .unwrap();

    let expected = vec![LexicalRebuildConversationFootprintRow {
        conversation_id,
        message_count: 11,
        message_bytes: 11 * LEXICAL_REBUILD_PLANNER_ESTIMATED_BYTES_PER_MESSAGE,
    }];
    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    assert_eq!(
        footprints, expected,
        "read-only lexical self-heal must tolerate pre-tail-cache databases and use messages MAX(idx)"
    );
}
20920
/// Opens the checked-in `tests/fixtures/search_demo_data/agent_search.db`
/// fixture read-only and requires the footprint listing to return at least one
/// row, with every row reporting a positive `message_count`.
#[test]
fn list_conversation_footprints_for_lexical_rebuild_tolerates_legacy_search_demo_fixture() {
    // Path components are pushed one by one onto the crate's manifest directory.
    let fixture_db: PathBuf = [
        env!("CARGO_MANIFEST_DIR"),
        "tests",
        "fixtures",
        "search_demo_data",
        "agent_search.db",
    ]
    .iter()
    .collect();
    let storage = FrankenStorage::open_readonly(&fixture_db).unwrap();

    let footprints = storage
        .list_conversation_footprints_for_lexical_rebuild()
        .unwrap();

    let has_rows = !footprints.is_empty();
    assert!(
        has_rows,
        "search self-heal should be able to plan a lexical rebuild from the legacy search demo fixture"
    );

    let every_row_has_messages = footprints.iter().all(|row| row.message_count > 0);
    assert!(
        every_row_has_messages,
        "legacy fixture conversations should derive message counts from messages when tail caches are absent"
    );
}
20945
/// Stores a local conversation, then overwrites its row so that `source_id`
/// is blank and `origin_host` is `"dev@laptop"` (foreign keys are disabled
/// around the update).
///
/// Both `list_conversations` and the lexical rebuild listing are expected to
/// report `"dev@laptop"` as the `source_id` and as the `origin_host`.
#[test]
fn lexical_rebuild_listing_normalizes_host_only_remote_source_from_blank_source_id() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("agent_search.db");
    let storage = SqliteStorage::open(&db_path).unwrap();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let legacy_conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("legacy-blank-source".into()),
        title: Some("Legacy blank source".into()),
        source_path: PathBuf::from("/tmp/legacy-blank-source.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &legacy_conversation)
        .unwrap()
        .conversation_id;

    // Rewrite the stored row into the legacy shape: blank source id, host only.
    storage.conn.execute("PRAGMA foreign_keys = OFF").unwrap();
    storage
        .conn
        .execute_compat(
            "UPDATE conversations SET source_id = ?1, origin_host = ?2 WHERE id = ?3",
            fparams![" ", "dev@laptop", conversation_id],
        )
        .unwrap();
    storage.conn.execute("PRAGMA foreign_keys = ON").unwrap();

    // User-facing listing.
    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let listed_row = &listed[0];
    assert_eq!(listed_row.source_id, "dev@laptop");
    assert_eq!(listed_row.origin_host.as_deref(), Some("dev@laptop"));

    // Lexical rebuild listing.
    let (agent_slugs, workspace_paths) = storage.build_lexical_rebuild_lookups().unwrap();
    let rebuild_listed = storage
        .list_conversations_for_lexical_rebuild_after_id(10, 0, &agent_slugs, &workspace_paths)
        .unwrap();
    assert_eq!(rebuild_listed.len(), 1);
    let rebuild_row = &rebuild_listed[0];
    assert_eq!(rebuild_row.source_id, "dev@laptop");
    assert_eq!(rebuild_row.origin_host.as_deref(), Some("dev@laptop"));
}
21014
/// Seeds a fresh canonical database from a backup bundle and checks that the
/// conversation data is copied while runtime bookkeeping keys in `meta` are
/// cleared. On non-Windows targets the backup is first doctored to carry two
/// `fts_messages` rows in `sqlite_master`.
#[test]
fn seed_canonical_from_best_historical_bundle_copies_data_and_resets_runtime_meta() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let canonical_db = tmp.path().join("agent_search.db");
    let source_db = tmp
        .path()
        .join("backups/agent_search.db.20260322T020200.bak");

    fs::create_dir_all(source_db.parent().unwrap()).unwrap();

    // --- Build the historical bundle -------------------------------------
    let source = SqliteStorage::open(&source_db).unwrap();
    let codex_id = source
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();
    let seeded_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "seeded message".into(),
        extra_json: serde_json::json!({"usage": {"total_tokens": 12}}),
        snippets: Vec::new(),
    };
    let historical = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("seed-conv".into()),
        title: Some("Historical seed".into()),
        source_path: PathBuf::from("/tmp/historical-seed.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::json!({"seed": true}),
        messages: vec![seeded_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    source
        .insert_conversation_tree(codex_id, None, &historical)
        .unwrap();
    // Runtime bookkeeping that the seed is later asserted to have cleared.
    source.set_last_scan_ts(123).unwrap();
    source.set_last_indexed_at(456).unwrap();
    source.set_last_embedded_message_id(789).unwrap();
    source
        .conn
        .execute_compat(
            "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
            fparams!["historical_bundle_salvaged:stale", "{\"stale\":true}"],
        )
        .unwrap();
    drop(source);

    // --- Doctor the bundle into a legacy shape (non-Windows only) ---------
    #[cfg(not(windows))]
    {
        let legacy_v13_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, content='', tokenize='porter')";
        let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        let legacy = rusqlite_test_fixture_conn(&source_db);
        // Roll the recorded schema version back to 13 and open the schema
        // table for direct writes.
        legacy
            .execute_batch(
                "UPDATE meta SET value = '13' WHERE key = 'schema_version';
                 DELETE FROM _schema_migrations WHERE version = 14;
                 PRAGMA writable_schema = ON;",
            )
            .unwrap();
        legacy
            .execute(
                "DELETE FROM meta WHERE key = ?1",
                [FTS_FRANKEN_REBUILD_META_KEY],
            )
            .unwrap();
        // Insert the two `fts_messages` schema rows, in the same order as
        // before: the v13 definition first, then the duplicate.
        for fts_sql in [legacy_v13_fts_sql, duplicate_legacy_fts_sql] {
            legacy
                .execute(
                    "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
                     VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
                    [fts_sql],
                )
                .unwrap();
        }
        legacy
            .execute_batch("PRAGMA writable_schema = OFF;")
            .unwrap();
        drop(legacy);

        {
            // Sanity-check the fixture on a separate connection before seeding.
            let verify = rusqlite_test_fixture_conn(&source_db);
            verify
                .execute_batch("PRAGMA writable_schema = ON;")
                .unwrap();
            let fts_entries: i64 = verify
                .query_row(
                    "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
                    [],
                    |row| row.get(0),
                )
                .unwrap();
            assert_eq!(
                fts_entries, 2,
                "test fixture should reproduce the duplicate legacy fts_messages rows"
            );
            let msg_count: i64 = verify
                .query_row("SELECT COUNT(*) FROM messages", [], |row| row.get(0))
                .unwrap();
            assert_eq!(msg_count, 1);
        }
    }

    // --- Create an empty canonical db, then seed it ------------------------
    drop(SqliteStorage::open(&canonical_db).unwrap());

    let outcome = seed_canonical_from_best_historical_bundle(&canonical_db)
        .unwrap()
        .unwrap();
    assert_eq!(outcome.bundles_imported, 1);
    assert_eq!(outcome.conversations_imported, 1);
    assert_eq!(outcome.messages_imported, 1);

    // A read-only handle must see the imported message.
    let readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let readonly_message_count: i64 = readonly
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(readonly_message_count, 1);

    // --- Inspect the seeded db through the storage wrapper ------------------
    let seeded = SqliteStorage::open(&canonical_db).unwrap();
    let session_stats = seeded
        .count_sessions_in_range(None, None, None, None)
        .unwrap();
    assert_eq!(session_stats.0, 1);
    let message_count: i64 = seeded
        .conn
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(message_count, 1);
    assert_eq!(seeded.get_last_scan_ts().unwrap(), None);
    assert_eq!(seeded.get_last_embedded_message_id().unwrap(), None);

    let last_indexed: Option<String> = seeded
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = 'last_indexed_at'",
            fparams![],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert!(last_indexed.is_none());

    // Exactly one salvage marker may remain in `meta` after seeding.
    let salvage_keys: Vec<String> = seeded
        .conn
        .query_map_collect(
            "SELECT key FROM meta WHERE key LIKE 'historical_bundle_salvaged:%' ORDER BY key",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(salvage_keys.len(), 1);

    // --- Schema shape as seen by a second read-only handle ------------------
    let reopened_readonly = open_franken_with_flags(
        &canonical_db.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let reopened_fts_entries: i64 = reopened_readonly
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(
        reopened_fts_entries, 1,
        "seeded canonical db should keep a single stock-SQLite fts_messages schema row"
    );
    let reopened_message_count: i64 = reopened_readonly
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(reopened_message_count, 1);

    // --- FrankenStorage must open it at the current schema version ---------
    let franken_seeded = FrankenStorage::open(&canonical_db).unwrap();
    let seeded_version = franken_seeded.schema_version().unwrap();
    assert_eq!(seeded_version, CURRENT_SCHEMA_VERSION);
    franken_seeded
        .ensure_search_fallback_fts_consistency()
        .expect("ensure FTS consistency after seed");
    let post_franken_schema_rows: i64 = franken_seeded
        .raw()
        .query_row_map(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(post_franken_schema_rows, 1);
    let fts_probe = franken_seeded
        .raw()
        .query("SELECT COUNT(*) FROM fts_messages");
    assert!(
        fts_probe.is_ok(),
        "expected post-seed FTS to be queryable, got {fts_probe:?}"
    );
}
21265
/// When the only backup bundle reports `schema_version` 12, seeding must fail
/// with the "too old for baseline import" error and leave the existing
/// canonical database (sentinel meta row, zero conversations) as it was.
#[test]
fn failed_baseline_seed_preserves_existing_canonical_bundle() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let canonical_path = tmp.path().join("agent_search.db");
    let backup_path = tmp
        .path()
        .join("backups/agent_search.db.20260325T120000Z.bad-seed.bak");

    fs::create_dir_all(backup_path.parent().unwrap()).unwrap();

    // Canonical db with a marker row that has to survive the failed seed.
    {
        let canonical = SqliteStorage::open(&canonical_path).unwrap();
        canonical
            .conn
            .execute_compat(
                "INSERT OR REPLACE INTO meta(key, value) VALUES(?1, ?2)",
                fparams!["sentinel", "keep-me"],
            )
            .unwrap();
    }

    // Backup bundle holding one conversation.
    {
        let backup = SqliteStorage::open(&backup_path).unwrap();
        let codex_id = backup
            .ensure_agent(&Agent {
                id: None,
                slug: "codex".into(),
                name: "Codex".into(),
                version: Some("0.2.3".into()),
                kind: AgentKind::Cli,
            })
            .unwrap();
        let doomed_message = Message {
            id: None,
            idx: 0,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_050),
            content: "this seed should fail".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let doomed = Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some("bad-seed-conv".into()),
            title: Some("Bad seed".into()),
            source_path: PathBuf::from("/tmp/bad-seed.jsonl"),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_100),
            approx_tokens: Some(42),
            metadata_json: serde_json::json!({"seed": "bad"}),
            messages: vec![doomed_message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        backup
            .insert_conversation_tree(codex_id, None, &doomed)
            .unwrap();
    }

    // Rewrite the bundle's recorded schema version to 12 over a raw connection.
    {
        let downgrade =
            FrankenConnection::open(backup_path.to_string_lossy().into_owned()).unwrap();
        downgrade
            .execute("UPDATE meta SET value = '12' WHERE key = 'schema_version'")
            .unwrap();
    }

    let err = seed_canonical_from_best_historical_bundle(&canonical_path).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("schema_version 12 is too old for baseline import"),
        "unexpected seed error: {err:#}"
    );

    // The canonical db must still carry the sentinel and no conversations.
    let reopened = SqliteStorage::open(&canonical_path).unwrap();
    let sentinel: Option<String> = reopened
        .conn
        .query_row_map(
            "SELECT value FROM meta WHERE key = 'sentinel'",
            fparams![],
            |row| row.get_typed(0),
        )
        .optional()
        .unwrap();
    assert_eq!(sentinel.as_deref(), Some("keep-me"));

    let conversation_count: i64 = reopened
        .conn
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(conversation_count, 0);

    // Same check through a read-only handle.
    let readonly = open_franken_with_flags(
        &canonical_path.to_string_lossy(),
        FrankenOpenFlags::SQLITE_OPEN_READ_ONLY,
    )
    .unwrap();
    let readonly_conversation_count: i64 = readonly
        .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(readonly_conversation_count, 0);
}
21372
/// `fetch_messages` returns the stored `extra_json`, while
/// `fetch_messages_for_lexical_rebuild` returns the same message with a null
/// `extra_json` but intact content and author.
#[test]
fn fetch_messages_for_lexical_rebuild_skips_extra_json() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let store = SqliteStorage::open(&db_file).unwrap();

    let codex_id = store
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // One message carrying a non-trivial `extra_json` payload.
    let payload_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_050),
        content: "indexed text".into(),
        extra_json: serde_json::json!({
            "usage": { "total_tokens": 1234 },
            "irrelevant_blob": "still preserved in canonical storage"
        }),
        snippets: Vec::new(),
    };
    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-rebuild-test".into()),
        title: Some("Lexical rebuild".into()),
        source_path: PathBuf::from("/tmp/lexical-rebuild.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![payload_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let conv_id = store
        .insert_conversation_tree(codex_id, None, &conv)
        .unwrap()
        .conversation_id;

    // Full fetch keeps the payload.
    let stored = store.fetch_messages(conv_id).unwrap();
    assert_eq!(stored.len(), 1);
    assert!(!stored[0].extra_json.is_null());

    // Lexical-rebuild fetch drops it.
    let lexical = store.fetch_messages_for_lexical_rebuild(conv_id).unwrap();
    assert_eq!(lexical.len(), 1);
    let slim = &lexical[0];
    assert_eq!(slim.content, "indexed text");
    assert_eq!(slim.author.as_deref(), Some("assistant"));
    assert!(slim.extra_json.is_null());
}
21433
/// Batch fetch for `[third, first]` must return only those two conversations,
/// keyed by conversation id, with messages in `idx` order and `extra_json`
/// nulled out.
#[test]
fn fetch_messages_for_lexical_rebuild_batch_groups_and_orders_messages() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let store = SqliteStorage::open(&db_file).unwrap();

    let codex_id = store
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Conversation 1: two messages, so in-conversation ordering is observable.
    let first_payload = vec![
        Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_010),
            content: "first-a".into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        },
        Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_020),
            content: "first-b".into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        },
    ];
    let first = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-batch-1".into()),
        title: Some("Lexical batch 1".into()),
        source_path: PathBuf::from("/tmp/lexical-batch-1.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: first_payload,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    // Conversation 2: stored but deliberately left out of the batch request.
    let second = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-batch-2".into()),
        title: Some("Lexical batch 2".into()),
        source_path: PathBuf::from("/tmp/lexical-batch-2.jsonl"),
        started_at: Some(1_700_000_000_200),
        ended_at: Some(1_700_000_000_300),
        approx_tokens: Some(84),
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::Tool,
            author: Some("tool".into()),
            created_at: Some(1_700_000_000_210),
            content: "second-a".into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    // Conversation 3: same shape as the second, with its own identity fields
    // and message.
    let third = Conversation {
        external_id: Some("lexical-batch-3".into()),
        title: Some("Lexical batch 3".into()),
        source_path: PathBuf::from("/tmp/lexical-batch-3.jsonl"),
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::System,
            author: Some("system".into()),
            created_at: Some(1_700_000_000_410),
            content: "third-a".into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        }],
        ..second.clone()
    };

    // Insert in first/second/third order; `map` on an array runs in order.
    let [first_id, second_id, third_id] = [&first, &second, &third].map(|conv| {
        store
            .insert_conversation_tree(codex_id, None, conv)
            .unwrap()
            .conversation_id
    });

    let lexical = store
        .fetch_messages_for_lexical_rebuild_batch(&[third_id, first_id], None, None)
        .unwrap();

    let from_first = lexical.get(&first_id).expect("first conversation");
    assert_eq!(from_first.len(), 2);
    assert_eq!(from_first[0].content, "first-a");
    assert_eq!(from_first[1].content, "first-b");
    assert!(
        from_first
            .iter()
            .all(|message| message.extra_json.is_null())
    );

    assert!(
        !lexical.contains_key(&second_id),
        "batch fetch must exclude conversations not requested by the caller"
    );

    let from_third = lexical.get(&third_id).expect("third conversation");
    assert_eq!(from_third.len(), 1);
    assert_eq!(from_third[0].content, "third-a");
    assert!(from_third[0].extra_json.is_null());
}
21564
/// Two six-byte messages are fetched with limits `Some(10)` and `Some(8)`;
/// the call must fail with an error mentioning the content-byte guardrail.
#[test]
fn fetch_messages_for_lexical_rebuild_batch_enforces_content_byte_guardrail() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let store = SqliteStorage::open(&db_file).unwrap();

    let codex_id = store
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // 6 + 6 = 12 content bytes in total, more than either limit passed below.
    let question = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: Some("user".into()),
        created_at: Some(1_700_000_000_010),
        content: "123456".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let answer = Message {
        id: None,
        idx: 1,
        role: MessageRole::Agent,
        author: Some("assistant".into()),
        created_at: Some(1_700_000_000_020),
        content: "abcdef".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conv = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-batch-guard".into()),
        title: Some("Lexical batch guard".into()),
        source_path: PathBuf::from("/tmp/lexical-batch-guard.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![question, answer],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let inserted = store
        .insert_conversation_tree(codex_id, None, &conv)
        .unwrap();
    let conv_id = inserted.conversation_id;

    let error = store
        .fetch_messages_for_lexical_rebuild_batch(&[conv_id], Some(10), Some(8))
        .expect_err("guardrail should reject oversized batch content");

    // Render the whole error chain so the reason is found at any depth.
    let message = format!("{error:#}");
    let mentions_guardrail = message.contains("content-byte guardrail");
    assert!(
        mentions_guardrail,
        "expected guardrail reason in error, got {message}"
    );
}
21632
/// Rows written with plain SQL over the raw connection must be readable
/// through both `fetch_messages_for_lexical_rebuild` and `fetch_messages`.
#[test]
fn fetch_messages_handles_manual_rows_inserted_via_raw_connection() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("manual-rows.db");
    let store = FrankenStorage::open(&db_file).unwrap();
    let raw = store.raw();

    // Agent, conversation, then message: parent rows are inserted first.
    for insert_sql in [
        "INSERT INTO agents (id, slug, name, kind, created_at, updated_at)
         VALUES (1, 'claude_code', 'Claude Code', 'local', 0, 0)",
        "INSERT INTO conversations
         (id, agent_id, external_id, title, source_path, source_id, started_at)
         VALUES (1, 1, 'manual-ext', 'Manual Session', '/tmp/manual.jsonl', 'local', 200)",
        "INSERT INTO messages
         (id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
         VALUES (1, 1, 0, 'user', 'tester', 1700000000000, 'manual body', '{\"k\":1}', NULL)",
    ] {
        raw.execute(insert_sql).unwrap();
    }

    let lexical = store.fetch_messages_for_lexical_rebuild(1).unwrap();
    assert_eq!(lexical.len(), 1);
    assert_eq!(lexical[0].content, "manual body");

    let full = store.fetch_messages(1).unwrap();
    assert_eq!(full.len(), 1);
    let manual = &full[0];
    assert_eq!(manual.content, "manual body");
    assert_eq!(manual.author.as_deref(), Some("tester"));
    assert_eq!(manual.extra_json, serde_json::json!({ "k": 1 }));
}
21668
/// Runs `EXPLAIN QUERY PLAN` on the batched message query and checks that the
/// plan mentions `sqlite_autoindex_messages_1` and never a `TEMP B-TREE`.
#[test]
fn lexical_rebuild_batch_messages_query_avoids_sorter_temp_btrees() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("agent_search.db");
    let store = SqliteStorage::open(&db_file).unwrap();

    let claude_id = store
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: None,
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Two conversations with two messages each give the planner real rows.
    for (external_id, base_ts) in [
        ("conv-1", 1_700_000_000_000_i64),
        ("conv-2", 1_700_000_001_000_i64),
    ] {
        let opening = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(base_ts + 10),
            content: format!("{external_id}-first"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let reply = Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(base_ts + 20),
            content: format!("{external_id}-second"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        let conv = Conversation {
            id: None,
            agent_slug: "claude_code".into(),
            workspace: Some(PathBuf::from("/tmp/workspace")),
            external_id: Some(external_id.to_string()),
            title: Some("Lexical rebuild".into()),
            source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
            started_at: Some(base_ts),
            ended_at: Some(base_ts + 100),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![opening, reply],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        };
        store
            .insert_conversation_tree(claude_id, None, &conv)
            .unwrap();
    }

    let conversation_ids: Vec<i64> = store
        .conn
        .query_map_collect(
            "SELECT id FROM conversations ORDER BY id",
            fparams![],
            |row| row.get_typed(0),
        )
        .unwrap();
    assert_eq!(conversation_ids.len(), 2);

    // Column 3 of each EXPLAIN QUERY PLAN row is read as the plan text.
    let plan_details: Vec<String> = store
        .conn
        .query_map_collect(
            "EXPLAIN QUERY PLAN \
             SELECT conversation_id, id, idx, role, author, created_at, content \
             FROM messages \
             WHERE conversation_id IN (?1, ?2) \
             ORDER BY conversation_id ASC, idx ASC",
            fparams![conversation_ids[0], conversation_ids[1]],
            |row| row.get_typed(3),
        )
        .unwrap();

    let uses_composite_index = plan_details
        .iter()
        .any(|detail| detail.contains("sqlite_autoindex_messages_1"));
    assert!(
        uses_composite_index,
        "expected batched lexical rebuild fetch to use the conversation_id/idx composite index, got {plan_details:?}"
    );

    let uses_temp_sorter = plan_details
        .iter()
        .any(|detail| detail.contains("TEMP B-TREE"));
    assert!(
        !uses_temp_sorter,
        "expected batched lexical rebuild fetch to avoid sorter temp b-trees, got {plan_details:?}"
    );
}
21768
/// Streaming from the first conversation id must yield every message of both
/// conversations, ordered by conversation and then by `idx`.
#[test]
fn stream_messages_for_lexical_rebuild_groups_and_orders_messages() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let store = SqliteStorage::open(&db_file).unwrap();

    let codex_id = store
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: Some("0.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // First conversation: a user turn followed by an agent turn.
    let first_payload = vec![
        Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_010),
            content: "first-a".into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        },
        Message {
            id: None,
            idx: 1,
            role: MessageRole::Agent,
            author: Some("assistant".into()),
            created_at: Some(1_700_000_000_020),
            content: "first-b".into(),
            extra_json: serde_json::json!({"opaque": true}),
            snippets: Vec::new(),
        },
    ];
    let first = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-stream-1".into()),
        title: Some("Lexical stream 1".into()),
        source_path: PathBuf::from("/tmp/lexical-stream-1.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: first_payload,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    // Second conversation: a single tool message.
    let second_payload = vec![Message {
        id: None,
        idx: 0,
        role: MessageRole::Tool,
        author: Some("tool".into()),
        created_at: Some(1_700_000_000_210),
        content: "second-a".into(),
        extra_json: serde_json::json!({"opaque": true}),
        snippets: Vec::new(),
    }];
    let second = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-stream-2".into()),
        title: Some("Lexical stream 2".into()),
        source_path: PathBuf::from("/tmp/lexical-stream-2.jsonl"),
        started_at: Some(1_700_000_000_200),
        ended_at: Some(1_700_000_000_300),
        approx_tokens: Some(84),
        metadata_json: serde_json::Value::Null,
        messages: second_payload,
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let first_id = store
        .insert_conversation_tree(codex_id, None, &first)
        .unwrap()
        .conversation_id;
    let second_id = store
        .insert_conversation_tree(codex_id, None, &second)
        .unwrap()
        .conversation_id;

    // Record each streamed row as a plain tuple, in arrival order.
    let mut seen = Vec::new();
    store
        .stream_messages_for_lexical_rebuild_from_conversation_id(first_id, |row| {
            let observed = (
                row.conversation_id,
                row.idx,
                row.role,
                row.author,
                row.content,
            );
            seen.push(observed);
            Ok(())
        })
        .unwrap();

    assert_eq!(
        seen,
        vec![
            (
                first_id,
                0,
                "user".to_string(),
                Some("user".to_string()),
                "first-a".to_string(),
            ),
            (
                first_id,
                1,
                "agent".to_string(),
                Some("assistant".to_string()),
                "first-b".to_string(),
            ),
            (
                second_id,
                0,
                "tool".to_string(),
                Some("tool".to_string()),
                "second-a".to_string(),
            ),
        ]
    );
}
21896
/// Streaming the range `[first_id, first_id]` must yield only the first
/// conversation's message and nothing from the later conversation.
#[test]
fn stream_messages_for_lexical_rebuild_between_conversation_ids_respects_upper_bound() {
    use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
    use std::path::PathBuf;

    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("agent_search.db");
    let store = SqliteStorage::open(&db_file).unwrap();

    let claude_id = store
        .ensure_agent(&Agent {
            id: None,
            slug: "claude_code".into(),
            name: "Claude Code".into(),
            version: Some("1.2.3".into()),
            kind: AgentKind::Cli,
        })
        .unwrap();

    // Conversation inside the requested range.
    let in_range_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: Some("user".into()),
        created_at: Some(1_700_000_000_010),
        content: "first-only".into(),
        extra_json: serde_json::json!({"opaque": true}),
        snippets: Vec::new(),
    };
    let first = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-range-1".into()),
        title: Some("Lexical range 1".into()),
        source_path: PathBuf::from("/tmp/lexical-range-1.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![in_range_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    // Conversation inserted afterwards; its message must not be streamed.
    let out_of_range_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::Tool,
        author: Some("tool".into()),
        created_at: Some(1_700_000_000_210),
        content: "second-should-not-appear".into(),
        extra_json: serde_json::json!({"opaque": true}),
        snippets: Vec::new(),
    };
    let second = Conversation {
        id: None,
        agent_slug: "claude_code".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("lexical-range-2".into()),
        title: Some("Lexical range 2".into()),
        source_path: PathBuf::from("/tmp/lexical-range-2.jsonl"),
        started_at: Some(1_700_000_000_200),
        ended_at: Some(1_700_000_000_300),
        approx_tokens: Some(84),
        metadata_json: serde_json::Value::Null,
        messages: vec![out_of_range_message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let first_id = store
        .insert_conversation_tree(claude_id, None, &first)
        .unwrap()
        .conversation_id;
    let second_id = store
        .insert_conversation_tree(claude_id, None, &second)
        .unwrap()
        .conversation_id;

    // Lower and upper bound are both the first conversation's id.
    let mut seen = Vec::new();
    store
        .stream_messages_for_lexical_rebuild_between_conversation_ids(
            first_id,
            first_id,
            |row| {
                let observed = (row.conversation_id, row.idx, row.content);
                seen.push(observed);
                Ok(())
            },
        )
        .unwrap();

    assert_eq!(seen, vec![(first_id, 0, "first-only".to_string())]);
    let leaked_second = seen
        .iter()
        .any(|(conversation_id, _, _)| *conversation_id == second_id);
    assert!(
        !leaked_second,
        "upper bound should exclude later conversation ids"
    );
}
21994
21995 #[test]
21996 fn stream_messages_for_lexical_rebuild_between_conversation_ids_handles_mixed_ranges() {
21997 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
21998 use std::path::PathBuf;
21999
22000 let dir = TempDir::new().unwrap();
22001 let db_path = dir.path().join("agent_search.db");
22002 let storage = SqliteStorage::open(&db_path).unwrap();
22003
22004 let claude_agent_id = storage
22005 .ensure_agent(&Agent {
22006 id: None,
22007 slug: "claude_code".into(),
22008 name: "Claude Code".into(),
22009 version: None,
22010 kind: AgentKind::Cli,
22011 })
22012 .unwrap();
22013 let aider_agent_id = storage
22014 .ensure_agent(&Agent {
22015 id: None,
22016 slug: "aider".into(),
22017 name: "Aider".into(),
22018 version: None,
22019 kind: AgentKind::Cli,
22020 })
22021 .unwrap();
22022
22023 type MessageSpec = (i64, MessageRole, Option<String>, Option<i64>, String);
22024
22025 let mut expected = Vec::new();
22026 let mut first_conversation_id = None;
22027 let mut last_conversation_id = None;
22028 let mut insert_conversation =
22029 |agent_id: i64,
22030 external_id: &str,
22031 title: &str,
22032 source_path: &str,
22033 started_at: i64,
22034 message_specs: Vec<MessageSpec>| {
22035 let conversation = Conversation {
22036 id: None,
22037 agent_slug: if agent_id == aider_agent_id {
22038 "aider".into()
22039 } else {
22040 "claude_code".into()
22041 },
22042 workspace: Some(PathBuf::from("/tmp/workspace")),
22043 external_id: Some(external_id.to_string()),
22044 title: Some(title.to_string()),
22045 source_path: PathBuf::from(source_path),
22046 started_at: Some(started_at),
22047 ended_at: Some(started_at + 100),
22048 approx_tokens: None,
22049 metadata_json: serde_json::Value::Null,
22050 messages: message_specs
22051 .iter()
22052 .map(|(idx, role, author, created_at, content)| Message {
22053 id: None,
22054 idx: *idx,
22055 role: role.clone(),
22056 author: author.clone(),
22057 created_at: *created_at,
22058 content: content.clone(),
22059 extra_json: serde_json::Value::Null,
22060 snippets: Vec::new(),
22061 })
22062 .collect(),
22063 source_id: LOCAL_SOURCE_ID.into(),
22064 origin_host: None,
22065 };
22066 let conversation_id = storage
22067 .insert_conversation_tree(agent_id, None, &conversation)
22068 .unwrap()
22069 .conversation_id;
22070 if first_conversation_id.is_none() {
22071 first_conversation_id = Some(conversation_id);
22072 }
22073 last_conversation_id = Some(conversation_id);
22074 expected.extend(message_specs.into_iter().map(
22075 |(idx, role, author, created_at, content)| {
22076 (
22077 conversation_id,
22078 idx,
22079 match role {
22080 MessageRole::User => "user".to_string(),
22081 MessageRole::Agent => "agent".to_string(),
22082 MessageRole::Tool => "tool".to_string(),
22083 MessageRole::System => "system".to_string(),
22084 MessageRole::Other(other) => other,
22085 },
22086 author,
22087 created_at,
22088 content,
22089 )
22090 },
22091 ));
22092 };
22093
22094 for (label, base_ts) in [
22095 ("alpha", 1_700_000_000_000_i64),
22096 ("beta", 1_700_000_001_000_i64),
22097 ("gamma", 1_700_000_002_000_i64),
22098 ("delta", 1_700_000_003_000_i64),
22099 ("epsilon", 1_700_000_004_000_i64),
22100 ] {
22101 insert_conversation(
22102 claude_agent_id,
22103 &format!("lexical-{label}"),
22104 &format!("Lexical {label}"),
22105 &format!("/tmp/{label}.jsonl"),
22106 base_ts,
22107 vec![
22108 (
22109 0,
22110 MessageRole::User,
22111 None,
22112 Some(base_ts + 10),
22113 format!("{label}_content"),
22114 ),
22115 (
22116 1,
22117 MessageRole::Agent,
22118 None,
22119 Some(base_ts + 20),
22120 format!("{label}_content_response"),
22121 ),
22122 ],
22123 );
22124 }
22125
22126 insert_conversation(
22127 aider_agent_id,
22128 "lexical-aider-history",
22129 "Aider Chat: coding_agent_session_search",
22130 "/tmp/.aider.chat.history.md",
22131 1_764_619_673_394,
22132 vec![
22133 (
22134 0,
22135 MessageRole::System,
22136 Some("system".to_string()),
22137 None,
22138 "# aider chat started at 2025-12-01 20:07:47".to_string(),
22139 ),
22140 (
22141 1,
22142 MessageRole::User,
22143 Some("user".to_string()),
22144 None,
22145 "/tmp/workspace/.venv/bin/aider --no-git --message hello world".to_string(),
22146 ),
22147 ],
22148 );
22149 insert_conversation(
22150 aider_agent_id,
22151 "lexical-aider-fixture",
22152 "Aider Chat: aider",
22153 "/tmp/tests/fixtures/aider/.aider.chat.history.md",
22154 1_764_621_401_399,
22155 vec![
22156 (
22157 0,
22158 MessageRole::User,
22159 Some("user".to_string()),
22160 None,
22161 "/add src/main.rs".to_string(),
22162 ),
22163 (
22164 1,
22165 MessageRole::Agent,
22166 Some("assistant".to_string()),
22167 None,
22168 "Added src/main.rs to the chat.
22169
22170#### /add src/main.rs"
22171 .to_string(),
22172 ),
22173 (
22174 2,
22175 MessageRole::User,
22176 Some("user".to_string()),
22177 None,
22178 "Please refactor.".to_string(),
22179 ),
22180 (
22181 3,
22182 MessageRole::Agent,
22183 Some("assistant".to_string()),
22184 None,
22185 "Sure, here is the code.".to_string(),
22186 ),
22187 ],
22188 );
22189
22190 let mut streamed = Vec::new();
22191 storage
22192 .stream_messages_for_lexical_rebuild_between_conversation_ids(
22193 first_conversation_id.unwrap(),
22194 last_conversation_id.unwrap(),
22195 |row| {
22196 streamed.push((
22197 row.conversation_id,
22198 row.idx,
22199 row.role,
22200 row.author,
22201 row.created_at,
22202 row.content,
22203 ));
22204 Ok(())
22205 },
22206 )
22207 .unwrap();
22208
22209 assert_eq!(streamed, expected);
22210 }
22211
22212 #[test]
22213 fn lexical_rebuild_stream_queries_use_rowid_and_per_conversation_probes() {
22214 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
22215 use std::path::PathBuf;
22216
22217 let dir = TempDir::new().unwrap();
22218 let db_path = dir.path().join("agent_search.db");
22219 let storage = SqliteStorage::open(&db_path).unwrap();
22220
22221 let agent = Agent {
22222 id: None,
22223 slug: "claude_code".into(),
22224 name: "Claude Code".into(),
22225 version: None,
22226 kind: AgentKind::Cli,
22227 };
22228 let agent_id = storage.ensure_agent(&agent).unwrap();
22229
22230 for (external_id, base_ts) in [
22231 ("conv-1", 1_700_000_000_000_i64),
22232 ("conv-2", 1_700_000_001_000_i64),
22233 ] {
22234 let conversation = Conversation {
22235 id: None,
22236 agent_slug: "claude_code".into(),
22237 workspace: Some(PathBuf::from("/tmp/workspace")),
22238 external_id: Some(external_id.to_string()),
22239 title: Some("Lexical rebuild".into()),
22240 source_path: PathBuf::from(format!("/tmp/{external_id}.jsonl")),
22241 started_at: Some(base_ts),
22242 ended_at: Some(base_ts + 100),
22243 approx_tokens: None,
22244 metadata_json: serde_json::Value::Null,
22245 messages: vec![
22246 Message {
22247 id: None,
22248 idx: 0,
22249 role: MessageRole::User,
22250 author: Some("user".into()),
22251 created_at: Some(base_ts + 10),
22252 content: format!("{external_id}-first"),
22253 extra_json: serde_json::Value::Null,
22254 snippets: Vec::new(),
22255 },
22256 Message {
22257 id: None,
22258 idx: 1,
22259 role: MessageRole::Agent,
22260 author: Some("assistant".into()),
22261 created_at: Some(base_ts + 20),
22262 content: format!("{external_id}-second"),
22263 extra_json: serde_json::Value::Null,
22264 snippets: Vec::new(),
22265 },
22266 ],
22267 source_id: LOCAL_SOURCE_ID.into(),
22268 origin_host: None,
22269 };
22270 storage
22271 .insert_conversation_tree(agent_id, None, &conversation)
22272 .unwrap();
22273 }
22274
22275 let first_id: i64 = storage
22276 .conn
22277 .query_row_map(
22278 "SELECT id FROM conversations ORDER BY id LIMIT 1",
22279 fparams![],
22280 |row| row.get_typed(0),
22281 )
22282 .unwrap();
22283 let last_id: i64 = storage
22284 .conn
22285 .query_row_map(
22286 "SELECT id FROM conversations ORDER BY id DESC LIMIT 1",
22287 fparams![],
22288 |row| row.get_typed(0),
22289 )
22290 .unwrap();
22291
22292 let conversation_plan_details: Vec<String> = storage
22293 .conn
22294 .query_map_collect(
22295 "EXPLAIN QUERY PLAN SELECT id FROM conversations WHERE id >= ?1 AND id <= ?2 ORDER BY id ASC",
22296 fparams![first_id, last_id],
22297 |row| row.get_typed(3),
22298 )
22299 .unwrap();
22300 assert!(
22301 !conversation_plan_details
22302 .iter()
22303 .any(|detail| detail.contains("TEMP B-TREE")),
22304 "expected streamed lexical rebuild conversation listing to avoid sorter temp b-trees, got {conversation_plan_details:?}"
22305 );
22306
22307 let message_plan_details: Vec<String> = storage
22308 .conn
22309 .query_map_collect(
22310 "EXPLAIN QUERY PLAN SELECT id, idx, role, author, created_at, content FROM messages INDEXED BY sqlite_autoindex_messages_1 WHERE conversation_id = ?1 ORDER BY idx",
22311 fparams![first_id],
22312 |row| row.get_typed(3),
22313 )
22314 .unwrap();
22315 assert!(
22316 message_plan_details
22317 .iter()
22318 .any(|detail| detail.contains("sqlite_autoindex_messages_1")
22319 || detail.contains("idx_messages_conv_idx")),
22320 "expected per-conversation lexical rebuild fetch to use the conversation_id/idx index, got {message_plan_details:?}"
22321 );
22322 assert!(
22323 !message_plan_details
22324 .iter()
22325 .any(|detail| detail.contains("TEMP B-TREE")),
22326 "expected per-conversation lexical rebuild fetch to avoid sorter temp b-trees, got {message_plan_details:?}"
22327 );
22328 }
22329
22330 #[test]
22331 fn discover_historical_database_bundles_prefers_larger_archives_first() {
22332 let dir = TempDir::new().unwrap();
22333 let canonical_db = dir.path().join("agent_search.db");
22334 fs::write(&canonical_db, b"canonical").unwrap();
22335
22336 let smaller = dir.path().join("agent_search.corrupt.small");
22337 fs::write(&smaller, vec![0_u8; 32]).unwrap();
22338
22339 let backups_dir = dir.path().join("backups");
22340 fs::create_dir_all(&backups_dir).unwrap();
22341 let larger = backups_dir.join("agent_search.db.20260322T020200.bak");
22342 fs::write(&larger, vec![0_u8; 128]).unwrap();
22343
22344 let bundles = discover_historical_database_bundles(&canonical_db);
22345 let ordered_paths: Vec<PathBuf> =
22346 bundles.into_iter().map(|bundle| bundle.root_path).collect();
22347
22348 assert_eq!(ordered_paths, vec![larger, smaller]);
22349 }
22350
22351 #[test]
22352 fn discover_historical_database_bundles_prefers_queryable_direct_bundles_first() {
22353 let dir = TempDir::new().unwrap();
22354 let canonical_db = dir.path().join("agent_search.db");
22355 fs::write(&canonical_db, b"canonical").unwrap();
22356
22357 let larger_corrupt = dir.path().join("agent_search.corrupt.20260324_212907");
22358 fs::write(&larger_corrupt, vec![0_u8; 4096]).unwrap();
22359
22360 let backups_dir = dir.path().join("backups");
22361 fs::create_dir_all(&backups_dir).unwrap();
22362 let smaller_healthy = backups_dir.join("agent_search.db.20260322T020200.bak");
22363 let conn = FrankenConnection::open(smaller_healthy.to_string_lossy().into_owned()).unwrap();
22364 conn.execute_batch(
22365 "CREATE TABLE conversations (id INTEGER PRIMARY KEY, source_path TEXT);
22366 CREATE TABLE messages (
22367 id INTEGER PRIMARY KEY,
22368 conversation_id INTEGER NOT NULL,
22369 idx INTEGER NOT NULL,
22370 content TEXT
22371 );
22372 INSERT INTO conversations(id, source_path) VALUES (1, '/tmp/history.jsonl');
22373 INSERT INTO messages(id, conversation_id, idx, content)
22374 VALUES (1, 1, 0, 'seed');",
22375 )
22376 .unwrap();
22377 drop(conn);
22378
22379 let bundles = discover_historical_database_bundles(&canonical_db);
22380 let ordered_paths: Vec<PathBuf> = bundles
22381 .iter()
22382 .map(|bundle| bundle.root_path.clone())
22383 .collect();
22384
22385 assert_eq!(ordered_paths, vec![smaller_healthy, larger_corrupt]);
22386 assert!(bundles[0].supports_direct_readonly);
22387 assert!(!bundles[1].supports_direct_readonly);
22388 }
22389
22390 #[test]
22391 fn salvage_historical_databases_skips_unreadable_quarantined_bundles() {
22392 let dir = TempDir::new().unwrap();
22393 let canonical_db = dir.path().join("agent_search.db");
22394 let storage = SqliteStorage::open(&canonical_db).unwrap();
22395
22396 let quarantined = dir.path().join("agent_search.corrupt.20260324_212907");
22397 fs::write(&quarantined, b"not a sqlite database").unwrap();
22398
22399 let discovered: Vec<PathBuf> = discover_historical_database_bundles(&canonical_db)
22400 .into_iter()
22401 .map(|bundle| bundle.root_path)
22402 .collect();
22403 assert_eq!(discovered, vec![quarantined]);
22404
22405 let outcome = storage.salvage_historical_databases(&canonical_db).unwrap();
22406 assert_eq!(outcome.bundles_considered, 1);
22407 assert_eq!(outcome.bundles_imported, 0);
22408 assert_eq!(outcome.conversations_imported, 0);
22409 assert_eq!(outcome.messages_imported, 0);
22410 assert!(storage.list_conversations(10, 0).unwrap().is_empty());
22411 }
22412
22413 #[test]
22414 fn discover_historical_database_bundles_includes_repair_lab_and_snapshots_named_roots() {
22415 let dir = TempDir::new().unwrap();
22416 let canonical_db = dir.path().join("agent_search.db");
22417 fs::write(&canonical_db, b"canonical").unwrap();
22418
22419 let repair_lab_dir = dir.path().join("repair-lab").join("live-copy");
22420 fs::create_dir_all(&repair_lab_dir).unwrap();
22421 let repair_lab_db = repair_lab_dir.join("agent_search.db");
22422 fs::write(&repair_lab_db, vec![0_u8; 96]).unwrap();
22423 fs::write(
22424 repair_lab_dir.join("agent_search.rebuild-test.db"),
22425 vec![0_u8; 192],
22426 )
22427 .unwrap();
22428
22429 let snapshots_dir = dir.path().join("snapshots").join("20260324T013201Z");
22430 fs::create_dir_all(&snapshots_dir).unwrap();
22431 let snapshot_db = snapshots_dir.join("agent_search.db");
22432 fs::write(&snapshot_db, vec![0_u8; 64]).unwrap();
22433
22434 let bundles = discover_historical_database_bundles(&canonical_db);
22435 let ordered_paths: Vec<PathBuf> =
22436 bundles.into_iter().map(|bundle| bundle.root_path).collect();
22437
22438 assert!(ordered_paths.contains(&repair_lab_db));
22439 assert!(ordered_paths.contains(&snapshot_db));
22440 assert!(
22441 !ordered_paths
22442 .iter()
22443 .any(|path| path.file_name().and_then(|name| name.to_str())
22444 == Some("agent_search.rebuild-test.db"))
22445 );
22446 }
22447
22448 #[test]
22449 fn discover_historical_database_bundles_prefers_healthy_backup_over_replay_priority() {
22450 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
22451
22452 let dir = TempDir::new().unwrap();
22453 let canonical_db = dir.path().join("agent_search.db");
22454 fs::write(&canonical_db, b"canonical").unwrap();
22455
22456 let replay_dir = dir
22457 .path()
22458 .join("repair-lab")
22459 .join("replay-20260324T070101Z");
22460 fs::create_dir_all(&replay_dir).unwrap();
22461 let replay_db = replay_dir.join("agent_search.db");
22462 let replay_storage = SqliteStorage::open(&replay_db).unwrap();
22463 let agent = Agent {
22464 id: None,
22465 slug: "codex".into(),
22466 name: "Codex".into(),
22467 version: Some("0.2.3".into()),
22468 kind: AgentKind::Cli,
22469 };
22470 let agent_id = replay_storage.ensure_agent(&agent).unwrap();
22471 let conversation = Conversation {
22472 id: None,
22473 agent_slug: "codex".into(),
22474 workspace: Some(PathBuf::from("/tmp/workspace")),
22475 external_id: Some("replay-conv".into()),
22476 title: Some("Replay bundle".into()),
22477 source_path: PathBuf::from("/tmp/replay.jsonl"),
22478 started_at: Some(1_700_000_000_000),
22479 ended_at: Some(1_700_000_000_100),
22480 approx_tokens: Some(42),
22481 metadata_json: serde_json::Value::Null,
22482 messages: vec![Message {
22483 id: None,
22484 idx: 0,
22485 role: MessageRole::Agent,
22486 author: Some("assistant".into()),
22487 created_at: Some(1_700_000_000_050),
22488 content: "replay message".into(),
22489 extra_json: serde_json::Value::Null,
22490 snippets: Vec::new(),
22491 }],
22492 source_id: LOCAL_SOURCE_ID.into(),
22493 origin_host: None,
22494 };
22495 replay_storage
22496 .insert_conversation_tree(agent_id, None, &conversation)
22497 .unwrap();
22498 drop(replay_storage);
22499
22500 let replay_legacy = rusqlite_test_fixture_conn(&replay_db);
22501 replay_legacy
22502 .execute_batch(
22503 "UPDATE meta SET value = '13' WHERE key = 'schema_version';
22504 DELETE FROM _schema_migrations WHERE version = 14;
22505 PRAGMA writable_schema = ON;",
22506 )
22507 .unwrap();
22508 replay_legacy
22509 .execute(
22510 "DELETE FROM meta WHERE key = ?1",
22511 [FTS_FRANKEN_REBUILD_META_KEY],
22512 )
22513 .unwrap();
22514 #[cfg(not(windows))]
22515 {
22516 let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
22517 replay_legacy
22518 .execute(
22519 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
22520 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
22521 [duplicate_legacy_fts_sql],
22522 )
22523 .unwrap();
22524 }
22525 replay_legacy
22526 .execute_batch("PRAGMA writable_schema = OFF;")
22527 .unwrap();
22528 drop(replay_legacy);
22529
22530 let backups_dir = dir.path().join("backups");
22531 fs::create_dir_all(&backups_dir).unwrap();
22532 let clean_backup = backups_dir.join("agent_search.db.20260322T020200.bak");
22533 let clean_storage = SqliteStorage::open(&clean_backup).unwrap();
22534 let clean_agent_id = clean_storage.ensure_agent(&agent).unwrap();
22535 clean_storage
22536 .insert_conversation_tree(clean_agent_id, None, &conversation)
22537 .unwrap();
22538 drop(clean_storage);
22539
22540 let bundles = discover_historical_database_bundles(&canonical_db);
22541 let ordered_paths: Vec<PathBuf> = bundles
22542 .iter()
22543 .map(|bundle| bundle.root_path.clone())
22544 .collect();
22545
22546 assert_eq!(ordered_paths[0], clean_backup);
22547 assert_eq!(ordered_paths[1], replay_db);
22548 assert_eq!(
22549 bundles[0].probe.schema_version,
22550 Some(CURRENT_SCHEMA_VERSION)
22551 );
22552 assert_eq!(bundles[0].probe.fts_schema_rows, Some(0));
22558 assert!(!bundles[0].probe.fts_queryable);
22561 assert_eq!(bundles[1].probe.schema_version, Some(13));
22562 let expected_fts_schema_rows = if cfg!(windows) { Some(0) } else { Some(1) };
22567 assert_eq!(bundles[1].probe.fts_schema_rows, expected_fts_schema_rows);
22568 }
22569
22570 #[test]
22571 fn ensure_fts_consistency_via_rusqlite_catches_up_missing_rows() {
22572 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
22573
22574 let dir = TempDir::new().unwrap();
22575 let db_path = dir.path().join("fts-catchup.db");
22576 let storage = SqliteStorage::open(&db_path).unwrap();
22577 let agent = Agent {
22578 id: None,
22579 slug: "codex".into(),
22580 name: "Codex".into(),
22581 version: Some("0.2.3".into()),
22582 kind: AgentKind::Cli,
22583 };
22584 let agent_id = storage.ensure_agent(&agent).unwrap();
22585 let conversation = Conversation {
22586 id: None,
22587 agent_slug: "codex".into(),
22588 workspace: Some(PathBuf::from("/tmp/workspace")),
22589 external_id: Some("fts-catchup".into()),
22590 title: Some("FTS catchup".into()),
22591 source_path: PathBuf::from("/tmp/fts-catchup.jsonl"),
22592 started_at: Some(1_700_000_000_000),
22593 ended_at: Some(1_700_000_000_100),
22594 approx_tokens: Some(42),
22595 metadata_json: serde_json::Value::Null,
22596 messages: vec![Message {
22597 id: None,
22598 idx: 0,
22599 role: MessageRole::User,
22600 author: Some("user".into()),
22601 created_at: Some(1_700_000_000_050),
22602 content: "initial message".into(),
22603 extra_json: serde_json::Value::Null,
22604 snippets: Vec::new(),
22605 }],
22606 source_id: LOCAL_SOURCE_ID.into(),
22607 origin_host: None,
22608 };
22609 storage
22610 .insert_conversation_tree(agent_id, None, &conversation)
22611 .unwrap();
22612 drop(storage);
22613
22614 rebuild_fts_via_rusqlite(&db_path).unwrap();
22615
22616 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
22617 let conversation_id: i64 = conn
22618 .query_row_map("SELECT id FROM conversations LIMIT 1", fparams![], |row| {
22619 row.get_typed(0)
22620 })
22621 .unwrap();
22622 conn.execute_compat(
22623 "INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content, extra_json, extra_bin)
22624 VALUES(2, ?1, 1, 'assistant', 'assistant', 1700000000060, 'authentication catchup', NULL, NULL)",
22625 fparams![conversation_id],
22626 )
22627 .unwrap();
22628 drop(conn);
22629
22630 let repair = ensure_fts_consistency_via_rusqlite(&db_path).unwrap();
22631 assert_eq!(
22632 repair,
22633 FtsConsistencyRepair::IncrementalCatchUp {
22634 inserted_rows: 1,
22635 total_rows: 2
22636 }
22637 );
22638
22639 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
22640 let auth_rows: i64 = conn
22641 .query_row_map(
22642 "SELECT COUNT(*) FROM fts_messages WHERE rowid = 2",
22643 fparams![],
22644 |row| row.get_typed(0),
22645 )
22646 .unwrap();
22647 assert_eq!(auth_rows, 1);
22648 }
22649
22650 #[test]
22651 fn rebuild_fts_via_rusqlite_cleans_duplicate_legacy_schema_rows() {
22652 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
22653
22654 let dir = TempDir::new().unwrap();
22655 let db_path = dir.path().join("fts-duplicate-rebuild.db");
22656
22657 let storage = SqliteStorage::open(&db_path).unwrap();
22658 let agent = Agent {
22659 id: None,
22660 slug: "codex".into(),
22661 name: "Codex".into(),
22662 version: Some("0.2.3".into()),
22663 kind: AgentKind::Cli,
22664 };
22665 let agent_id = storage.ensure_agent(&agent).unwrap();
22666 let conversation = Conversation {
22667 id: None,
22668 agent_slug: "codex".into(),
22669 workspace: Some(PathBuf::from("/ws")),
22670 external_id: Some("retro".into()),
22671 title: Some("retro".into()),
22672 source_path: PathBuf::from("/tmp/retro.jsonl"),
22673 started_at: Some(42),
22674 ended_at: Some(42),
22675 approx_tokens: None,
22676 metadata_json: serde_json::Value::Null,
22677 messages: vec![Message {
22678 id: None,
22679 idx: 0,
22680 role: MessageRole::User,
22681 author: None,
22682 created_at: Some(42),
22683 content: "retro investigation".into(),
22684 extra_json: serde_json::Value::Null,
22685 snippets: Vec::new(),
22686 }],
22687 source_id: LOCAL_SOURCE_ID.into(),
22688 origin_host: None,
22689 };
22690 storage
22691 .insert_conversation_tree(agent_id, None, &conversation)
22692 .unwrap();
22693 drop(storage);
22694 materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();
22695
22696 let conn = rusqlite_test_fixture_conn(&db_path);
22697 conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
22698 conn.execute(
22699 "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
22700 VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
22701 ["CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')"],
22702 )
22703 .unwrap();
22704 conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
22705 let duplicate_rows: i64 = conn
22706 .query_row(
22707 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
22708 [],
22709 |row| row.get(0),
22710 )
22711 .unwrap();
22712 assert_eq!(duplicate_rows, 2);
22713 drop(conn);
22714
22715 let inserted = rebuild_fts_via_rusqlite(&db_path).unwrap();
22716 assert_eq!(inserted, 1);
22717
22718 let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
22719 let schema_rows = franken_fts_schema_rows(&conn).unwrap();
22720 assert_eq!(
22721 schema_rows, 1,
22722 "DROP TABLE should leave one clean FTS schema"
22723 );
22724 let match_count: i64 = conn
22725 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
22726 row.get_typed(0)
22727 })
22728 .unwrap();
22729 assert_eq!(match_count, 1);
22730 }
22731
22732 #[test]
22737 fn ensure_agent_creates_new() {
22738 let dir = TempDir::new().unwrap();
22739 let db_path = dir.path().join("test.db");
22740 let storage = SqliteStorage::open(&db_path).unwrap();
22741
22742 let agent = Agent {
22743 id: None,
22744 slug: "test_agent".into(),
22745 name: "Test Agent".into(),
22746 version: Some("1.0".into()),
22747 kind: AgentKind::Cli,
22748 };
22749
22750 let id = storage.ensure_agent(&agent).unwrap();
22751 assert!(id > 0);
22752 }
22753
22754 #[test]
22755 fn ensure_agent_returns_existing_id() {
22756 let dir = TempDir::new().unwrap();
22757 let db_path = dir.path().join("test.db");
22758 let storage = SqliteStorage::open(&db_path).unwrap();
22759
22760 let agent = Agent {
22761 id: None,
22762 slug: "codex".into(),
22763 name: "Codex".into(),
22764 version: None,
22765 kind: AgentKind::Cli,
22766 };
22767
22768 let id1 = storage.ensure_agent(&agent).unwrap();
22769 let id2 = storage.ensure_agent(&agent).unwrap();
22770 assert_eq!(id1, id2);
22771 }
22772
22773 #[test]
22774 fn ensure_agent_unchanged_preserves_updated_at() {
22775 let dir = TempDir::new().unwrap();
22776 let db_path = dir.path().join("test.db");
22777 let storage = SqliteStorage::open(&db_path).unwrap();
22778
22779 let agent = Agent {
22780 id: None,
22781 slug: "codex".into(),
22782 name: "Codex".into(),
22783 version: Some("1.0".into()),
22784 kind: AgentKind::Cli,
22785 };
22786
22787 storage.ensure_agent(&agent).unwrap();
22788 let initial_updated_at: i64 = storage
22789 .conn
22790 .query_row_map(
22791 "SELECT updated_at FROM agents WHERE slug = ?1",
22792 fparams![agent.slug.as_str()],
22793 |row| row.get_typed(0),
22794 )
22795 .unwrap();
22796 std::thread::sleep(std::time::Duration::from_millis(5));
22797
22798 storage.ensure_agent(&agent).unwrap();
22799 let fetched_updated_at: i64 = storage
22800 .conn
22801 .query_row_map(
22802 "SELECT updated_at FROM agents WHERE slug = ?1",
22803 fparams![agent.slug.as_str()],
22804 |row| row.get_typed(0),
22805 )
22806 .unwrap();
22807
22808 assert_eq!(fetched_updated_at, initial_updated_at);
22809 }
22810
22811 #[test]
22812 fn ensure_agent_changed_metadata_updates_cached_slug() {
22813 let dir = TempDir::new().unwrap();
22814 let db_path = dir.path().join("test.db");
22815 let storage = SqliteStorage::open(&db_path).unwrap();
22816
22817 let mut agent = Agent {
22818 id: None,
22819 slug: "codex".into(),
22820 name: "Codex".into(),
22821 version: Some("1.0".into()),
22822 kind: AgentKind::Cli,
22823 };
22824
22825 let id1 = storage.ensure_agent(&agent).unwrap();
22826 agent.name = "Codex CLI".into();
22827 agent.version = Some("1.1".into());
22828 let id2 = storage.ensure_agent(&agent).unwrap();
22829
22830 let fetched: (String, Option<String>) = storage
22831 .conn
22832 .query_row_map(
22833 "SELECT name, version FROM agents WHERE slug = ?1",
22834 fparams![agent.slug.as_str()],
22835 |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
22836 )
22837 .unwrap();
22838
22839 assert_eq!(id1, id2);
22840 assert_eq!(fetched, ("Codex CLI".into(), Some("1.1".into())));
22841 }
22842
22843 #[test]
22844 fn list_agents_returns_inserted() {
22845 let dir = TempDir::new().unwrap();
22846 let db_path = dir.path().join("test.db");
22847 let storage = SqliteStorage::open(&db_path).unwrap();
22848
22849 let agent = Agent {
22850 id: None,
22851 slug: "new_agent".into(),
22852 name: "New Agent".into(),
22853 version: None,
22854 kind: AgentKind::VsCode,
22855 };
22856 storage.ensure_agent(&agent).unwrap();
22857
22858 let agents = storage.list_agents().unwrap();
22859 assert!(agents.iter().any(|a| a.slug == "new_agent"));
22860 }
22861
22862 #[test]
22867 fn ensure_workspace_creates_new() {
22868 let dir = TempDir::new().unwrap();
22869 let db_path = dir.path().join("test.db");
22870 let storage = SqliteStorage::open(&db_path).unwrap();
22871
22872 let id = storage
22873 .ensure_workspace(Path::new("/home/user/project"), Some("My Project"))
22874 .unwrap();
22875 assert!(id > 0);
22876 }
22877
22878 #[test]
22879 fn ensure_workspace_returns_existing() {
22880 let dir = TempDir::new().unwrap();
22881 let db_path = dir.path().join("test.db");
22882 let storage = SqliteStorage::open(&db_path).unwrap();
22883
22884 let path = Path::new("/home/user/myproject");
22885 let id1 = storage.ensure_workspace(path, None).unwrap();
22886 let id2 = storage.ensure_workspace(path, None).unwrap();
22887 assert_eq!(id1, id2);
22888 }
22889
22890 #[test]
22891 fn ensure_workspace_changed_display_name_updates_cached_path() {
22892 let dir = TempDir::new().unwrap();
22893 let db_path = dir.path().join("test.db");
22894 let storage = SqliteStorage::open(&db_path).unwrap();
22895
22896 let path = Path::new("/home/user/myproject");
22897 let id1 = storage.ensure_workspace(path, Some("Before")).unwrap();
22898 let id2 = storage.ensure_workspace(path, Some("After")).unwrap();
22899
22900 let display_name: Option<String> = storage
22901 .conn
22902 .query_row_map(
22903 "SELECT display_name FROM workspaces WHERE path = ?1",
22904 fparams![path.to_string_lossy().as_ref()],
22905 |row| row.get_typed(0),
22906 )
22907 .unwrap();
22908
22909 assert_eq!(id1, id2);
22910 assert_eq!(display_name.as_deref(), Some("After"));
22911 }
22912
22913 #[test]
22914 fn list_workspaces_returns_inserted() {
22915 let dir = TempDir::new().unwrap();
22916 let db_path = dir.path().join("test.db");
22917 let storage = SqliteStorage::open(&db_path).unwrap();
22918
22919 storage
22920 .ensure_workspace(Path::new("/test/workspace"), Some("Test WS"))
22921 .unwrap();
22922
22923 let workspaces = storage.list_workspaces().unwrap();
22924 assert!(
22925 workspaces
22926 .iter()
22927 .any(|w| w.path.to_str() == Some("/test/workspace"))
22928 );
22929 }
22930
22931 #[test]
22936 fn upsert_source_creates_new() {
22937 let dir = TempDir::new().unwrap();
22938 let db_path = dir.path().join("test.db");
22939 let storage = SqliteStorage::open(&db_path).unwrap();
22940
22941 let source = Source {
22942 id: "test-laptop".into(),
22943 kind: SourceKind::Ssh,
22944 host_label: Some("test.local".into()),
22945 machine_id: Some("test-machine-id".into()),
22946 platform: None,
22947 config_json: None,
22948 created_at: Some(SqliteStorage::now_millis()),
22949 updated_at: None,
22950 };
22951
22952 storage.upsert_source(&source).unwrap();
22953 let fetched = storage.get_source("test-laptop").unwrap();
22954 assert!(fetched.is_some());
22955 assert_eq!(fetched.unwrap().host_label, Some("test.local".into()));
22956 }
22957
22958 #[test]
22959 fn upsert_source_updates_existing() {
22960 let dir = TempDir::new().unwrap();
22961 let db_path = dir.path().join("test.db");
22962 let storage = SqliteStorage::open(&db_path).unwrap();
22963
22964 let source1 = Source {
22965 id: "my-source".into(),
22966 kind: SourceKind::Ssh,
22967 host_label: Some("Original Label".into()),
22968 machine_id: None,
22969 platform: None,
22970 config_json: None,
22971 created_at: Some(SqliteStorage::now_millis()),
22972 updated_at: None,
22973 };
22974 storage.upsert_source(&source1).unwrap();
22975
22976 let source2 = Source {
22977 id: "my-source".into(),
22978 kind: SourceKind::Ssh,
22979 host_label: Some("Updated Label".into()),
22980 machine_id: None,
22981 platform: Some("linux".into()),
22982 config_json: None,
22983 created_at: Some(SqliteStorage::now_millis()),
22984 updated_at: Some(SqliteStorage::now_millis()),
22985 };
22986 storage.upsert_source(&source2).unwrap();
22987
22988 let fetched = storage.get_source("my-source").unwrap().unwrap();
22989 assert_eq!(fetched.host_label, Some("Updated Label".into()));
22990 assert!(fetched.platform.is_some());
22991 }
22992
22993 #[test]
22994 fn upsert_source_unchanged_preserves_updated_at() {
22995 let dir = TempDir::new().unwrap();
22996 let db_path = dir.path().join("test.db");
22997 let storage = SqliteStorage::open(&db_path).unwrap();
22998
22999 let source = Source {
23000 id: "stable-source".into(),
23001 kind: SourceKind::Ssh,
23002 host_label: Some("builder.local".into()),
23003 machine_id: None,
23004 platform: Some("linux".into()),
23005 config_json: Some(serde_json::json!({"role": "bench"})),
23006 created_at: None,
23007 updated_at: None,
23008 };
23009
23010 storage.upsert_source(&source).unwrap();
23011 let initial = storage.get_source("stable-source").unwrap().unwrap();
23012 std::thread::sleep(std::time::Duration::from_millis(5));
23013
23014 storage.upsert_source(&source).unwrap();
23015 let fetched = storage.get_source("stable-source").unwrap().unwrap();
23016
23017 assert_eq!(fetched.created_at, initial.created_at);
23018 assert_eq!(fetched.updated_at, initial.updated_at);
23019 assert_eq!(fetched.host_label, initial.host_label);
23020 assert_eq!(fetched.platform, initial.platform);
23021 assert_eq!(fetched.config_json, initial.config_json);
23022 }
23023
/// `ensure_source_for_conversation` must insert the remote source row again
/// after that row has been removed through `delete_source`.
#[test]
fn ensure_source_for_conversation_recreates_remote_source_after_delete() {
    const SOURCE_ID: &str = "cache-remote-source";

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let only_message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: Some("tester".into()),
        created_at: Some(1_700_000_000_000),
        content: "cache recreate".into(),
        extra_json: serde_json::json!({}),
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/ws/cache-recreate")),
        external_id: Some("cache-recreate".into()),
        title: Some("Cache Recreate".into()),
        source_path: PathBuf::from("/log/cache-recreate.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: Some(16),
        metadata_json: serde_json::json!({}),
        messages: vec![only_message],
        source_id: SOURCE_ID.into(),
        origin_host: Some("builder-cache".into()),
    };

    // First ensure: the row appears.
    storage
        .ensure_source_for_conversation(&conversation)
        .unwrap();
    assert!(storage.get_source(SOURCE_ID).unwrap().is_some());

    // Delete it and confirm it is gone.
    assert!(storage.delete_source(SOURCE_ID, false).unwrap());
    assert!(storage.get_source(SOURCE_ID).unwrap().is_none());

    // Second ensure: the row must come back with the origin host as its label.
    storage
        .ensure_source_for_conversation(&conversation)
        .unwrap();
    let recreated = storage.get_source(SOURCE_ID).unwrap();
    assert!(recreated.is_some());
    let host_label = recreated.unwrap().host_label;
    assert_eq!(host_label.as_deref(), Some("builder-cache"));
}
23074
/// A previously upserted source is reported as deleted and can no longer be fetched.
#[test]
fn delete_source_removes_entry() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let doomed = Source {
        id: "to-delete".into(),
        kind: SourceKind::Local,
        host_label: None,
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&doomed).unwrap();

    // `delete_source` returns `true` when it removed the row.
    let was_removed = storage.delete_source("to-delete", false).unwrap();
    assert!(was_removed);

    let lookup = storage.get_source("to-delete").unwrap();
    assert!(lookup.is_none());
}
23099
/// Asking to delete the source identified by `LOCAL_SOURCE_ID` must return an error.
#[test]
fn delete_source_cannot_delete_local() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let outcome = storage.delete_source(LOCAL_SOURCE_ID, false);
    assert!(outcome.is_err());
}
23109
/// A freshly opened database already lists a source whose id is `LOCAL_SOURCE_ID`.
#[test]
fn list_sources_includes_local() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let all_sources = storage.list_sources().unwrap();
    let has_local = all_sources
        .iter()
        .any(|source| source.id == LOCAL_SOURCE_ID);
    assert!(has_local);
}
23119
/// A conversation with a whitespace-only `source_id` and no `origin_host` must be
/// stored under `LOCAL_SOURCE_ID`; no source row keyed by the blank id may exist.
#[test]
fn insert_conversation_tree_blank_local_source_normalizes_to_local_id() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-local-source".into()),
        title: Some("Blank local source".into()),
        source_path: tmp.path().join("blank-local.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        // Blank on purpose: this is the value under test.
        source_id: "   ".into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // The blank id must not have been persisted as a source of its own.
    assert!(storage.get_source("   ").unwrap().is_none());

    let local_row = storage
        .get_source(LOCAL_SOURCE_ID)
        .unwrap()
        .expect("local source row should exist");
    assert_eq!(local_row.kind, SourceKind::Local);
    assert_eq!(local_row.host_label, None);

    // The stored conversation points at the local source as well.
    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let stored = &listed[0];
    assert_eq!(stored.source_id, LOCAL_SOURCE_ID);
    assert_eq!(stored.origin_host, None);
}
23178
/// Inserting local conversations must not change `updated_at` on the source row
/// keyed by `LOCAL_SOURCE_ID` that is already present after opening the database.
#[test]
fn repeated_local_inserts_do_not_touch_bootstrap_source_row() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    // Reads the current `updated_at` of the local source row straight from SQL.
    let read_local_updated_at = || -> i64 {
        storage
            .conn
            .query_row_map(
                "SELECT updated_at FROM sources WHERE id = ?1",
                fparams![LOCAL_SOURCE_ID],
                |row| row.get_typed(0),
            )
            .unwrap()
    };

    // Builds a one-message local conversation whose identifying fields vary by suffix.
    let make_conversation = |external_id: &str, suffix: &str| {
        let message = Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: None,
            created_at: Some(1_700_000_000_000),
            content: format!("hello-{suffix}"),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        };
        Conversation {
            id: None,
            agent_slug: "codex".into(),
            workspace: None,
            external_id: Some(external_id.into()),
            title: Some(format!("Local source {suffix}")),
            source_path: tmp.path().join(format!("local-{suffix}.jsonl")),
            started_at: Some(1_700_000_000_000),
            ended_at: Some(1_700_000_000_001),
            approx_tokens: None,
            metadata_json: serde_json::Value::Null,
            messages: vec![message],
            source_id: LOCAL_SOURCE_ID.into(),
            origin_host: None,
        }
    };

    let bootstrap_updated_at = read_local_updated_at();

    // Insert twice, sleeping first each time so a touched timestamp would differ.
    let mut observed = Vec::with_capacity(2);
    for (external_id, suffix) in [("local-source-1", "one"), ("local-source-2", "two")] {
        std::thread::sleep(std::time::Duration::from_millis(5));
        storage
            .insert_conversation_tree(agent_id, None, &make_conversation(external_id, suffix))
            .unwrap();
        observed.push(read_local_updated_at());
    }

    assert_eq!(observed[0], bootstrap_updated_at);
    assert_eq!(observed[1], bootstrap_updated_at);
}
23258
/// A conversation with a whitespace-only `source_id` but a populated `origin_host`
/// must be stored under a source whose id and label both equal that host.
#[test]
fn insert_conversation_tree_blank_remote_source_normalizes_to_origin_host() {
    const HOST: &str = "user@work-laptop";

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("blank-remote-source".into()),
        title: Some("Blank remote source".into()),
        source_path: tmp.path().join("blank-remote.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        // Blank on purpose: the origin host below is the only usable identity.
        source_id: "   ".into(),
        origin_host: Some(HOST.into()),
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // The blank id must not have been persisted as a source of its own.
    assert!(storage.get_source("   ").unwrap().is_none());

    let remote_row = storage
        .get_source(HOST)
        .unwrap()
        .expect("normalized remote source row should exist");
    assert_eq!(remote_row.kind, SourceKind::Ssh);
    assert_eq!(remote_row.host_label.as_deref(), Some(HOST));

    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let stored = &listed[0];
    assert_eq!(stored.source_id, HOST);
    assert_eq!(stored.origin_host.as_deref(), Some(HOST));
}
23320
/// Same expectation as the single-insert remote case, exercised through
/// `insert_conversations_batched`: a blank `source_id` plus an `origin_host`
/// yields a source keyed and labelled by that host.
#[test]
fn insert_conversations_batched_normalizes_host_only_remote_source_id() {
    const HOST: &str = "user@batch-host";

    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let greeting = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "hello".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("batched-blank-remote-source".into()),
        title: Some("Batched blank remote source".into()),
        source_path: tmp.path().join("batched-blank-remote.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![greeting],
        source_id: "   ".into(),
        origin_host: Some(HOST.into()),
    };

    // A batch containing exactly one conversation.
    storage
        .insert_conversations_batched(&[(agent_id, None, &conversation)])
        .unwrap();

    assert!(storage.get_source("   ").unwrap().is_none());

    let remote_row = storage
        .get_source(HOST)
        .unwrap()
        .expect("normalized batched remote source row should exist");
    assert_eq!(remote_row.kind, SourceKind::Ssh);
    assert_eq!(remote_row.host_label.as_deref(), Some(HOST));

    let listed = storage.list_conversations(10, 0).unwrap();
    assert_eq!(listed.len(), 1);
    let stored = &listed[0];
    assert_eq!(stored.source_id, HOST);
    assert_eq!(stored.origin_host.as_deref(), Some(HOST));
}
23382
/// `get_source_ids` must report an upserted remote source while leaving out
/// the id `LOCAL_SOURCE_ID`.
#[test]
fn get_source_ids_excludes_local() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let remote = Source {
        id: "remote-1".into(),
        kind: SourceKind::Ssh,
        host_label: Some("server".into()),
        machine_id: None,
        platform: None,
        config_json: None,
        created_at: Some(SqliteStorage::now_millis()),
        updated_at: None,
    };
    storage.upsert_source(&remote).unwrap();

    let ids = storage.get_source_ids().unwrap();
    let lists_local = ids.iter().any(|id| id == LOCAL_SOURCE_ID);
    let lists_remote = ids.iter().any(|id| id == "remote-1");
    assert!(!lists_local);
    assert!(lists_remote);
}
23406
/// On a freshly opened database `get_last_scan_ts` yields `None`.
#[test]
fn get_last_scan_ts_returns_none_initially() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let last_scan = storage.get_last_scan_ts().unwrap();
    assert!(last_scan.is_none());
}
23420
/// A timestamp written with `set_last_scan_ts` is read back unchanged.
#[test]
fn set_and_get_last_scan_ts() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let written: i64 = 1_700_000_000_000;
    storage.set_last_scan_ts(written).unwrap();

    let read_back = storage.get_last_scan_ts().unwrap();
    assert_eq!(read_back, Some(written));
}
23433
/// Sanity-checks `now_millis` against two fixed bounds, read as Unix-epoch
/// milliseconds: 2020-01-01T00:00:00Z (exclusive) and 2100-01-01T00:00:00Z (exclusive).
#[test]
fn now_millis_returns_reasonable_value() {
    let (floor, ceiling) = (1_577_836_800_000, 4_102_444_800_000);

    let now = SqliteStorage::now_millis();
    assert!(now > floor);
    assert!(now < ceiling);
}
23446
/// A small object with string, number and nested-object members survives a
/// serialize/deserialize round trip through the msgpack helpers.
#[test]
fn msgpack_roundtrip_basic_object() {
    let original = serde_json::json!({
        "key": "value",
        "number": 42,
        "nested": { "inner": true }
    });

    let encoded = serialize_json_to_msgpack(&original).expect("should serialize");
    let decoded = deserialize_msgpack_to_json(&encoded);

    assert_eq!(original, decoded);
}
23464
/// JSON `null` produces no msgpack payload.
#[test]
fn msgpack_returns_none_for_null() {
    let encoded = serialize_json_to_msgpack(&serde_json::Value::Null);
    assert!(encoded.is_none());
}
23470
/// A message whose `extra_json` is JSON null must leave both the `extra_json`
/// and `extra_bin` columns NULL, and must read back as JSON null.
#[test]
fn message_insert_stores_null_extra_json_as_sql_null() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "null metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("null-extra-json".into()),
        title: Some("Null extra_json".into()),
        source_path: PathBuf::from("/tmp/null-extra-json.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Inspect the raw columns rather than the decoded message.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert!(text_column.is_none());
    assert!(binary_column.is_none());

    let fetched = storage.fetch_messages(conversation_id).unwrap();
    assert!(fetched[0].extra_json.is_null());
}
23529
/// A message with a non-empty `extra_json` object must populate only the
/// `extra_bin` column (the `extra_json` text column stays NULL) and must
/// read back equal to the original value.
#[test]
fn message_insert_stores_nonempty_extra_json_as_msgpack_only() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let expected_extra = serde_json::json!({ "idx": 7, "kind": "profile" });
    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "msgpack metadata message".into(),
        extra_json: expected_extra.clone(),
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("msgpack-extra-json".into()),
        title: Some("MessagePack extra_json".into()),
        source_path: PathBuf::from("/tmp/msgpack-extra-json.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    let conversation_id = storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap()
        .conversation_id;

    // Inspect the raw columns rather than the decoded message.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT extra_json, extra_bin FROM messages WHERE conversation_id = ?1",
            fparams![conversation_id],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert!(text_column.is_none());
    assert!(binary_column.is_some());

    let fetched = storage.fetch_messages(conversation_id).unwrap();
    assert_eq!(fetched[0].extra_json, expected_extra);
}
23589
/// A conversation whose `metadata_json` is JSON null must store the literal
/// text `null` in `metadata_json`, leave `metadata_bin` NULL, and list back
/// as JSON null.
#[test]
fn conversation_insert_preserves_null_metadata_json_as_json_null() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "null conversation metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("null-conversation-metadata".into()),
        title: Some("Null conversation metadata".into()),
        source_path: PathBuf::from("/tmp/null-conversation-metadata.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: serde_json::Value::Null,
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // Inspect the raw columns of the row just written.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
            fparams!["null-conversation-metadata"],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert_eq!(text_column.as_deref(), Some("null"));
    assert!(binary_column.is_none());

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert!(conversations[0].metadata_json.is_null());
}
23647
/// A conversation with a non-empty `metadata_json` object must populate only
/// `metadata_bin` (the text column stays NULL) and must list back equal to
/// the original value.
#[test]
fn conversation_insert_stores_nonempty_metadata_as_msgpack_only() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();

    let expected_metadata = serde_json::json!({ "bench": true, "source": "profile" });
    let message = Message {
        id: None,
        idx: 0,
        role: MessageRole::User,
        author: None,
        created_at: Some(1_700_000_000_000),
        content: "msgpack conversation metadata message".into(),
        extra_json: serde_json::Value::Null,
        snippets: Vec::new(),
    };
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: None,
        external_id: Some("msgpack-conversation-metadata".into()),
        title: Some("MessagePack conversation metadata".into()),
        source_path: PathBuf::from("/tmp/msgpack-conversation-metadata.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_001),
        approx_tokens: None,
        metadata_json: expected_metadata.clone(),
        messages: vec![message],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };

    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();

    // Inspect the raw columns of the row just written.
    let (text_column, binary_column): (Option<String>, Option<Vec<u8>>) = storage
        .conn
        .query_row_map(
            "SELECT metadata_json, metadata_bin FROM conversations WHERE external_id = ?1",
            fparams!["msgpack-conversation-metadata"],
            |row| Ok((row.get_typed(0)?, row.get_typed(1)?)),
        )
        .unwrap();
    assert!(text_column.is_none());
    assert!(binary_column.is_some());

    let conversations = storage.list_conversations(10, 0).unwrap();
    assert_eq!(conversations[0].metadata_json, expected_metadata);
}
23706
/// An empty JSON object produces no msgpack payload.
#[test]
fn msgpack_returns_none_for_empty_object() {
    let empty_object = serde_json::json!({});
    let encoded = serialize_json_to_msgpack(&empty_object);
    assert!(encoded.is_none());
}
23712
/// A payload carrying a one-million-character string is kept as its raw JSON
/// text, and the size hint equals the raw text length.
#[test]
fn parse_historical_json_column_preserves_large_payloads_as_raw_json() {
    let filler = "x".repeat(1_000_000);
    let raw_text = format!("{{\"blob\":\"{}\"}}", filler);

    let parsed = parse_historical_json_column(Some(raw_text.clone()));

    assert_eq!(historical_raw_json(&parsed), Some(raw_text.as_str()));
    assert_eq!(json_value_size_hint(&parsed), raw_text.len());
}
23722
/// A short payload is likewise kept as its raw JSON text.
#[test]
fn parse_historical_json_column_preserves_small_payloads_as_raw_json() {
    let raw_text = String::from("{\"ok\":true,\"n\":1}");

    let parsed = parse_historical_json_column(Some(raw_text.clone()));
    let preserved = historical_raw_json(&parsed);

    assert_eq!(preserved, Some(raw_text.as_str()));
}
23731
/// A non-empty JSON array is serialized and round-trips unchanged.
#[test]
fn msgpack_serializes_non_empty_array() {
    let original = serde_json::json!([1, 2, 3]);

    let encoded = serialize_json_to_msgpack(&original).expect("should serialize array");
    let decoded = deserialize_msgpack_to_json(&encoded);

    assert_eq!(original, decoded);
}
23739
/// For a representative object, the msgpack encoding must be strictly shorter
/// than the `serde_json` byte encoding.
#[test]
fn msgpack_smaller_than_json() {
    let sample = serde_json::json!({
        "field_name_one": "some_value",
        "field_name_two": 123456,
        "field_name_three": [1, 2, 3, 4, 5],
        "field_name_four": { "nested": true }
    });

    let json_len = serde_json::to_vec(&sample).unwrap().len();
    let msgpack_len = serialize_json_to_msgpack(&sample).unwrap().len();

    assert!(
        msgpack_len < json_len,
        "MessagePack ({} bytes) should be smaller than JSON ({} bytes)",
        msgpack_len,
        json_len
    );
}
23760
/// After opening a database, `conversations` must expose a `metadata_bin`
/// column and `messages` must expose an `extra_bin` column.
#[test]
fn migration_v7_adds_binary_columns() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("test.db")).unwrap();

    // Runs a `PRAGMA table_info(...)` statement and checks whether any result
    // row carries the wanted name in result column 1.
    let column_exists = |pragma_sql: &'static str, column: &'static str| {
        storage
            .raw()
            .query(pragma_sql)
            .unwrap()
            .iter()
            .any(|row| row.get_typed::<String>(1).unwrap() == column)
    };

    let has_metadata_bin = column_exists("PRAGMA table_info(conversations)", "metadata_bin");
    assert!(
        has_metadata_bin,
        "conversations should have metadata_bin column"
    );

    let has_extra_bin = column_exists("PRAGMA table_info(messages)", "extra_bin");
    assert!(has_extra_bin, "messages should have extra_bin column");
}
23788
/// After the `conversation_tail_state` row is deleted by hand (and
/// `conversations.ended_at` is overwritten), a follow-up append must insert
/// only the new messages and leave a tail-state row reflecting the
/// overwritten `ended_at` together with the last appended message.
#[test]
fn insert_conversation_tree_rehydrates_append_tail_state_cache_after_manual_clear() {
    let tmp = TempDir::new().unwrap();
    let storage = SqliteStorage::open(&tmp.path().join("append-tail-state-cache.db")).unwrap();

    let codex = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&codex).unwrap();
    let workspace_path = PathBuf::from("/ws/profiled-append-remote");
    let workspace_id = storage.ensure_workspace(&workspace_path, None).unwrap();

    // Seed the conversation with its first five messages.
    let seed = make_profiled_append_remote_merge_conversation(11, 5);
    let conversation_id = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &seed)
        .unwrap()
        .conversation_id;

    // Reads (ended_at, last_message_idx, last_message_created_at) for this conversation.
    let read_tail = || -> (Option<i64>, Option<i64>, Option<i64>) {
        storage
            .raw()
            .query_row_map(
                "SELECT ended_at, last_message_idx, last_message_created_at
                 FROM conversation_tail_state
                 WHERE conversation_id = ?1",
                fparams![conversation_id],
                |row| Ok((row.get_typed(0)?, row.get_typed(1)?, row.get_typed(2)?)),
            )
            .unwrap()
    };

    assert_eq!(read_tail(), (Some(111_005), Some(4), Some(111_004)));

    // Overwrite ended_at on the conversation, then drop the tail-state row.
    storage
        .raw()
        .execute_compat(
            "UPDATE conversations SET ended_at = ?1 WHERE id = ?2",
            fparams![111_999_i64, conversation_id],
        )
        .unwrap();
    storage
        .raw()
        .execute_compat(
            "DELETE FROM conversation_tail_state WHERE conversation_id = ?1",
            fparams![conversation_id],
        )
        .unwrap();

    // Re-insert the same conversation extended to ten messages.
    let extended = make_profiled_append_remote_merge_conversation(11, 10);
    let append_outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &extended)
        .unwrap();
    assert_eq!(append_outcome.inserted_indices, vec![5, 6, 7, 8, 9]);

    assert_eq!(read_tail(), (Some(111_999), Some(9), Some(111_009)));
}
23857
/// Decoding an empty byte slice yields an empty JSON object.
#[test]
fn msgpack_deserialize_empty_returns_default() {
    let empty_object = serde_json::Value::Object(serde_json::Map::new());
    let decoded = deserialize_msgpack_to_json(&[]);
    assert_eq!(decoded, empty_object);
}
23863
/// Decoding the single byte `0x85` yields an empty JSON object.
/// NOTE(review): `0x85` looks like a msgpack map header with no entries
/// following it, i.e. a truncated payload — confirm against the msgpack spec.
#[test]
fn msgpack_deserialize_garbage_returns_default() {
    let truncated = [0x85];
    let empty_object = serde_json::Value::Object(serde_json::Map::new());
    let decoded = deserialize_msgpack_to_json(&truncated);
    assert_eq!(decoded, empty_object);
}
23871
/// Records three entries, checks the raw entry count, then checks the size of
/// the expanded output and the totals of the day-100 "all"/"all" row.
#[test]
fn stats_aggregator_collects_and_expands() {
    let mut aggregator = StatsAggregator::new();
    assert!(aggregator.is_empty());

    // NOTE(review): argument order appears to be (agent, source, day, messages,
    // chars), judging by the rollup totals asserted below — confirm against `record`.
    aggregator.record("claude", "local", 100, 5, 500);
    aggregator.record("codex", "local", 100, 3, 300);
    aggregator.record("claude", "local", 101, 2, 200);

    assert!(!aggregator.is_empty());
    assert_eq!(aggregator.raw_entry_count(), 3);

    // NOTE(review): 10 presumably counts the distinct (day, agent|"all", source|"all")
    // combinations: six for day 100 plus four for day 101 — confirm against `expand`.
    let expanded = aggregator.expand();
    assert_eq!(expanded.len(), 10);

    // The day-100 "all"/"all" row combines the two day-100 records above.
    let (_, _, _, rollup) = expanded
        .iter()
        .find(|(day, agent, source, _)| *day == 100 && agent == "all" && source == "all")
        .unwrap();
    assert_eq!(rollup.session_count_delta, 2);
    assert_eq!(rollup.message_count_delta, 8);
    assert_eq!(rollup.total_chars_delta, 800);
}
23927
/// Constructing a `LazyFrankenDb` must not open the database.
#[test]
fn lazy_franken_db_not_open_before_get() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("lazy_test.db");

    // Create the database file first; the handle stays alive for the whole test.
    let _keep_alive = SqliteStorage::open(&db_file).unwrap();

    let lazy = LazyFrankenDb::new(db_file);
    let opened = lazy.is_open();
    assert!(!opened, "LazyFrankenDb must not open on construction");
}
23946
/// The first `get` call opens the database; afterwards `is_open` reports true.
#[test]
fn lazy_franken_db_opens_on_first_get() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("lazy_test.db");

    // Create the database, then close it again immediately.
    drop(SqliteStorage::open(&db_file).unwrap());

    let lazy = LazyFrankenDb::new(db_file);
    assert!(!lazy.is_open());

    {
        // The connection is released at the end of this scope, before `is_open` is queried.
        let conn = lazy.get("test").expect("should open successfully");
        let row_count: i64 = conn
            .query_row_map("SELECT COUNT(*) FROM conversations", fparams![], |r| {
                r.get_typed(0)
            })
            .unwrap();
        assert_eq!(row_count, 0);
    }

    assert!(lazy.is_open(), "LazyFrankenDb must be open after get()");
}
23970
/// A table created through the first `get` must be queryable through a second
/// `get` on the same `LazyFrankenDb`.
#[test]
fn lazy_franken_db_reuses_connection() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("lazy_test.db");

    // Create the database, then close it again immediately.
    drop(SqliteStorage::open(&db_file).unwrap());

    let lazy = LazyFrankenDb::new(db_file);

    // First access: create a scratch table, then release the connection.
    let first = lazy.get("first").unwrap();
    first
        .execute_batch("CREATE TABLE IF NOT EXISTS test_tbl (id INTEGER)")
        .unwrap();
    drop(first);

    // Second access: the scratch table is visible and empty.
    let second = lazy.get("second").unwrap();
    let row_count: i64 = second
        .query_row_map("SELECT COUNT(*) FROM test_tbl", fparams![], |r| {
            r.get_typed(0)
        })
        .unwrap();
    assert_eq!(row_count, 0);
}
23998
/// `get` on a path where no database file exists fails with `LazyDbError::NotFound`.
#[test]
fn lazy_franken_db_not_found_error() {
    let tmp = TempDir::new().unwrap();
    let missing = tmp.path().join("nonexistent.db");

    let lazy = LazyFrankenDb::new(missing);
    let outcome = lazy.get("test");
    assert!(outcome.is_err());

    let error = outcome.unwrap_err();
    assert!(
        matches!(error, LazyDbError::NotFound(_)),
        "should return NotFound for missing DB"
    );
}
24012
/// `path()` returns the path the `LazyFrankenDb` was constructed with.
#[test]
fn lazy_franken_db_path_accessor() {
    let configured = PathBuf::from("/tmp/test_lazy.db");
    let lazy = LazyFrankenDb::new(configured.clone());
    assert_eq!(lazy.path(), configured.as_path());
}
24019
/// Table-driven check of `sql_like_match` over `%`, `_`, exact-match and
/// mixed-case inputs.
#[test]
fn sql_like_match_basic_patterns() {
    // (input, pattern, expected result)
    let cases: [(&str, &str, bool); 11] = [
        // Trailing `%`.
        ("claude-opus-4-20250101", "claude-opus-4%", true),
        ("claude-opus-4", "claude-opus-4%", true),
        ("claude-sonnet-4", "claude-opus-4%", false),
        // `%` in the middle and at the end.
        ("gemini-2.0-flash-001", "gemini-2%flash%", true),
        ("gemini-2-flash", "gemini-2%flash%", true),
        ("gemini-2-pro", "gemini-2%flash%", false),
        // No wildcards: the whole input must match.
        ("hello", "hello", true),
        ("hello!", "hello", false),
        // `_` stands for exactly one character.
        ("gpt-4o", "gpt-4_", true),
        ("gpt-4oo", "gpt-4_", false),
        // Input casing differs from the pattern's and still matches.
        ("Claude-Opus-4", "claude-opus-4%", true),
    ];

    for (input, pattern, expected) in cases {
        assert_eq!(
            sql_like_match(input, pattern),
            expected,
            "sql_like_match({input:?}, {pattern:?})"
        );
    }
}
24046
/// Checks two known date-to-day-id conversions and that unparseable input is
/// rejected. Both expected ids equal the number of days elapsed since 2020-01-01.
#[test]
fn date_str_to_day_id_converts_correctly() {
    for (date, expected_day_id) in [("2025-10-01", 2100), ("2024-04-01", 1552)] {
        assert_eq!(date_str_to_day_id(date).unwrap(), expected_day_id);
    }

    assert!(date_str_to_day_id("invalid").is_err());
}
24055
/// With two entries sharing one effective day, `lookup` returns the entry
/// whose pattern matches the model name, and `None` when no pattern matches.
#[test]
fn pricing_table_lookup_selects_matching_entry() {
    let effective_day = date_str_to_day_id("2025-10-01").unwrap();
    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();

    let opus_pricing = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: effective_day,
    };
    let sonnet_pricing = PricingEntry {
        model_pattern: "claude-sonnet-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 3.0,
        output_cost_per_mtok: 15.0,
        cache_read_cost_per_mtok: Some(0.3),
        cache_creation_cost_per_mtok: Some(3.75),
        effective_day_id: effective_day,
    };
    let table = PricingTable {
        entries: vec![opus_pricing, sonnet_pricing],
    };

    // An opus model name resolves to the opus entry.
    let opus_hit = table.lookup("claude-opus-4-20260101", lookup_day);
    assert!(opus_hit.is_some());
    assert_eq!(opus_hit.unwrap().input_cost_per_mtok, 15.0);

    // A sonnet model name resolves to the sonnet entry.
    let sonnet_hit = table.lookup("claude-sonnet-4-latest", lookup_day);
    assert!(sonnet_hit.is_some());
    assert_eq!(sonnet_hit.unwrap().input_cost_per_mtok, 3.0);

    // A name that matches neither pattern yields nothing.
    let miss = table.lookup("unknown-model", lookup_day);
    assert!(miss.is_none());
}
24093
/// Two entries share one pattern but become effective on different days.
/// The lookup day decides which price applies, and a day before the first
/// effective date yields no entry at all.
#[test]
fn pricing_table_lookup_respects_effective_date() {
    let day = |date: &str| date_str_to_day_id(date).unwrap();

    let original_price = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: day("2025-10-01"),
    };
    let reduced_price = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 12.0,
        output_cost_per_mtok: 60.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: day("2026-01-01"),
    };
    let table = PricingTable {
        entries: vec![original_price, reduced_price],
    };

    // Between the two effective dates only the first entry applies.
    let between = table.lookup("claude-opus-4", day("2025-11-01"));
    assert!(between.is_some());
    assert_eq!(between.unwrap().input_cost_per_mtok, 15.0);

    // After the second effective date the newer price is expected.
    let after = table.lookup("claude-opus-4", day("2026-02-01"));
    assert!(after.is_some());
    assert_eq!(after.unwrap().input_cost_per_mtok, 12.0);

    // Before any entry is effective there is nothing to return.
    let before = table.lookup("claude-opus-4", day("2024-01-01"));
    assert!(before.is_none());
}
24138
/// When two patterns with the same effective day both match a model, the
/// `gpt-4-turbo%` entry is expected over the `gpt-4%` entry. A model that
/// matches only `gpt-4%` gets that entry.
#[test]
fn pricing_table_lookup_specificity_tiebreak() {
    let effective_day = date_str_to_day_id("2025-01-01").unwrap();
    let lookup_day = date_str_to_day_id("2026-01-01").unwrap();

    let broad = PricingEntry {
        model_pattern: "gpt-4%".into(),
        provider: "openai".into(),
        input_cost_per_mtok: 10.0,
        output_cost_per_mtok: 30.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: effective_day,
    };
    let narrow = PricingEntry {
        model_pattern: "gpt-4-turbo%".into(),
        provider: "openai".into(),
        input_cost_per_mtok: 5.0,
        output_cost_per_mtok: 15.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: effective_day,
    };
    let table = PricingTable {
        entries: vec![broad, narrow],
    };

    // Both patterns match this model; the turbo price (5.0) must win.
    let turbo = table.lookup("gpt-4-turbo-2025", lookup_day);
    assert!(turbo.is_some());
    assert_eq!(turbo.unwrap().input_cost_per_mtok, 5.0);

    // Only the broad pattern matches this model.
    let plain = table.lookup("gpt-4o", lookup_day);
    assert!(plain.is_some());
    assert_eq!(plain.unwrap().input_cost_per_mtok, 10.0);
}
24176
/// Cost of a request with no cache token counts supplied.
///
/// The expected 0.0525 is consistent with 1000 tokens at 15.0 per million
/// plus 500 tokens at 75.0 per million (0.015 + 0.0375).
#[test]
fn pricing_table_compute_cost_basic() {
    let opus = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus],
    };

    let request_day = date_str_to_day_id("2026-02-06").unwrap();
    let cost = table.compute_cost(
        Some("claude-opus-4-latest"),
        request_day,
        Some(1000),
        Some(500),
        None,
        None,
    );
    assert!(cost.is_some());

    // Floating-point result: compare within a tolerance rather than exactly.
    let deviation = (cost.unwrap() - 0.0525).abs();
    assert!(deviation < 1e-10);
}
24204
/// Cost of a request that also supplies the two cache token counts.
///
/// The expected 16.5 is consistent with the arguments being input, output,
/// cache-read and cache-creation counts, with cache tokens billed at their
/// own rates and subtracted from the input count:
/// 300_000 x 15 + 100_000 x 75 + 500_000 x 1.5 + 200_000 x 18.75, per
/// million tokens. NOTE(review): inferred from the numbers; confirm against
/// `compute_cost`.
#[test]
fn pricing_table_compute_cost_with_cache() {
    let opus = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: Some(1.5),
        cache_creation_cost_per_mtok: Some(18.75),
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus],
    };

    let request_day = date_str_to_day_id("2026-02-06").unwrap();
    let cost = table.compute_cost(
        Some("claude-opus-4-latest"),
        request_day,
        Some(1_000_000),
        Some(100_000),
        Some(500_000),
        Some(200_000),
    );
    assert!(cost.is_some());

    // Floating-point result: compare within a tolerance rather than exactly.
    let deviation = (cost.unwrap() - 16.5).abs();
    assert!(deviation < 1e-10);
}
24236
/// `compute_cost` must return `None` in three situations: the model matches
/// no pricing entry, no model name is given, and a known model is given
/// without any token counts.
#[test]
fn pricing_table_compute_cost_returns_none_for_unknown_model() {
    let lookup_day = date_str_to_day_id("2026-02-06").unwrap();
    let opus = PricingEntry {
        model_pattern: "claude-opus-4%".into(),
        provider: "anthropic".into(),
        input_cost_per_mtok: 15.0,
        output_cost_per_mtok: 75.0,
        cache_read_cost_per_mtok: None,
        cache_creation_cost_per_mtok: None,
        effective_day_id: date_str_to_day_id("2025-10-01").unwrap(),
    };
    let table = PricingTable {
        entries: vec![opus],
    };

    // Model name that no pattern matches.
    let unknown_model = table.compute_cost(
        Some("unknown-model"),
        lookup_day,
        Some(1000),
        Some(500),
        None,
        None,
    );
    assert!(unknown_model.is_none());

    // No model name at all.
    let missing_model = table.compute_cost(None, lookup_day, Some(1000), Some(500), None, None);
    assert!(missing_model.is_none());

    // Known model, but every token count is absent.
    let missing_tokens =
        table.compute_cost(Some("claude-opus-4"), lookup_day, None, None, None, None);
    assert!(missing_tokens.is_none());
}
24276
/// A freshly opened `SqliteStorage` must already contain pricing rows that
/// `PricingTable::load` can read, including entries matching an Opus and a
/// Gemini Flash model name.
#[test]
fn pricing_table_load_from_db() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let pricing = PricingTable::load(&storage.conn).unwrap();
    assert!(!pricing.is_empty());

    let as_of = date_str_to_day_id("2026-02-06").unwrap();

    let opus_entry = pricing.lookup("claude-opus-4-latest", as_of);
    assert!(opus_entry.is_some());
    assert_eq!(opus_entry.unwrap().input_cost_per_mtok, 15.0);

    let flash_entry = pricing.lookup("gemini-2.0-flash-001", as_of);
    assert!(flash_entry.is_some());
    assert_eq!(flash_entry.unwrap().input_cost_per_mtok, 0.075);
}
24296
/// A `model_pricing` row whose `effective_date` is not a date must make
/// `PricingTable::load` fail with an error mentioning
/// "invalid effective_date".
#[test]
fn pricing_table_load_rejects_invalid_effective_date() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test.db");
    let storage = SqliteStorage::open(&db_file).unwrap();

    let insert_sql = "INSERT INTO model_pricing (
            model_pattern, provider, input_cost_per_mtok, output_cost_per_mtok,
            cache_read_cost_per_mtok, cache_creation_cost_per_mtok, effective_date
         ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)";
    // The final parameter is the deliberately malformed effective date.
    storage
        .conn
        .execute_compat(
            insert_sql,
            fparams![
                "broken-model%",
                "test",
                1.0_f64,
                2.0_f64,
                Option::<f64>::None,
                Option::<f64>::None,
                "not-a-date"
            ],
        )
        .unwrap();

    let message = PricingTable::load(&storage.conn).unwrap_err().to_string();
    assert!(message.contains("invalid effective_date"));
}
24325
/// `PricingDiagnostics` must count priced and unpriced records separately
/// and tally unpriced records per model name, with a missing model name
/// stored under the `"(none)"` key.
#[test]
fn pricing_diagnostics_tracks_coverage() {
    let mut diagnostics = PricingDiagnostics::default();

    for _ in 0..2 {
        diagnostics.record_priced();
    }
    for _ in 0..2 {
        diagnostics.record_unpriced(Some("custom-model-v1"));
    }
    diagnostics.record_unpriced(None);

    assert_eq!(diagnostics.priced_count, 2);
    assert_eq!(diagnostics.unpriced_count, 3);

    let unknown = &diagnostics.unknown_models;
    assert_eq!(unknown.len(), 2);
    assert_eq!(unknown["custom-model-v1"], 2);
    assert_eq!(unknown["(none)"], 1);
}
24341
24342 fn franken_storage_in_memory() -> FrankenStorage {
24352 let conn = FrankenConnection::open(":memory:").unwrap();
24353 let storage = FrankenStorage::new(conn, PathBuf::from(":memory:"));
24354 storage.run_migrations().unwrap();
24355 storage.apply_config().unwrap();
24356 storage
24357 }
24358
/// A freshly migrated in-memory `FrankenStorage` must report the current
/// schema version, contain every expected table, and record versions 13
/// through `CURRENT_SCHEMA_VERSION` in `_schema_migrations`.
#[test]
fn franken_migrations_create_all_tables() {
    let storage = franken_storage_in_memory();

    let version = storage.schema_version().unwrap();
    assert_eq!(
        version, CURRENT_SCHEMA_VERSION,
        "fresh FrankenStorage should be at current schema version"
    );

    let rows = storage
        .raw()
        .query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
        .unwrap();
    // Decode strictly: a row that cannot be read as a string is a test
    // failure in its own right, not something to drop silently and later
    // misreport as a "missing table".
    let table_names: Vec<String> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    let has_table = |name: &str| table_names.iter().any(|existing| existing == name);

    for required in [
        "meta",
        "agents",
        "workspaces",
        "conversations",
        "messages",
        "snippets",
        "tags",
        "conversation_tags",
    ] {
        assert!(has_table(required), "missing table: {required}");
    }

    assert!(has_table("sources"), "missing sources table");

    assert!(has_table("daily_stats"), "missing daily_stats table");

    assert!(has_table("embedding_jobs"), "missing embedding_jobs table");

    for analytics_table in ["message_metrics", "usage_hourly", "usage_daily"] {
        assert!(
            has_table(analytics_table),
            "missing table: {analytics_table}"
        );
    }
    assert!(
        has_table("conversation_tail_state"),
        "missing conversation_tail_state table"
    );
    assert!(
        has_table("conversation_external_lookup"),
        "missing conversation_external_lookup table"
    );
    assert!(
        has_table("conversation_external_tail_lookup"),
        "missing conversation_external_tail_lookup table"
    );

    // One migration row is expected per version from 13 up to the current one.
    let rows = storage
        .raw()
        .query("SELECT COUNT(*) FROM _schema_migrations;")
        .unwrap();
    let count: i64 = rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(
        count,
        (13..=CURRENT_SCHEMA_VERSION).count() as i64,
        "_schema_migrations should record the V13 base schema and post-V13 migrations"
    );

    let rows = storage
        .raw()
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        versions,
        (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
        "_schema_migrations should contain v13 through current"
    );
}
24463
/// Running migrations again on an already-migrated storage must succeed and
/// leave the schema version unchanged.
#[test]
fn franken_migrations_idempotent() {
    let storage = franken_storage_in_memory();
    let before = storage.schema_version().unwrap();
    assert_eq!(before, CURRENT_SCHEMA_VERSION);

    storage.run_migrations().unwrap();

    let after = storage.schema_version().unwrap();
    assert_eq!(after, CURRENT_SCHEMA_VERSION);
}
24473
/// Re-running migrations from a simulated version-19 state must repopulate
/// `conversation_external_tail_lookup` for a conversation that is already
/// stored.
#[test]
fn migration_v20_backfills_conversation_external_tail_lookup() {
    let storage = franken_storage_in_memory();
    let agent_id = storage
        .ensure_agent(&Agent {
            id: None,
            slug: "codex".into(),
            name: "Codex".into(),
            version: None,
            kind: AgentKind::Cli,
        })
        .unwrap();
    let workspace_id = storage
        .ensure_workspace(&PathBuf::from("/ws/profiled-storage-remote"), None)
        .unwrap();
    // NOTE(review): the second argument is presumably the message count; the
    // assertions below index `messages[1]` and expect a last index of 1.
    let mut conv = make_profiled_storage_remote_conversation(1919, 2);
    // Non-ASCII source and external ids are used for the lookup key.
    conv.source_id = "profiled-storage-remote-source-東京".into();
    conv.external_id = Some("profiled-storage-remote-☃-1919".into());
    let outcome = storage
        .insert_conversation_tree(agent_id, Some(workspace_id), &conv)
        .unwrap();
    let external_id = conv.external_id.as_deref().unwrap();
    let lookup_key = conversation_external_lookup_key(&conv.source_id, agent_id, external_id);

    // Rewind to "version 19": empty the lookup table, forget that migration
    // 20 was applied, and set the meta schema version back.
    storage
        .raw()
        .execute("DELETE FROM conversation_external_tail_lookup")
        .unwrap();
    storage
        .raw()
        .execute("DELETE FROM _schema_migrations WHERE version = 20")
        .unwrap();
    storage
        .raw()
        .execute_compat(
            "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
            fparams!["19"],
        )
        .unwrap();

    // Migrating again is what must rebuild the deleted row.
    storage.run_migrations().unwrap();

    let backfilled: (i64, Option<i64>, Option<i64>, Option<i64>) = storage
        .raw()
        .query_row_map(
            "SELECT conversation_id, ended_at, last_message_idx, last_message_created_at
             FROM conversation_external_tail_lookup
             WHERE lookup_key = ?1",
            fparams![lookup_key.as_str()],
            |row| {
                Ok((
                    row.get_typed(0)?,
                    row.get_typed(1)?,
                    row.get_typed(2)?,
                    row.get_typed(3)?,
                ))
            },
        )
        .unwrap();
    // The rebuilt row must point at the stored conversation and describe its
    // final message (index 1).
    assert_eq!(
        backfilled,
        (
            outcome.conversation_id,
            conv.ended_at,
            Some(1),
            conv.messages[1].created_at
        )
    );
    assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
}
24544
/// Applies the conversation tail-state cache migration to a minimal
/// hand-built schema and checks that it runs exactly once, adds the tail
/// columns to `conversations`, leaves `conversation_tail_state` empty, and
/// records version 15.
#[test]
fn migration_v15_creates_lazy_tail_state_cache() {
    let conn = FrankenConnection::open(":memory:").unwrap();
    // Minimal pre-migration schema: two conversations (one without
    // `ended_at`) and three messages.
    conn.execute_batch(
        "CREATE TABLE conversations (
            id INTEGER PRIMARY KEY,
            ended_at INTEGER
         );
         CREATE TABLE messages (
            id INTEGER PRIMARY KEY,
            conversation_id INTEGER NOT NULL,
            idx INTEGER NOT NULL,
            created_at INTEGER
         );
         INSERT INTO conversations(id, ended_at) VALUES
            (1, 1710000000300),
            (2, NULL);
         INSERT INTO messages(id, conversation_id, idx, created_at) VALUES
            (10, 1, 0, 1710000000100),
            (11, 1, 1, 1710000000200),
            (12, 2, 0, 1710000000400);",
    )
    .unwrap();

    // The migration bookkeeping table is created by hand, with no rows.
    conn.execute(
        "CREATE TABLE _schema_migrations (
            version INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            applied_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
         );",
    )
    .unwrap();

    // The first call reports that it applied the migration; the second must
    // report that there was nothing left to do.
    assert!(
        apply_conversation_tail_state_cache_migration(&conn).unwrap(),
        "v15 migration should apply once"
    );
    assert!(
        !apply_conversation_tail_state_cache_migration(&conn).unwrap(),
        "v15 migration should be idempotent once recorded"
    );

    // Column 1 of `PRAGMA table_info` is the column name.
    let columns = conn.query("PRAGMA table_info(conversations);").unwrap();
    let column_names: HashSet<String> = columns
        .iter()
        .map(|row| row.get_typed(1))
        .collect::<std::result::Result<_, frankensqlite::FrankenError>>()
        .unwrap();
    assert!(column_names.contains("last_message_idx"));
    assert!(column_names.contains("last_message_created_at"));

    // The cache table must exist (the query succeeds) but hold no rows, even
    // though messages were present when the migration ran.
    let tail_rows: i64 = conn
        .query("SELECT COUNT(*) FROM conversation_tail_state;")
        .unwrap()
        .first()
        .unwrap()
        .get_typed(0)
        .unwrap();
    assert_eq!(
        tail_rows, 0,
        "v15 should create the cache without an open-time message scan"
    );

    // Exactly one bookkeeping row for version 15, despite the two calls.
    let applied: i64 = conn
        .query("SELECT COUNT(*) FROM _schema_migrations WHERE version = 15;")
        .unwrap()
        .first()
        .unwrap()
        .get_typed(0)
        .unwrap();
    assert_eq!(applied, 1);
}
24617
/// Starting from a `conversations` table with only three columns, the
/// repair routine must add every column listed in
/// `REQUIRED_CONVERSATION_TOKEN_COLUMNS`, and a second run must also succeed.
#[test]
fn schema_repair_adds_missing_conversations_token_columns() {
    let bare_conn = FrankenConnection::open(":memory:").unwrap();
    bare_conn
        .execute_batch(
            "CREATE TABLE conversations (
                id INTEGER PRIMARY KEY,
                agent_id INTEGER NOT NULL,
                source_path TEXT NOT NULL
             );",
        )
        .unwrap();
    let storage = FrankenStorage::new(bare_conn, std::path::PathBuf::from(":memory:"));

    // Run the repair twice: the second pass checks that it can be repeated.
    for _ in 0..2 {
        storage.repair_missing_conversation_token_columns().unwrap();
    }

    let present = franken_table_column_names(&storage.conn, "conversations").unwrap();
    for &(column_name, _) in REQUIRED_CONVERSATION_TOKEN_COLUMNS.iter() {
        assert!(
            present.contains(column_name),
            "schema repair should add conversations.{column_name}"
        );
    }
}
24642
/// The `schema_version` value stored in `meta` must equal
/// `CURRENT_SCHEMA_VERSION` rendered as a string.
#[test]
fn franken_meta_schema_version_in_sync() {
    let storage = franken_storage_in_memory();

    let meta_rows = storage
        .raw()
        .query("SELECT value FROM meta WHERE key = 'schema_version';")
        .unwrap();
    let first_row = meta_rows.first().unwrap();
    let stored_version: String = first_row.get_typed(0).unwrap();

    assert_eq!(
        stored_version,
        CURRENT_SCHEMA_VERSION.to_string(),
        "meta.schema_version should match CURRENT_SCHEMA_VERSION"
    );
}
24659
/// A legacy database whose only version marker is `meta.schema_version = '10'`
/// must leave `transition_from_meta_version` with `_schema_migrations` rows
/// for versions 1 through 13.
#[test]
fn franken_transition_from_meta_version() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_transition.db");

    // Seed the legacy layout, then close the connection before reopening.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
        .unwrap();
    conn.execute("INSERT INTO meta(key, value) VALUES('schema_version', '10');")
        .unwrap();
    conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
        .unwrap();
    drop(conn);

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly so that an unreadable row fails with its own error
    // instead of being dropped and showing up as a confusing list mismatch.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        versions,
        (1..=13).collect::<Vec<i64>>(),
        "transition should bridge legacy V10 databases through the combined V13 base marker"
    );
}
24694
/// When `meta.schema_version` already holds the current version,
/// `transition_from_meta_version` must record every version from 1 through
/// `CURRENT_SCHEMA_VERSION` in `_schema_migrations`.
#[test]
fn franken_transition_from_current_meta_backfills_current_schema_marker() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_current_transition.db");

    // Seed a database whose meta marker claims the current schema version.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);")
        .unwrap();
    conn.execute_compat(
        "INSERT INTO meta(key, value) VALUES('schema_version', ?1);",
        &[ParamValue::from(CURRENT_SCHEMA_VERSION.to_string())],
    )
    .unwrap();
    conn.execute("CREATE TABLE conversations (id INTEGER PRIMARY KEY);")
        .unwrap();
    drop(conn);

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly so that an unreadable row fails with its own error
    // instead of being dropped and showing up as a confusing list mismatch.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(
        versions,
        (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
        "current meta schema marker should backfill every known migration"
    );
}
24725
/// If `_schema_migrations` already exists with a row in it,
/// `transition_from_meta_version` must not add any further rows.
#[test]
fn franken_transition_skips_when_already_done() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test_transition_skip.db");
    let conn = FrankenConnection::open(db_file.to_string_lossy().to_string()).unwrap();

    // Pre-create the bookkeeping table holding exactly one row.
    let seed_statements = [
        "CREATE TABLE _schema_migrations (version INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL DEFAULT 'now');",
        "INSERT INTO _schema_migrations (version, name) VALUES (1, 'test');",
    ];
    for &statement in &seed_statements {
        conn.execute(statement).unwrap();
    }

    transition_from_meta_version(&conn).unwrap();

    let count_rows = conn
        .query("SELECT COUNT(*) FROM _schema_migrations;")
        .unwrap();
    let row_count: i64 = count_rows.first().unwrap().get_typed(0).unwrap();
    assert_eq!(
        row_count, 1,
        "transition should not re-run on already-transitioned DB"
    );
}
24752
/// On a brand-new database file `transition_from_meta_version` must succeed
/// without creating `_schema_migrations`; querying that table afterwards is
/// expected to fail.
#[test]
fn franken_transition_fresh_db_is_noop() {
    let tmp = TempDir::new().unwrap();
    let db_file = tmp.path().join("test_fresh_noop.db");
    let conn = FrankenConnection::open(db_file.to_string_lossy().to_string()).unwrap();

    transition_from_meta_version(&conn).unwrap();

    let probe = conn.query("SELECT * FROM \"_schema_migrations\";");
    assert!(
        probe.is_err(),
        "transition should not create _schema_migrations on fresh DB"
    );
}
24769
/// A legacy database at meta version 13 that also contains an FTS5 virtual
/// table must still pass through `transition_from_meta_version`, ending with
/// `_schema_migrations` rows for versions 1 through 13.
#[test]
fn franken_transition_with_fts_virtual_table_succeeds() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_transition_with_fts.db");

    // Seed the legacy layout, including the contentless FTS5 table.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(
        "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
         INSERT INTO meta(key, value) VALUES('schema_version', '13');
         CREATE TABLE conversations (id INTEGER PRIMARY KEY);
         CREATE VIRTUAL TABLE fts_messages USING fts5(
            content,
            title,
            agent,
            workspace,
            source_path,
            created_at,
            content='',
            tokenize='porter unicode61'
         );",
    )
    .unwrap();
    drop(conn);

    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    transition_from_meta_version(&conn).unwrap();

    let rows = conn
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly so that an unreadable row fails with its own error
    // instead of being dropped and showing up as a confusing list mismatch.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(versions, (1..=13).collect::<Vec<i64>>());
}
24803
/// `FrankenStorage::open` on a hand-built legacy database (meta version 13,
/// core tables, one conversation with one message, and an FTS5 virtual
/// table) must migrate it to `CURRENT_SCHEMA_VERSION` and record every
/// version from 1 through the current one in `_schema_migrations`.
#[test]
fn franken_storage_open_legacy_v13_with_fts_virtual_table_succeeds() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_open_legacy_v13_with_fts.db");

    // Build the legacy fixture directly through a raw connection.
    let conn = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    conn.execute_batch(
        "CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL);
         INSERT INTO meta(key, value) VALUES('schema_version', '13');
         CREATE TABLE agents (
            id INTEGER PRIMARY KEY,
            slug TEXT NOT NULL
         );
         CREATE TABLE workspaces (
            id INTEGER PRIMARY KEY,
            path TEXT NOT NULL
         );
         CREATE TABLE sources (
            id TEXT PRIMARY KEY,
            kind TEXT NOT NULL,
            host_label TEXT,
            machine_id TEXT,
            platform TEXT,
            config_json TEXT,
            created_at INTEGER NOT NULL,
            updated_at INTEGER NOT NULL
         );
         CREATE TABLE conversations (
            id INTEGER PRIMARY KEY,
            agent_id INTEGER NOT NULL,
            workspace_id INTEGER,
            source_id TEXT NOT NULL DEFAULT 'local',
            external_id TEXT,
            title TEXT,
            source_path TEXT NOT NULL,
            started_at INTEGER,
            ended_at INTEGER
         );
         CREATE TABLE messages (
            id INTEGER PRIMARY KEY,
            conversation_id INTEGER NOT NULL,
            idx INTEGER NOT NULL,
            role TEXT NOT NULL,
            author TEXT,
            created_at INTEGER,
            content TEXT NOT NULL,
            extra_json TEXT,
            extra_bin BLOB
         );
         INSERT INTO agents(id, slug) VALUES (1, 'codex');
         INSERT INTO workspaces(id, path) VALUES (1, '/data/projects/coding_agent_session_search');
         INSERT INTO sources(id, kind, host_label, created_at, updated_at)
         VALUES ('local', 'local', NULL, 1710000000000, 1710000000000);
         INSERT INTO conversations(
            id,
            agent_id,
            workspace_id,
            source_id,
            external_id,
            title,
            source_path,
            started_at
         )
         VALUES (
            1,
            1,
            1,
            'local',
            'legacy-session',
            'legacy session',
            '/tmp/legacy.jsonl',
            1710000000000
         );
         INSERT INTO messages(id, conversation_id, idx, role, author, created_at, content)
         VALUES (1, 1, 0, 'user', 'tester', 1710000000000, 'legacy content');
         CREATE VIRTUAL TABLE fts_messages USING fts5(
            content,
            title,
            agent,
            workspace,
            source_path,
            created_at,
            message_id,
            content='',
            tokenize='porter unicode61'
         );",
    )
    .unwrap();
    drop(conn);

    // Opening through the storage layer is what performs the upgrade.
    let storage = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(storage.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let rows = storage
        .raw()
        .query("SELECT version FROM _schema_migrations ORDER BY version;")
        .unwrap();
    // Decode strictly so that an unreadable row fails with its own error
    // instead of being dropped and showing up as a confusing list mismatch.
    let versions: Vec<i64> = rows
        .iter()
        .map(|row| row.get_typed(0))
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(versions, (1..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>());
}
24904
/// Injects a second `fts_messages` row into `sqlite_master`, then checks
/// that reopening the storage still works, that open alone does not write
/// the FTS rebuild meta key, and that
/// `ensure_search_fallback_fts_consistency` leaves a single schema row with
/// one FTS row per message.
#[test]
fn franken_storage_open_repairs_duplicate_fts_messages_schema_rows() {
    let dir = TempDir::new().unwrap();
    let db_path = dir.path().join("test_open_repairs_duplicate_fts_schema.db");

    // Start from a healthy database holding one conversation with one message.
    let storage = FrankenStorage::open(&db_path).unwrap();
    let agent = Agent {
        id: None,
        slug: "codex".into(),
        name: "Codex".into(),
        version: None,
        kind: AgentKind::Cli,
    };
    let agent_id = storage.ensure_agent(&agent).unwrap();
    let conversation = Conversation {
        id: None,
        agent_slug: "codex".into(),
        workspace: Some(PathBuf::from("/tmp/workspace")),
        external_id: Some("dup-fts-schema".into()),
        title: Some("Duplicate FTS schema".into()),
        source_path: PathBuf::from("/tmp/dup-fts-schema.jsonl"),
        started_at: Some(1_700_000_000_000),
        ended_at: Some(1_700_000_000_100),
        approx_tokens: Some(42),
        metadata_json: serde_json::Value::Null,
        messages: vec![Message {
            id: None,
            idx: 0,
            role: MessageRole::User,
            author: Some("user".into()),
            created_at: Some(1_700_000_000_050),
            content: "message that should remain queryable".into(),
            extra_json: serde_json::Value::Null,
            snippets: Vec::new(),
        }],
        source_id: LOCAL_SOURCE_ID.into(),
        origin_host: None,
    };
    storage
        .insert_conversation_tree(agent_id, None, &conversation)
        .unwrap();
    drop(storage);
    // NOTE(review): presumably creates the regular `fts_messages` schema
    // through rusqlite so that a first schema row exists; confirm in the helper.
    materialize_fresh_fts_schema_via_rusqlite(&db_path).unwrap();

    // Corrupt the schema: with `writable_schema` on, add a second
    // `fts_messages` row to `sqlite_master` and remove the FTS rebuild meta key.
    let duplicate_legacy_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
    let conn = rusqlite_test_fixture_conn(&db_path);
    conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
    conn.execute(
        "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
         VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
        [duplicate_legacy_fts_sql],
    )
    .unwrap();
    conn.execute(
        "DELETE FROM meta WHERE key = ?1",
        [FTS_FRANKEN_REBUILD_META_KEY],
    )
    .unwrap();
    conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();

    // Sanity check: the fixture really contains two schema rows.
    let duplicate_rows: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master WHERE name = 'fts_messages'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(duplicate_rows, 2);
    drop(conn);

    // Reopening must succeed despite the duplicate schema row.
    let reopened = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(reopened.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    let generation_rows: Vec<String> = reopened
        .raw()
        .query_map_collect(
            "SELECT value FROM meta WHERE key = ?1",
            fparams![FTS_FRANKEN_REBUILD_META_KEY],
            |row| row.get_typed(0),
        )
        .unwrap();
    // The meta key deleted above must still be absent after open.
    assert_eq!(
        generation_rows.len(),
        0,
        "canonical open should not eagerly rewrite FTS repair metadata"
    );
    // The explicit consistency pass is expected to collapse the duplicate;
    // this is verified through a separate connection to the same file.
    reopened.ensure_search_fallback_fts_consistency().unwrap();
    let repaired = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    assert_eq!(franken_fts_schema_rows(&repaired).unwrap(), 1);

    // Every stored message must have a matching FTS row.
    let total_messages: i64 = reopened
        .raw()
        .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    let total_fts_rows: i64 = reopened
        .raw()
        .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
            row.get_typed(0)
        })
        .unwrap();
    assert_eq!(total_fts_rows, total_messages);
}
25010
/// Checks that a healthy FTS table passes integrity validation, and that a
/// database with an `fts_messages` schema row but no backing tables fails to
/// open with an error that maps to the typed FTS integrity diagnosis naming
/// `fts_messages_content`.
#[test]
fn fts_messages_integrity_reports_missing_shadow_tables() {
    let dir = TempDir::new().unwrap();
    let healthy_db_path = dir.path().join("healthy_fts.db");

    // Baseline: a materialized FTS table must validate cleanly. The storage
    // is dropped at the end of this scope.
    {
        let storage = FrankenStorage::open(&healthy_db_path).unwrap();
        storage.ensure_search_fallback_fts_consistency().unwrap();
        storage
            .validate_fts_messages_integrity()
            .expect("freshly materialized fts_messages should pass integrity validation");
    }

    // Corrupt fixture: only an ordinary anchor table is really created; the
    // `fts_messages` schema row is written straight into `sqlite_master`.
    let corrupt_db_path = dir.path().join("test_corrupt_fts_missing_shadows.db");
    {
        let conn = rusqlite_test_fixture_conn(&corrupt_db_path);
        conn.execute("CREATE TABLE schema_anchor(id INTEGER PRIMARY KEY)", [])
            .unwrap();
        let orphaned_fts_sql = "CREATE VIRTUAL TABLE fts_messages USING fts5(content, title, agent, workspace, source_path, created_at UNINDEXED, message_id UNINDEXED, tokenize='porter')";
        conn.execute_batch("PRAGMA writable_schema = ON;").unwrap();
        conn.execute(
            "INSERT INTO sqlite_master(type, name, tbl_name, rootpage, sql)
             VALUES('table', 'fts_messages', 'fts_messages', 0, ?1)",
            [orphaned_fts_sql],
        )
        .unwrap();
        conn.execute_batch("PRAGMA writable_schema = OFF;").unwrap();
    }

    // Opening the corrupt file must fail, and the error text must map to the
    // typed integrity error.
    let open_err = FrankenConnection::open(corrupt_db_path.to_string_lossy().to_string())
        .expect_err("orphaned fts_messages schema should fail during connection open");
    let integrity = fts_messages_integrity_error_from_message(open_err.to_string())
        .expect("open-time FTS corruption should map to the typed FTS integrity kind");
    assert_eq!(integrity.missing_shadow_tables(), &["fts_messages_content"]);
    // The rendered message must name the table, the problem, and the missing
    // shadow table.
    let rendered = integrity.to_string();
    assert!(
        rendered.contains("fts_messages")
            && rendered.contains("required FTS5 shadow tables")
            && rendered.contains("fts_messages_content"),
        "error should be an operator-facing FTS corruption diagnosis: {rendered}"
    );
}
25053
/// After a fresh open followed by `ensure_search_fallback_fts_consistency`,
/// exactly one `fts_messages` schema row must exist, and `fts_messages` must
/// be queryable once the storage is opened again.
#[test]
fn franken_storage_open_fresh_db_keeps_single_franken_fts_schema_row() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("fresh-franken-storage-open.db");

    // First open: migrate and materialize FTS, then release the storage.
    {
        let first_open = FrankenStorage::open(&db_path).unwrap();
        assert_eq!(first_open.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
        first_open
            .ensure_search_fallback_fts_consistency()
            .expect("ensure FTS consistency after fresh open");
    }

    // Inspect the schema through a separate raw connection.
    {
        let inspector = FrankenConnection::open(db_path.to_string_lossy().into_owned())
            .expect("open DB via frankensqlite for sqlite_master inspection");
        let schema_rows = franken_fts_schema_rows(&inspector).unwrap();
        assert_eq!(
            schema_rows,
            1,
            "exactly one fts_messages schema row should exist after ensure_search_fallback_fts_consistency"
        );
    }

    // Second open: the FTS table must answer a simple query.
    let second_open = FrankenStorage::open(&db_path).unwrap();
    let fts_probe = second_open.raw().query("SELECT COUNT(*) FROM fts_messages");
    assert!(
        fts_probe.is_ok(),
        "fts_messages must be queryable through frankensqlite after open"
    );
}
25091
/// Drops nine analytics-related tables from a fully migrated database while
/// leaving `meta.schema_version` at the current version, then checks that
/// `FrankenStorage::open` recreates all nine.
#[test]
fn franken_storage_open_repairs_missing_analytics_tables_when_version_markers_lie() {
    let tmp = TempDir::new().unwrap();
    let db_path = tmp.path().join("test_repair_missing_analytics.db");

    // Step 1: create a fully migrated database and release it.
    let pristine = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(pristine.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);
    drop(pristine);

    // Step 2: remove the tables behind the storage layer's back and keep the
    // version marker claiming the schema is current.
    let saboteur = FrankenConnection::open(db_path.to_string_lossy().into_owned()).unwrap();
    let dropped_tables = [
        "usage_models_daily",
        "usage_daily",
        "usage_hourly",
        "message_metrics",
        "token_daily_stats",
        "token_usage",
        "model_pricing",
        "embedding_jobs",
        "daily_stats",
    ];
    for table in &dropped_tables {
        saboteur
            .execute(&format!("DROP TABLE IF EXISTS {table}"))
            .unwrap();
    }
    saboteur
        .execute_compat(
            "UPDATE meta SET value = ?1 WHERE key = 'schema_version'",
            fparams![CURRENT_SCHEMA_VERSION.to_string()],
        )
        .unwrap();
    drop(saboteur);

    // Step 3: reopening must restore every dropped table.
    let repaired = FrankenStorage::open(&db_path).unwrap();
    assert_eq!(repaired.schema_version().unwrap(), CURRENT_SCHEMA_VERSION);

    let count_sql = "SELECT COUNT(*) FROM sqlite_master
         WHERE type='table'
           AND name IN (
               'daily_stats',
               'embedding_jobs',
               'token_usage',
               'token_daily_stats',
               'model_pricing',
               'message_metrics',
               'usage_hourly',
               'usage_daily',
               'usage_models_daily'
           )";
    let analytics_count: i64 = repaired
        .raw()
        .query_row_map(count_sql, fparams![], |row| row.get_typed(0))
        .unwrap();
    assert_eq!(
        analytics_count, 9,
        "open() should recreate the missing analytics tables even when schema_version already says current"
    );
}
25153
25154 #[test]
25155 fn current_schema_repair_batches_cover_every_required_probe() {
25156 let missing_tables: Vec<&'static str> = REQUIRED_CURRENT_SCHEMA_TABLE_PROBES
25157 .iter()
25158 .map(|(table_name, _)| *table_name)
25159 .collect();
25160
25161 let batches = current_schema_repair_batches_for_missing_tables(&missing_tables).unwrap();
25162 let covered_tables: HashSet<&'static str> = batches
25163 .iter()
25164 .flat_map(|batch| batch.tables.iter().copied())
25165 .collect();
25166
25167 for table_name in missing_tables {
25168 assert!(
25169 covered_tables.contains(table_name),
25170 "missing repair coverage for {table_name}"
25171 );
25172 }
25173 }
25174
25175 #[test]
25176 fn current_schema_repair_batches_do_not_replay_core_schema_bootstrap() {
25177 for batch in CURRENT_SCHEMA_REPAIR_BATCHES {
25178 assert!(
25179 !batch.sql.contains("CREATE TABLE IF NOT EXISTS meta"),
25180 "repair batch {} should not recreate meta",
25181 batch.name
25182 );
25183 assert!(
25184 !batch.sql.contains("CREATE TABLE IF NOT EXISTS agents"),
25185 "repair batch {} should not recreate agents",
25186 batch.name
25187 );
25188 assert!(
25189 !batch.sql.contains("CREATE TABLE IF NOT EXISTS workspaces"),
25190 "repair batch {} should not recreate workspaces",
25191 batch.name
25192 );
25193 assert!(
25194 !batch
25195 .sql
25196 .contains("CREATE TABLE IF NOT EXISTS conversations"),
25197 "repair batch {} should not recreate conversations",
25198 batch.name
25199 );
25200 assert!(
25201 !batch.sql.contains("CREATE TABLE IF NOT EXISTS messages"),
25202 "repair batch {} should not recreate messages",
25203 batch.name
25204 );
25205 assert!(
25206 !batch.sql.contains("CREATE TABLE IF NOT EXISTS snippets"),
25207 "repair batch {} should not recreate snippets",
25208 batch.name
25209 );
25210 assert!(
25211 !batch.sql.contains("CREATE VIRTUAL TABLE fts_messages"),
25212 "repair batch {} should not recreate FTS tables",
25213 batch.name
25214 );
25215 assert!(
25216 !batch.sql.contains("DROP TABLE"),
25217 "repair batch {} should never drop tables",
25218 batch.name
25219 );
25220 }
25221 }
25222
25223 #[test]
25224 fn build_cass_migrations_applies_combined_v13() {
25225 let conn = FrankenConnection::open(":memory:").unwrap();
25226 let base_result = build_cass_migrations_before_tail_cache()
25227 .run(&conn)
25228 .unwrap();
25229 assert!(apply_conversation_tail_state_cache_migration(&conn).unwrap());
25230 let post_result = build_cass_migrations_after_tail_cache().run(&conn).unwrap();
25231
25232 assert!(base_result.was_fresh);
25233 let mut applied = base_result.applied;
25234 applied.push(15);
25235 applied.extend(post_result.applied);
25236 assert_eq!(
25237 applied,
25238 (13..=CURRENT_SCHEMA_VERSION).collect::<Vec<i64>>(),
25239 "should apply combined V13 plus additive post-V13 migrations"
25240 );
25241 let current: i64 = conn
25242 .query("SELECT MAX(version) FROM _schema_migrations;")
25243 .unwrap()
25244 .first()
25245 .unwrap()
25246 .get_typed(0)
25247 .unwrap();
25248 assert_eq!(current, CURRENT_SCHEMA_VERSION);
25249 }
25250
25251 #[test]
25252 fn franken_insert_conversations_batched_populates_analytics_rollups() {
25253 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
25254 use frankensqlite::compat::{ConnectionExt, RowExt};
25255 use std::path::PathBuf;
25256
25257 let dir = TempDir::new().unwrap();
25258 let db_path = dir.path().join("franken-index.db");
25259 let storage = FrankenStorage::open(&db_path).unwrap();
25260
25261 let agent = Agent {
25262 id: None,
25263 slug: "claude_code".into(),
25264 name: "Claude Code".into(),
25265 version: Some("1.0".into()),
25266 kind: AgentKind::Cli,
25267 };
25268 let agent_id = storage.ensure_agent(&agent).unwrap();
25269
25270 let ts_ms = 1_770_551_400_000_i64;
25271 let usage_json = serde_json::json!({
25272 "message": {
25273 "model": "claude-opus-4-6",
25274 "usage": {
25275 "input_tokens": 100,
25276 "output_tokens": 50,
25277 "cache_read_input_tokens": 25,
25278 "cache_creation_input_tokens": 10,
25279 "service_tier": "standard"
25280 }
25281 }
25282 });
25283
25284 let conv = Conversation {
25285 id: None,
25286 agent_slug: "claude_code".into(),
25287 workspace: Some(PathBuf::from("/tmp/workspace")),
25288 external_id: Some("franken-batch-upsert".into()),
25289 title: Some("Franken batch upsert".into()),
25290 source_path: PathBuf::from("/tmp/franken.jsonl"),
25291 started_at: Some(ts_ms),
25292 ended_at: Some(ts_ms + 60_000),
25293 approx_tokens: None,
25294 metadata_json: serde_json::Value::Null,
25295 messages: vec![
25296 Message {
25297 id: None,
25298 idx: 0,
25299 role: MessageRole::User,
25300 author: None,
25301 created_at: Some(ts_ms),
25302 content: "Please make a plan.".into(),
25303 extra_json: serde_json::Value::Null,
25304 snippets: vec![],
25305 },
25306 Message {
25307 id: None,
25308 idx: 1,
25309 role: MessageRole::Agent,
25310 author: None,
25311 created_at: Some(ts_ms + 30_000),
25312 content: "## Plan\n\n1. Reproduce\n2. Patch\n3. Verify".into(),
25313 extra_json: usage_json,
25314 snippets: vec![],
25315 },
25316 ],
25317 source_id: "local".into(),
25318 origin_host: None,
25319 };
25320
25321 let outcomes = storage
25322 .insert_conversations_batched(&[(agent_id, None, &conv)])
25323 .unwrap();
25324 assert_eq!(outcomes.len(), 1);
25325 assert_eq!(outcomes[0].inserted_indices, vec![0, 1]);
25326
25327 let conn = storage.raw();
25328 let daily_stats_rows: i64 = conn
25329 .query_row_map("SELECT COUNT(*) FROM daily_stats", fparams![], |row| {
25330 row.get_typed(0)
25331 })
25332 .unwrap();
25333 let token_daily_rows: i64 = conn
25334 .query_row_map(
25335 "SELECT COUNT(*) FROM token_daily_stats",
25336 fparams![],
25337 |row| row.get_typed(0),
25338 )
25339 .unwrap();
25340 let usage_daily_rows: i64 = conn
25341 .query_row_map("SELECT COUNT(*) FROM usage_daily", fparams![], |row| {
25342 row.get_typed(0)
25343 })
25344 .unwrap();
25345 let model_daily_rows: i64 = conn
25346 .query_row_map(
25347 "SELECT COUNT(*) FROM usage_models_daily",
25348 fparams![],
25349 |row| row.get_typed(0),
25350 )
25351 .unwrap();
25352
25353 assert!(daily_stats_rows > 0, "daily_stats should be populated");
25354 assert!(
25355 token_daily_rows > 0,
25356 "token_daily_stats should be populated"
25357 );
25358 assert!(usage_daily_rows > 0, "usage_daily should be populated");
25359 assert!(
25360 model_daily_rows > 0,
25361 "usage_models_daily should be populated"
25362 );
25363 }
25364
25365 #[test]
25370 fn connection_manager_creates_readers() {
25371 let dir = TempDir::new().unwrap();
25372 let db_path = dir.path().join("cm.db");
25373
25374 let fs = FrankenStorage::open(&db_path).unwrap();
25376 drop(fs);
25377
25378 let config = ConnectionManagerConfig {
25379 reader_count: 3,
25380 max_writers: 2,
25381 };
25382 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25383 assert_eq!(mgr.reader_count(), 3);
25384 assert_eq!(mgr.max_writers(), 2);
25385 }
25386
25387 #[test]
25388 fn connection_manager_clamps_zero_writer_limit_to_prevent_deadlock() {
25389 let dir = TempDir::new().unwrap();
25390 let db_path = dir.path().join("cm.db");
25391
25392 let fs = FrankenStorage::open(&db_path).unwrap();
25393 drop(fs);
25394
25395 let mgr = std::sync::Arc::new(
25396 FrankenConnectionManager::new(
25397 &db_path,
25398 ConnectionManagerConfig {
25399 reader_count: 0,
25400 max_writers: 0,
25401 },
25402 )
25403 .unwrap(),
25404 );
25405 assert_eq!(mgr.reader_count(), 1);
25406 assert_eq!(mgr.max_writers(), 1);
25407
25408 let (tx, rx) = std::sync::mpsc::channel();
25409 let mgr_for_thread = std::sync::Arc::clone(&mgr);
25410 std::thread::spawn(move || {
25411 let result = mgr_for_thread.writer().map(|mut guard| {
25412 guard.mark_committed();
25413 });
25414 tx.send(result.is_ok()).expect("writer result send");
25415 });
25416
25417 assert!(
25418 rx.recv_timeout(Duration::from_secs(10)).unwrap(),
25419 "writer acquisition should not block forever when configured with zero writer slots"
25420 );
25421 }
25422
25423 #[test]
25424 fn connection_manager_reader_round_robin() {
25425 let dir = TempDir::new().unwrap();
25426 let db_path = dir.path().join("cm.db");
25427
25428 let fs = FrankenStorage::open(&db_path).unwrap();
25429 drop(fs);
25430
25431 let config = ConnectionManagerConfig {
25432 reader_count: 2,
25433 max_writers: 1,
25434 };
25435 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25436
25437 let idx_before = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
25439 let _r1 = mgr.reader();
25440 let idx_after = mgr.reader_idx.load(std::sync::atomic::Ordering::Relaxed);
25441 assert_eq!(idx_after, idx_before + 1, "reader index should advance");
25442 }
25443
25444 #[test]
25445 fn connection_manager_writer_reads_and_writes() {
25446 use frankensqlite::compat::RowExt;
25447
25448 let dir = TempDir::new().unwrap();
25449 let db_path = dir.path().join("cm.db");
25450
25451 let fs = FrankenStorage::open(&db_path).unwrap();
25452 drop(fs);
25453
25454 let mgr = FrankenConnectionManager::new(&db_path, Default::default()).unwrap();
25455
25456 {
25458 let mut guard = mgr.writer().unwrap();
25459 guard
25460 .storage()
25461 .raw()
25462 .execute("CREATE TABLE IF NOT EXISTS cm_test (id INTEGER PRIMARY KEY, val TEXT)")
25463 .unwrap();
25464 guard
25465 .storage()
25466 .raw()
25467 .execute("INSERT INTO cm_test (val) VALUES ('hello')")
25468 .unwrap();
25469 guard.mark_committed();
25470 }
25471
25472 let reader_guard = mgr.reader();
25474 let rows = reader_guard.query("SELECT val FROM cm_test").unwrap();
25475 assert_eq!(rows.len(), 1);
25476 assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "hello");
25477 }
25478
25479 #[test]
25480 fn connection_manager_writer_guard_drops_releases_slot() {
25481 let dir = TempDir::new().unwrap();
25482 let db_path = dir.path().join("cm.db");
25483
25484 let fs = FrankenStorage::open(&db_path).unwrap();
25485 drop(fs);
25486
25487 let config = ConnectionManagerConfig {
25488 reader_count: 1,
25489 max_writers: 1,
25490 };
25491 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25492
25493 {
25495 let mut guard = mgr.writer().unwrap();
25496 guard.mark_committed();
25497 }
25498
25499 let mut guard2 = mgr.writer().unwrap();
25501 guard2.mark_committed();
25502 }
25503
25504 #[test]
25505 fn connection_manager_concurrent_writer_works() {
25506 use frankensqlite::compat::RowExt;
25507
25508 let dir = TempDir::new().unwrap();
25509 let db_path = dir.path().join("cm.db");
25510
25511 let fs = FrankenStorage::open(&db_path).unwrap();
25512 drop(fs);
25513
25514 let config = ConnectionManagerConfig {
25515 reader_count: 1,
25516 max_writers: 2,
25517 };
25518 let mgr = FrankenConnectionManager::new(&db_path, config).unwrap();
25519
25520 {
25521 let mut guard = mgr.concurrent_writer().unwrap();
25522 guard
25523 .storage()
25524 .raw()
25525 .execute("CREATE TABLE IF NOT EXISTS cm_conc (id INTEGER PRIMARY KEY, val TEXT)")
25526 .unwrap();
25527 guard
25528 .storage()
25529 .raw()
25530 .execute("INSERT INTO cm_conc (val) VALUES ('concurrent')")
25531 .unwrap();
25532 guard.mark_committed();
25533 }
25534
25535 let reader_guard = mgr.reader();
25536 let rows = reader_guard.query("SELECT val FROM cm_conc").unwrap();
25537 assert_eq!(rows.len(), 1);
25538 assert_eq!(rows[0].get_typed::<String>(0).unwrap(), "concurrent");
25539 }
25540
25541 #[test]
25542 fn connection_manager_default_config() {
25543 let config = ConnectionManagerConfig::default();
25544 assert_eq!(config.reader_count, 4);
25545 assert!(config.max_writers > 0);
25546 }
25547
25548 #[test]
25549 fn purge_agent_archive_data_removes_only_target_agent_and_rebuilds_derived_tables() {
25550 use crate::model::types::{Agent, AgentKind, Conversation, Message, MessageRole};
25551 use std::path::PathBuf;
25552
25553 fn seed_conversation(storage: &FrankenStorage, agent_slug: &str, marker: &str) {
25554 let agent = Agent {
25555 id: None,
25556 slug: agent_slug.into(),
25557 name: agent_slug.into(),
25558 version: None,
25559 kind: AgentKind::Cli,
25560 };
25561 let agent_id = storage.ensure_agent(&agent).unwrap();
25562 let conversation = Conversation {
25563 id: None,
25564 agent_slug: agent_slug.into(),
25565 workspace: Some(PathBuf::from("/tmp/workspace")),
25566 external_id: Some(format!("{agent_slug}-{marker}")),
25567 title: Some(format!("{agent_slug} {marker}")),
25568 source_path: PathBuf::from(format!("/tmp/{agent_slug}-{marker}.jsonl")),
25569 started_at: Some(1_700_000_000_000),
25570 ended_at: Some(1_700_000_000_100),
25571 approx_tokens: None,
25572 metadata_json: serde_json::Value::Null,
25573 messages: vec![
25574 Message {
25575 id: None,
25576 idx: 0,
25577 role: MessageRole::User,
25578 author: Some("user".into()),
25579 created_at: Some(1_700_000_000_010),
25580 content: format!("{agent_slug} {marker} user"),
25581 extra_json: serde_json::Value::Null,
25582 snippets: Vec::new(),
25583 },
25584 Message {
25585 id: None,
25586 idx: 1,
25587 role: MessageRole::Agent,
25588 author: Some("assistant".into()),
25589 created_at: Some(1_700_000_000_020),
25590 content: format!("{agent_slug} {marker} assistant"),
25591 extra_json: serde_json::Value::Null,
25592 snippets: Vec::new(),
25593 },
25594 ],
25595 source_id: LOCAL_SOURCE_ID.into(),
25596 origin_host: None,
25597 };
25598 storage
25599 .insert_conversation_tree(agent_id, None, &conversation)
25600 .unwrap();
25601 }
25602
25603 let dir = TempDir::new().unwrap();
25604 let db_path = dir.path().join("agent_search.db");
25605 let storage = FrankenStorage::open(&db_path).unwrap();
25606
25607 seed_conversation(&storage, "openclaw", "purge-target");
25608 seed_conversation(&storage, "codex", "keep-target");
25609
25610 let purge = storage.purge_agent_archive_data("openclaw").unwrap();
25611 assert_eq!(purge.conversations_deleted, 1);
25612 assert_eq!(purge.messages_deleted, 2);
25613
25614 storage.rebuild_fts().unwrap();
25615 storage.rebuild_analytics().unwrap();
25616 storage.rebuild_daily_stats().unwrap();
25617 storage.rebuild_token_daily_stats().unwrap();
25618
25619 let agents = storage.list_agents().unwrap();
25620 assert_eq!(agents.len(), 1);
25621 assert_eq!(agents[0].slug, "codex");
25622 assert_eq!(storage.total_conversation_count().unwrap(), 1);
25623 assert_eq!(storage.total_message_count().unwrap(), 2);
25624
25625 let fts_rows: i64 = storage
25626 .raw()
25627 .query_row_map("SELECT COUNT(*) FROM fts_messages", fparams![], |row| {
25628 row.get_typed(0)
25629 })
25630 .unwrap();
25631 assert_eq!(fts_rows, 2);
25632
25633 let total_daily_sessions: i64 = storage
25634 .raw()
25635 .query_row_map(
25636 "SELECT COALESCE(SUM(session_count), 0)
25637 FROM daily_stats
25638 WHERE agent_slug = 'all' AND source_id = 'all'",
25639 fparams![],
25640 |row| row.get_typed(0),
25641 )
25642 .unwrap();
25643 assert_eq!(total_daily_sessions, 1);
25644
25645 let openclaw_token_rows: i64 = storage
25646 .raw()
25647 .query_row_map(
25648 "SELECT COUNT(*) FROM token_daily_stats WHERE agent_slug = 'openclaw'",
25649 fparams![],
25650 |row| row.get_typed(0),
25651 )
25652 .unwrap();
25653 assert_eq!(openclaw_token_rows, 0);
25654 }
25655
25656 #[test]
25663 fn cleanup_orphan_fk_rows_removes_orphans_and_is_noop_on_clean_db() {
25664 let dir = TempDir::new().unwrap();
25665 let db_path = dir.path().join("orphan_fk_self_heal.db");
25666 let storage = FrankenStorage::open(&db_path).unwrap();
25667
25668 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25671
25672 storage
25675 .raw()
25676 .execute_compat(
25677 "INSERT INTO agents(id, slug, name, kind, created_at, updated_at) \
25678 VALUES(1, 'test-agent', 'Test Agent', 'cli', 0, 0)",
25679 fparams![],
25680 )
25681 .unwrap();
25682 storage
25683 .raw()
25684 .execute_compat(
25685 "INSERT INTO conversations(id, agent_id, source_id, source_path, started_at) \
25686 VALUES(1, 1, 'local', '/tmp/real.jsonl', 0)",
25687 fparams![],
25688 )
25689 .unwrap();
25690 storage
25691 .raw()
25692 .execute_compat(
25693 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25694 VALUES(1, 1, 0, 'user', 'real message')",
25695 fparams![],
25696 )
25697 .unwrap();
25698
25699 for (mid, cid, idx) in [(101_i64, 99_999_i64, 0_i64), (102, 0, 0), (103, 0, 1)] {
25703 storage
25704 .raw()
25705 .execute_compat(
25706 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25707 VALUES(?1, ?2, ?3, 'user', 'orphan message')",
25708 fparams![mid, cid, idx],
25709 )
25710 .unwrap();
25711 }
25712
25713 for message_id in [1_i64, 101_i64, 102_i64] {
25718 storage
25719 .raw()
25720 .execute_compat(
25721 "INSERT INTO message_metrics(
25722 message_id, created_at_ms, hour_id, day_id, agent_slug,
25723 role, content_chars, content_tokens_est
25724 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 13, 2)",
25725 fparams![message_id],
25726 )
25727 .unwrap();
25728 storage
25729 .raw()
25730 .execute_compat(
25731 "INSERT INTO token_usage(
25732 message_id, conversation_id, agent_id, timestamp_ms, day_id,
25733 role, content_chars
25734 ) VALUES(?1, 1, 1, 0, 0, 'user', 13)",
25735 fparams![message_id],
25736 )
25737 .unwrap();
25738 }
25739
25740 storage
25744 .raw()
25745 .execute_compat(
25746 "INSERT INTO snippets(message_id, file_path, start_line, end_line, language, snippet_text) \
25747 VALUES(99999, '/tmp/orphan-snippet.rs', 1, 2, 'rust', 'fn main() {}')",
25748 fparams![],
25749 )
25750 .unwrap();
25751
25752 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25753
25754 let messages_before: i64 = storage
25756 .raw()
25757 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25758 row.get_typed(0)
25759 })
25760 .unwrap();
25761 assert_eq!(messages_before, 4); let snippets_before: i64 = storage
25763 .raw()
25764 .query_row_map("SELECT COUNT(*) FROM snippets", fparams![], |row| {
25765 row.get_typed(0)
25766 })
25767 .unwrap();
25768 assert_eq!(snippets_before, 1);
25769 let metrics_before: i64 = storage
25770 .raw()
25771 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25772 row.get_typed(0)
25773 })
25774 .unwrap();
25775 assert_eq!(metrics_before, 3);
25776 let token_usage_before: i64 = storage
25777 .raw()
25778 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
25779 row.get_typed(0)
25780 })
25781 .unwrap();
25782 assert_eq!(token_usage_before, 3);
25783
25784 let report = storage.cleanup_orphan_fk_rows().unwrap();
25786
25787 let messages_after: i64 = storage
25792 .raw()
25793 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25794 row.get_typed(0)
25795 })
25796 .unwrap();
25797 assert_eq!(messages_after, 1, "real message must be preserved");
25798 let snippets_after: i64 = storage
25799 .raw()
25800 .query_row_map("SELECT COUNT(*) FROM snippets", fparams![], |row| {
25801 row.get_typed(0)
25802 })
25803 .unwrap();
25804 assert_eq!(snippets_after, 0);
25805 let metrics_after: i64 = storage
25806 .raw()
25807 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25808 row.get_typed(0)
25809 })
25810 .unwrap();
25811 assert_eq!(metrics_after, 1, "real message metric must be preserved");
25812 let token_usage_after: i64 = storage
25813 .raw()
25814 .query_row_map("SELECT COUNT(*) FROM token_usage", fparams![], |row| {
25815 row.get_typed(0)
25816 })
25817 .unwrap();
25818 assert_eq!(token_usage_after, 1, "real token row must be preserved");
25819
25820 assert_eq!(report.total, 4, "report total: {:?}", report);
25821 let messages_count = report
25822 .per_table
25823 .iter()
25824 .find(|(t, _)| *t == "messages")
25825 .map(|(_, c)| *c);
25826 assert_eq!(messages_count, Some(3));
25827 let snippets_count = report
25828 .per_table
25829 .iter()
25830 .find(|(t, _)| *t == "snippets")
25831 .map(|(_, c)| *c);
25832 assert_eq!(snippets_count, Some(1));
25833
25834 let second = storage.cleanup_orphan_fk_rows().unwrap();
25836 assert_eq!(second.total, 0);
25837 assert!(second.per_table.is_empty());
25838 }
25839
25840 #[test]
25841 fn cleanup_orphan_fk_rows_handles_more_than_one_delete_chunk() {
25842 let dir = TempDir::new().unwrap();
25843 let db_path = dir.path().join("orphan_fk_chunked_self_heal.db");
25844 let storage = FrankenStorage::open(&db_path).unwrap();
25845 let orphan_count = ORPHAN_FK_ID_CHUNK_SIZE + 3;
25846
25847 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25848 {
25849 let mut tx = storage.raw().transaction().unwrap();
25850 for idx in 0..orphan_count {
25851 let message_id = 10_000_i64 + i64::try_from(idx).unwrap();
25852 let conversation_id = 20_000_i64 + i64::try_from(idx).unwrap();
25853 tx.execute_compat(
25854 "INSERT INTO messages(id, conversation_id, idx, role, content) \
25855 VALUES(?1, ?2, 0, 'user', 'orphan message')",
25856 fparams![message_id, conversation_id],
25857 )
25858 .unwrap();
25859 tx.execute_compat(
25860 "INSERT INTO message_metrics(
25861 message_id, created_at_ms, hour_id, day_id, agent_slug,
25862 role, content_chars, content_tokens_est
25863 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 14, 2)",
25864 fparams![message_id],
25865 )
25866 .unwrap();
25867 }
25868 tx.commit().unwrap();
25869 }
25870 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25871
25872 let report = storage.cleanup_orphan_fk_rows().unwrap();
25873
25874 assert_eq!(report.total, i64::try_from(orphan_count).unwrap());
25875 let messages_count = report
25876 .per_table
25877 .iter()
25878 .find(|(table, _)| *table == "messages")
25879 .map(|(_, count)| *count);
25880 assert_eq!(messages_count, Some(i64::try_from(orphan_count).unwrap()));
25881 let messages_after: i64 = storage
25882 .raw()
25883 .query_row_map("SELECT COUNT(*) FROM messages", fparams![], |row| {
25884 row.get_typed(0)
25885 })
25886 .unwrap();
25887 assert_eq!(messages_after, 0);
25888 let metrics_after: i64 = storage
25889 .raw()
25890 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25891 row.get_typed(0)
25892 })
25893 .unwrap();
25894 assert_eq!(metrics_after, 0);
25895 }
25896
25897 #[test]
25898 fn cleanup_orphan_fk_rows_pages_direct_child_orphans() {
25899 let dir = TempDir::new().unwrap();
25900 let db_path = dir.path().join("direct_orphan_fk_paged_self_heal.db");
25901 let storage = FrankenStorage::open(&db_path).unwrap();
25902 let orphan_count = (ORPHAN_FK_ID_CHUNK_SIZE * 2) + 5;
25903
25904 storage.raw().execute("PRAGMA foreign_keys = OFF").unwrap();
25905 {
25906 let mut tx = storage.raw().transaction().unwrap();
25907 for idx in 0..orphan_count {
25908 let message_id = 50_000_i64 + i64::try_from(idx).unwrap();
25909 tx.execute_compat(
25910 "INSERT INTO message_metrics(
25911 message_id, created_at_ms, hour_id, day_id, agent_slug,
25912 role, content_chars, content_tokens_est
25913 ) VALUES(?1, 0, 0, 0, 'test-agent', 'user', 21, 3)",
25914 fparams![message_id],
25915 )
25916 .unwrap();
25917 }
25918 tx.commit().unwrap();
25919 }
25920 storage.raw().execute("PRAGMA foreign_keys = ON").unwrap();
25921
25922 let report = storage.cleanup_orphan_fk_rows().unwrap();
25923
25924 assert_eq!(report.total, i64::try_from(orphan_count).unwrap());
25925 let metrics_count = report
25926 .per_table
25927 .iter()
25928 .filter(|(table, _)| *table == "message_metrics")
25929 .map(|(_, count)| *count)
25930 .sum::<i64>();
25931 assert_eq!(metrics_count, i64::try_from(orphan_count).unwrap());
25932 assert_eq!(
25933 report
25934 .per_table
25935 .iter()
25936 .filter(|(table, _)| *table == "message_metrics")
25937 .count(),
25938 1,
25939 "paged cleanup should aggregate report entries by table: {report:?}"
25940 );
25941 let metrics_after: i64 = storage
25942 .raw()
25943 .query_row_map("SELECT COUNT(*) FROM message_metrics", fparams![], |row| {
25944 row.get_typed(0)
25945 })
25946 .unwrap();
25947 assert_eq!(metrics_after, 0);
25948 }
25949}