use rustc_hash::FxHashSet as HashSet;
use super::*;
use crate::io::PlatformIO;
use crate::mvcc::clock::MvccClock;
use crate::mvcc::cursor::{CursorYieldPoint, MvccCursorType};
use crate::mvcc::database::checkpoint_state_machine::CheckpointYieldPoint;
use crate::mvcc::database::CommitYieldPoint;
use crate::mvcc::persistent_storage::logical_log::{
ENCRYPTED_PAYLOAD_CHUNK_SIZE, FRAME_MAGIC, LOG_HDR_SIZE,
};
use crate::mvcc::yield_hooks::YieldPointMarker;
use crate::mvcc::yield_points::{FailureInjector, YieldInjector, YieldPoint};
use crate::state_machine::{StateTransition, TransitionResult};
use crate::storage::sqlite3_ondisk::{
checksum_wal, read_varint, write_varint, DatabaseHeader, WalHeader, WAL_FRAME_HEADER_SIZE,
WAL_HEADER_SIZE,
};
use crate::sync::atomic::{AtomicBool, Ordering};
use crate::sync::Mutex;
use crate::sync::RwLock;
use crate::vdbe::execute::TransactionYieldPoint;
use crate::{
Buffer, Completion, DatabaseOpts, EncryptionKey, LimboError, OpenFlags, StatementStatusCounter,
};
use quickcheck::{Arbitrary, Gen};
use quickcheck_macros::quickcheck;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
// NOTE(review): presumably the byte sizes of a logical-log transaction header and trailer,
// kept here so tests can compute offsets into the log — confirm against
// `crate::mvcc::persistent_storage::logical_log`.
const TX_HEADER_SIZE: usize = 24;
const TX_TRAILER_SIZE: usize = 8;
/// Test harness that owns a [`Database`] but no long-lived connection.
///
/// Connections are created on demand with `connect`, and the database can be
/// reopened from the same path with `restart`/`restart_result`.
pub(crate) struct MvccTestDbNoConn {
/// The open database. `force_close_for_artifact_tamper` takes it out, leaving `None`.
pub(crate) db: Option<Arc<Database>>,
/// Path of the database file; `None` for the in-memory variant built by `new`.
path: Option<String>,
/// Options the database was opened with; reused when reopening.
opts: DatabaseOpts,
/// Encryption settings, when the database was opened encrypted; reused by
/// `restart_result` and `connect`.
enc_opts: Option<crate::EncryptionOpts>,
/// Holds the temporary directory for file-backed variants so it lives as long as the harness.
_temp_dir: Option<tempfile::TempDir>,
}
/// Test harness bundling an in-memory database, one open connection and the
/// database's MVCC store (see `MvccTestDb::new`).
pub(crate) struct MvccTestDb {
/// MVCC store obtained from `db` after switching to MVCC journal mode.
pub(crate) mvcc_store: Arc<MvStore<MvccClock>>,
/// The in-memory database.
pub(crate) db: Arc<Database>,
/// Connection that was used to enable MVCC journal mode.
pub(crate) conn: Arc<Connection>,
}
/// [`YieldInjector`] that yields exactly once at each of a fixed set of yield points.
#[derive(Debug)]
struct FixedYieldInjector {
    /// Yield points that have not been hit yet.
    remaining: Mutex<HashSet<YieldPoint>>,
}

impl FixedYieldInjector {
    /// Builds a shared injector that will yield once at every point in `points`.
    fn new(points: impl IntoIterator<Item = YieldPoint>) -> Arc<Self> {
        let pending: HashSet<YieldPoint> = points.into_iter().collect();
        Arc::new(Self {
            remaining: Mutex::new(pending),
        })
    }

    /// Returns `true` once every configured point has been consumed.
    fn is_empty(&self) -> bool {
        let pending = self.remaining.lock();
        pending.is_empty()
    }
}

impl YieldInjector for FixedYieldInjector {
    /// Yields only if `point` is still pending, and consumes it so the next
    /// query for the same point returns `false`. The ids are ignored.
    fn should_yield(&self, _instance_id: u64, _selection_key: u64, point: YieldPoint) -> bool {
        let mut pending = self.remaining.lock();
        pending.remove(&point)
    }
}
/// [`FailureInjector`] that fails exactly once at each of a fixed set of yield
/// points, each with its own preconfigured error.
#[derive(Debug)]
struct FixedFailureInjector {
    /// Errors still waiting to be injected, keyed by the point that triggers them.
    remaining: Mutex<rustc_hash::FxHashMap<YieldPoint, LimboError>>,
}

impl FixedFailureInjector {
    /// Builds a shared injector from `(point, error)` pairs.
    fn new(points: impl IntoIterator<Item = (YieldPoint, LimboError)>) -> Arc<Self> {
        let pending: rustc_hash::FxHashMap<YieldPoint, LimboError> = points.into_iter().collect();
        Arc::new(Self {
            remaining: Mutex::new(pending),
        })
    }

    /// Returns `true` once every configured failure has been injected.
    fn is_empty(&self) -> bool {
        let pending = self.remaining.lock();
        pending.is_empty()
    }
}

impl FailureInjector for FixedFailureInjector {
    /// Hands out the error configured for `point`, at most once. The ids are ignored.
    fn should_fail(
        &self,
        _instance_id: u64,
        _selection_key: u64,
        point: YieldPoint,
    ) -> Option<LimboError> {
        let mut pending = self.remaining.lock();
        pending.remove(&point)
    }
}
impl MvccTestDb {
    /// Opens an in-memory database, switches it to MVCC journal mode through a
    /// fresh connection, and captures the resulting MVCC store.
    ///
    /// Panics if any step fails.
    pub fn new() -> Self {
        let memory_io = Arc::new(MemoryIO::new());
        let db = Database::open_file(memory_io, ":memory:").unwrap();

        let conn = db.connect().unwrap();
        conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();

        // The store is read only after the PRAGMA has run.
        let mvcc_store = db.get_mv_store().clone().unwrap();
        Self {
            mvcc_store,
            db,
            conn,
        }
    }
}
/// While a transaction started with `begin_tx` is registered, the vacuum gate
/// reports `Busy`; after the transaction is removed the gate can be taken.
#[test]
fn mvcc_active_read_tx_blocks_vacuum_gate() {
    let harness = MvccTestDb::new();
    let store = &harness.mvcc_store;
    let pager = harness.conn.pager.load().clone();
    let read_tx = store.begin_tx(pager).unwrap();

    let gate_while_active = store.try_begin_vacuum_gate();
    assert!(matches!(gate_while_active, Err(LimboError::Busy)));

    store.remove_tx(read_tx);
    store.try_begin_vacuum_gate().unwrap();
    store.release_vacuum_gate();
}
/// While an exclusive transaction is open, the vacuum gate reports `Busy`;
/// after rollback the gate can be taken.
#[test]
fn mvcc_active_write_tx_blocks_vacuum_gate() {
    let harness = MvccTestDb::new();
    let store = &harness.mvcc_store;
    let pager = harness.conn.pager.load().clone();
    let write_tx = store.begin_exclusive_tx(pager.clone(), None).unwrap();

    let gate_while_active = store.try_begin_vacuum_gate();
    assert!(matches!(gate_while_active, Err(LimboError::Busy)));

    store.rollback_tx(write_tx, pager, &harness.conn, crate::MAIN_DB_ID);
    store.try_begin_vacuum_gate().unwrap();
    store.release_vacuum_gate();
}
/// While the vacuum gate is held, both `begin_tx` and `begin_exclusive_tx`
/// must be refused with `Busy`.
#[test]
fn mvcc_vacuum_gate_blocks_new_read_and_write_tx() {
    let harness = MvccTestDb::new();
    let store = &harness.mvcc_store;
    let pager = harness.conn.pager.load().clone();

    store.try_begin_vacuum_gate().unwrap();

    let read_attempt = store.begin_tx(pager.clone());
    assert!(matches!(read_attempt, Err(LimboError::Busy)));

    let write_attempt = store.begin_exclusive_tx(pager, None);
    assert!(matches!(write_attempt, Err(LimboError::Busy)));

    store.release_vacuum_gate();
}
/// `PRAGMA page_size` issued through the connection must be visible in the
/// header exposed by the MVCC store.
#[test]
fn mvcc_pragma_page_size_propagates_to_global_header() {
    let harness = MvccTestDb::new();
    // Reads the page size currently recorded in the store's header.
    let header_page_size = || {
        harness
            .mvcc_store
            .with_header(|h| h.page_size.get(), None)
            .unwrap()
    };

    let default_page_size = crate::storage::buffer_pool::BufferPool::DEFAULT_PAGE_SIZE as u32;
    assert_eq!(
        header_page_size(),
        default_page_size,
        "global_header should start at the default page size"
    );

    harness.conn.execute("PRAGMA page_size = 512").unwrap();

    assert_eq!(
        header_page_size(),
        512,
        "PRAGMA page_size must propagate to MvStore.global_header"
    );
}
/// `reset_after_vacuum` must install the supplied header and rebuild the
/// table-id -> root-page map from the supplied schema, dropping stale entries.
#[test]
fn mvcc_reset_after_vacuum_installs_header_and_rootpages() {
let db = MvccTestDb::new();
db.conn
.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
db.conn.execute("CREATE INDEX idx_t_v ON t(v)").unwrap();
db.conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
// NOTE(review): the connection is demoted only around `reparse_schema` and promoted
// again right after the snapshot is cloned — presumably so the reparsed schema carries
// the checkpointed root pages; confirm.
db.conn.demote_to_mvcc_connection();
db.conn.reparse_schema().unwrap();
let schema = db.conn.schema.read().clone();
db.conn.promote_to_regular_connection();
// Root pages of table `t` and of its first index, as recorded in the schema snapshot.
let table_root = match schema.tables.get("t").expect("table t").as_ref() {
Table::BTree(btree) => btree.root_page,
_ => panic!("expected btree table"),
};
let index_root = schema
.indexes
.get("t")
.and_then(|indexes| indexes.front())
.map(|index| index.root_page)
.expect("index idx_t_v");
// Header handed to the reset; the schema cookie 77 is the marker asserted on below.
let mut header = DatabaseHeader::default();
header.schema_cookie = 77.into();
// Seed the store with a default header and a bogus root-page entry so the test can
// observe that both are replaced/cleared by the reset.
db.mvcc_store
.global_header
.write()
.replace(DatabaseHeader::default());
db.mvcc_store
.insert_table_id_to_rootpage(MVTableId::from(-999_i64), Some(999));
// The reset is performed while holding the vacuum gate.
db.mvcc_store.try_begin_vacuum_gate().unwrap();
db.mvcc_store.reset_after_vacuum(header, schema.as_ref());
db.mvcc_store.release_vacuum_gate();
assert_eq!(
db.mvcc_store
.with_header(|header| header.schema_cookie.get(), None)
.unwrap(),
77
);
// sqlite_schema is expected to map to root page 1.
assert_eq!(
*db.mvcc_store
.table_id_to_rootpage
.get(&SQLITE_SCHEMA_MVCC_TABLE_ID)
.expect("sqlite_schema mapping")
.value(),
Some(1)
);
// Table and index ids are looked up as the negated root page.
assert_eq!(
*db.mvcc_store
.table_id_to_rootpage
.get(&MVTableId::from(-(table_root)))
.expect("table root mapping")
.value(),
Some(table_root as u64)
);
assert_eq!(
*db.mvcc_store
.table_id_to_rootpage
.get(&MVTableId::from(-(index_root)))
.expect("index root mapping")
.value(),
Some(index_root as u64)
);
assert!(
db.mvcc_store
.table_id_to_rootpage
.get(&MVTableId::from(-999_i64))
.is_none(),
"stale root-page entries must be cleared"
);
}
/// Row and index buckets that are left empty after a checkpoint must be removed
/// from the store by `reset_after_vacuum`.
#[test]
fn mvcc_reset_after_vacuum_clears_checkpointed_empty_version_buckets() {
let db = MvccTestDb::new();
db.conn
.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
db.conn.execute("CREATE INDEX idx_t_v ON t(v)").unwrap();
db.conn
.execute("INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c')")
.unwrap();
db.conn
.execute("UPDATE t SET v = 'z' WHERE id = 1")
.unwrap();
db.conn.execute("DELETE FROM t WHERE id = 2").unwrap();
db.conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
// Keys of table-row buckets whose version list is empty after the checkpoint.
let checkpointed_row_ids = db
.mvcc_store
.rows
.iter()
.filter(|entry| entry.value().read().is_empty())
.map(|entry| entry.key().clone())
.collect::<Vec<_>>();
// Keys of index buckets in which every row entry has an empty version list.
let checkpointed_index_ids = db
.mvcc_store
.index_rows
.iter()
.filter(|entry| {
entry
.value()
.iter()
.all(|row_entry| row_entry.value().read().is_empty())
})
.map(|entry| *entry.key())
.collect::<Vec<_>>();
// Precondition: without empty buckets the final assertions would pass vacuously.
assert!(
!checkpointed_row_ids.is_empty(),
"checkpoint GC should leave empty table row buckets before VACUUM reset"
);
assert!(
!checkpointed_index_ids.is_empty(),
"checkpoint GC should leave empty index buckets before VACUUM reset"
);
// Snapshot the schema with the connection temporarily demoted, then restore it.
db.conn.demote_to_mvcc_connection();
db.conn.reparse_schema().unwrap();
let schema = db.conn.schema.read().clone();
db.conn.promote_to_regular_connection();
// The reset is performed while holding the vacuum gate.
db.mvcc_store.try_begin_vacuum_gate().unwrap();
db.mvcc_store
.reset_after_vacuum(DatabaseHeader::default(), schema.as_ref());
db.mvcc_store.release_vacuum_gate();
for row_id in checkpointed_row_ids {
assert!(
db.mvcc_store.rows.get(&row_id).is_none(),
"checkpointed empty table row buckets must be cleared across VACUUM reset"
);
}
for index_id in checkpointed_index_ids {
assert!(
db.mvcc_store.index_rows.get(&index_id).is_none(),
"checkpointed empty index buckets must be cleared across VACUUM reset"
);
}
}
impl MvccTestDbNoConn {
    /// Creates an in-memory database and switches it to MVCC journal mode.
    ///
    /// The connection used for the `PRAGMA` is closed before returning. `path`
    /// stays `None`, so this variant cannot be reopened with `restart`.
    pub fn new() -> Self {
        let io = Arc::new(MemoryIO::new());
        let opts = DatabaseOpts::new();
        let db = Database::open_file_with_flags(io, ":memory:", OpenFlags::default(), opts, None)
            .unwrap();
        let conn = db.connect().unwrap();
        conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        conn.close().unwrap();
        Self {
            db: Some(db),
            path: None,
            opts,
            enc_opts: None,
            _temp_dir: None,
        }
    }

    /// Creates a file-backed database at a random path inside a fresh temporary
    /// directory, using default [`DatabaseOpts`].
    pub fn new_with_random_db() -> Self {
        Self::new_with_random_db_with_opts(DatabaseOpts::new())
    }

    /// Like `new_with_random_db`, but opens the database with the given `opts`.
    ///
    /// The temporary directory is kept alive by the returned harness.
    pub fn new_with_random_db_with_opts(opts: DatabaseOpts) -> Self {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let path = temp_dir
            .path()
            .join(format!("test_{}", rand::random::<u64>()));
        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
        // Convert once; the same string is printed, opened and stored.
        let path_str = path.to_str().unwrap().to_string();
        let io = Arc::new(PlatformIO::new().unwrap());
        println!("path: {path_str}");
        let db =
            Database::open_file_with_flags(io, &path_str, OpenFlags::default(), opts, None)
                .unwrap();
        let conn = db.connect().unwrap();
        conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        conn.close().unwrap();
        Self {
            db: Some(db),
            path: Some(path_str),
            opts,
            enc_opts: None,
            _temp_dir: Some(temp_dir),
        }
    }

    /// Creates a file-backed database encrypted with `aes256gcm` and the given
    /// hex-encoded key.
    pub fn new_encrypted(hex_key: &str) -> Self {
        // Same setup as every other cipher; only the cipher name is fixed here.
        Self::new_encrypted_with_cipher(hex_key, "aes256gcm")
    }

    /// Reopens the database from its path, panicking if that fails.
    pub fn restart(&mut self) {
        self.restart_result().unwrap();
    }

    /// Creates a plain file-backed database when `encrypted` is `false`;
    /// otherwise an encrypted one using a cipher picked at random from the
    /// table below, paired with a key of matching length.
    pub fn new_maybe_encrypted(encrypted: bool) -> Self {
        if !encrypted {
            return Self::new_with_random_db();
        }
        const KEY128: &str = "b1bbfda4f589dc9daaf004fe21111e00";
        const KEY256: &str = "b1bbfda4f589dc9daaf004fe21111e00dc00c98237102f5c7002a5669fc76327";
        let ciphers: &[(&str, &str)] = &[
            ("aes128gcm", KEY128),
            ("aes256gcm", KEY256),
            ("aegis128l", KEY128),
            ("aegis128x2", KEY128),
            ("aegis128x4", KEY128),
            ("aegis256", KEY256),
            ("aegis256x2", KEY256),
            ("aegis256x4", KEY256),
        ];
        let (cipher, hexkey) = ciphers[rand::random_range(0..ciphers.len())];
        Self::new_encrypted_with_cipher(hexkey, cipher)
    }

    /// Creates an encrypted, file-backed database (`test.db` inside a fresh
    /// temporary directory) using `cipher` and the hex-encoded `hex_key`, then
    /// switches it to MVCC journal mode through an encrypted connection.
    fn new_encrypted_with_cipher(hex_key: &str, cipher: &str) -> Self {
        let opts = DatabaseOpts::new().with_encryption(true);
        let enc_opts = crate::EncryptionOpts {
            cipher: cipher.to_string(),
            hexkey: hex_key.to_string(),
        };
        let temp_dir = tempfile::TempDir::new().unwrap();
        let path = temp_dir.path().join("test.db");
        let path_str = path.to_str().unwrap().to_string();
        let io = Arc::new(PlatformIO::new().unwrap());
        let db = Database::open_file_with_flags(
            io,
            &path_str,
            OpenFlags::default(),
            opts,
            Some(enc_opts.clone()),
        )
        .unwrap();
        let encryption_key = EncryptionKey::from_hex_string(hex_key).unwrap();
        let conn = db.connect_with_encryption(Some(encryption_key)).unwrap();
        conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        conn.close().unwrap();
        Self {
            db: Some(db),
            path: Some(path_str),
            opts,
            enc_opts: Some(enc_opts),
            _temp_dir: Some(temp_dir),
        }
    }

    /// Clears the global `DATABASE_MANAGER` and reopens the database from its
    /// path with the original options and encryption settings.
    ///
    /// # Errors
    /// Returns the error from `Database::open_file_with_flags` if reopening fails.
    ///
    /// # Panics
    /// Panics if the harness has no path (the in-memory variant from `new`).
    pub fn restart_result(&mut self) -> crate::Result<()> {
        {
            // Scope the guard so the manager lock is released before reopening.
            let mut manager = DATABASE_MANAGER.lock();
            manager.clear();
        }
        let io = Arc::new(PlatformIO::new().unwrap());
        let path = self
            .path
            .as_ref()
            .expect("restart requires a file-backed database; `path` is None for in-memory");
        let db = Database::open_file_with_flags(
            io,
            path,
            OpenFlags::default(),
            self.opts,
            self.enc_opts.clone(),
        )?;
        // The previous handle is dropped only after the new one opened successfully.
        self.db.replace(db);
        Ok(())
    }

    /// Returns a clone of the database handle.
    ///
    /// Panics if the database has been taken out (`db` is `None`).
    pub fn get_db(&self) -> Arc<Database> {
        self.db.as_ref().unwrap().clone()
    }

    /// Opens a new connection, supplying the encryption key when the database
    /// was created with encryption options.
    pub fn connect(&self) -> Arc<Connection> {
        let enc_key = self
            .enc_opts
            .as_ref()
            .map(|e| EncryptionKey::from_hex_string(&e.hexkey).unwrap());
        self.get_db().connect_with_encryption(enc_key).unwrap()
    }

    /// Returns the database's MVCC store; panics if the database has none.
    pub fn get_mvcc_store(&self) -> Arc<MvStore<MvccClock>> {
        self.get_db().get_mv_store().clone().unwrap()
    }
}
/// Builds a table row keyed by integer `id` in `table_id` whose record is a
/// single text column holding `data`.
pub(crate) fn generate_simple_string_row(table_id: MVTableId, id: i64, data: &str) -> Row {
    let columns = [Value::Text(Text::new(data.to_string()))];
    let record = ImmutableRecord::from_values(&columns, 1);
    let row_id = RowID::new(table_id, RowKey::Int(id));
    let payload = record.as_blob().to_vec();
    Row::new_table_row(row_id, payload, 1)
}
/// Builds a record with a single text column holding `data`.
pub(crate) fn generate_simple_string_record(data: &str) -> ImmutableRecord {
    let text = Text::new(data.to_string());
    ImmutableRecord::from_values(&[Value::Text(text)], 1)
}
/// Steps a fresh checkpoint state machine until the WAL's max frame grows past
/// its value at entry, then returns with the checkpoint left unfinished.
///
/// Panics if the checkpoint completes first, or if the WAL has not grown after
/// 10,000 steps.
fn advance_checkpoint_until_wal_has_commit_frame(
    mvcc_store: Arc<MvStore<MvccClock>>,
    conn: &Arc<Connection>,
) {
    let pager = conn.pager.load().clone();
    let wal_max_frame = || {
        pager
            .wal
            .as_ref()
            .expect("mvcc mode requires wal")
            .get_max_frame_in_wal()
    };
    let baseline_max_frame = wal_max_frame();

    let mut checkpoint_sm = CheckpointStateMachine::new(
        pager.clone(),
        mvcc_store,
        conn.clone(),
        true,
        conn.get_sync_mode(),
    );

    for _ in 0..10_000 {
        // Check before stepping so we stop as soon as the WAL has grown.
        if wal_max_frame() > baseline_max_frame {
            return;
        }
        match checkpoint_sm.step(&()).unwrap() {
            TransitionResult::Continue => {}
            TransitionResult::Io(io) => io.wait(pager.io.as_ref()).unwrap(),
            TransitionResult::Done(_) => {
                panic!("checkpoint finalized before WAL had committed frames")
            }
        }
    }
    panic!("checkpoint did not produce committed WAL frame in bounded steps");
}
/// Overwrites the single byte at `offset` in the log file that sits next to
/// the database at `path`, then syncs it to disk.
///
/// The log path is `path` with its extension replaced by `db-log`. Panics on
/// any I/O error.
fn overwrite_log_header_byte(path: &str, offset: u64, value: u8) {
    use std::io::{Seek, SeekFrom, Write};

    let log_path = std::path::Path::new(path).with_extension("db-log");
    let mut log_file = std::fs::File::options()
        .read(true)
        .write(true)
        .open(log_path)
        .unwrap();
    log_file.seek(SeekFrom::Start(offset)).unwrap();
    log_file.write_all(std::slice::from_ref(&value)).unwrap();
    log_file.sync_all().unwrap();
}
/// Replaces the file at `path` (creating it if missing) with `size` copies of
/// `byte`, then syncs it to disk. Panics on any I/O error.
fn overwrite_file_with_junk(path: &std::path::Path, size: usize, byte: u8) {
    use std::io::Write;

    let junk = vec![byte; size];
    // `File::create` opens write-only, creating or truncating as needed.
    let mut target = std::fs::File::create(path).unwrap();
    target.write_all(&junk).unwrap();
    target.sync_all().unwrap();
}
/// Returns the WAL path for a database path: the same string with `-wal` appended.
fn wal_path_for_db(path: &str) -> std::path::PathBuf {
    let mut wal = String::with_capacity(path.len() + "-wal".len());
    wal.push_str(path);
    wal.push_str("-wal");
    wal.into()
}
/// Drops the harness's database handle and clears the global
/// `DATABASE_MANAGER`, so on-disk artifacts can be modified before a reopen.
fn force_close_for_artifact_tamper(db: &mut MvccTestDbNoConn) {
    drop(db.db.take());
    DATABASE_MANAGER.lock().clear();
}
/// Reads the page size from the 100-byte header of the database file at `path`.
///
/// The size is the big-endian `u16` at header bytes 16..18, where a stored
/// value of 1 stands for 65536. Panics on I/O errors or a file shorter than
/// 100 bytes.
fn read_db_page_size(path: &str) -> usize {
    use std::io::Read;

    let mut header = [0u8; 100];
    std::fs::File::open(path)
        .unwrap()
        .read_exact(&mut header)
        .unwrap();
    match u16::from_be_bytes([header[16], header[17]]) {
        1 => 65536,
        raw => usize::from(raw),
    }
}
/// Returns the byte offset in the database file at which page `page_no` starts.
///
/// Page numbers are 1-based, so page 1 starts at offset 0.
///
/// # Panics
/// Panics if `page_no` is 0. Without this check the subtraction would
/// underflow: an opaque overflow panic in debug builds, and a silently
/// wrapped, enormous offset in release builds.
fn page_file_offset(page_no: u32, page_size: usize) -> u64 {
    assert!(page_no >= 1, "page numbers are 1-based; got page_no = 0");
    u64::from(page_no - 1) * page_size as u64
}
/// Returns the offset of the b-tree page header within a page image: 100 for
/// page 1, 0 for every other page.
fn page_header_offset(page_no: u32) -> usize {
    match page_no {
        1 => 100,
        _ => 0,
    }
}
/// Reads page `page_no` (`page_size` bytes) from the database file at `path`.
///
/// Panics on I/O errors, including a file too short to contain the whole page.
fn read_db_page(path: &str, page_no: u32, page_size: usize) -> Vec<u8> {
    use std::io::{Read, Seek, SeekFrom};

    let start = page_file_offset(page_no, page_size);
    let mut db_file = std::fs::File::options().read(true).open(path).unwrap();
    db_file.seek(SeekFrom::Start(start)).unwrap();

    let mut image = vec![0u8; page_size];
    db_file.read_exact(&mut image).unwrap();
    image
}
/// Writes `page` at the file position of page `page_no` in the database file
/// at `path`, then syncs the file. Panics on I/O errors.
///
/// `page_size` is used only to compute the position; all of `page` is written.
fn write_db_page(path: &str, page_no: u32, page_size: usize, page: &[u8]) {
    use std::io::{Seek, SeekFrom, Write};

    let start = page_file_offset(page_no, page_size);
    let mut db_file = std::fs::File::options()
        .read(true)
        .write(true)
        .open(path)
        .unwrap();
    db_file.seek(SeekFrom::Start(start)).unwrap();
    db_file.write_all(page).unwrap();
    db_file.sync_all().unwrap();
}
/// Location of one cell inside a table-leaf page image, as computed by
/// `table_leaf_cell_locs`. All offsets are byte offsets from the start of the page.
#[derive(Debug, Clone, Copy)]
struct TableLeafCellLoc {
/// Offset of the cell, i.e. of its payload-length varint.
cell_offset: usize,
/// Encoded length in bytes of the payload-length varint.
payload_varint_len: usize,
/// Payload length decoded from that varint.
payload_len: usize,
/// Offset of the payload: cell start plus the payload-length and rowid varints.
payload_offset: usize,
}
/// Locates every cell of a table-leaf page, in cell-pointer-array order.
///
/// Panics if the page-type byte is not `0x0D`, if a varint cannot be decoded,
/// or if an offset falls outside `page`.
fn table_leaf_cell_locs(page: &[u8], page_no: u32) -> Vec<TableLeafCellLoc> {
    let hdr_off = page_header_offset(page_no);
    assert_eq!(page[hdr_off], 0x0D, "expected table-leaf page type");

    let cell_count = u16::from_be_bytes([page[hdr_off + 3], page[hdr_off + 4]]) as usize;
    // The cell pointer array (2 bytes per cell) follows the 8-byte page header.
    let pointer_array = hdr_off + 8;

    (0..cell_count)
        .map(|cell_idx| {
            let slot = pointer_array + cell_idx * 2;
            let cell_offset = u16::from_be_bytes([page[slot], page[slot + 1]]) as usize;

            // A cell starts with a payload-length varint followed by a rowid varint.
            let (raw_payload_len, payload_varint_len) = read_varint(&page[cell_offset..]).unwrap();
            let (_, rowid_varint_len) =
                read_varint(&page[cell_offset + payload_varint_len..]).unwrap();

            TableLeafCellLoc {
                cell_offset,
                payload_varint_len,
                payload_len: raw_payload_len as usize,
                payload_offset: cell_offset + payload_varint_len + rowid_varint_len,
            }
        })
        .collect()
}
/// Returns the location of the first cell (pointer-array order) of a
/// table-leaf page. Panics if the page has no cells.
fn table_leaf_first_cell_loc(page: &[u8], page_no: u32) -> TableLeafCellLoc {
    let locs = table_leaf_cell_locs(page, page_no);
    assert!(
        !locs.is_empty(),
        "expected at least one cell in metadata page"
    );
    locs[0]
}
/// Overwrites a cell's payload in place with `new_payload`, updating the
/// payload-length varint and zero-filling any leftover bytes.
///
/// Panics if `new_payload` is longer than the existing payload, or if its
/// length would encode to a varint of a different size. Either would shift
/// the bytes that follow.
fn rewrite_table_leaf_cell_payload(page: &mut [u8], loc: TableLeafCellLoc, new_payload: &[u8]) {
    let new_len = new_payload.len();
    assert!(
        new_len <= loc.payload_len,
        "new payload {} exceeds existing payload {}",
        new_len,
        loc.payload_len
    );

    let mut encoded_len = [0u8; 9];
    let n = write_varint(&mut encoded_len, new_len as u64);
    assert_eq!(
        n, loc.payload_varint_len,
        "payload varint length changed; in-place rewrite is unsafe"
    );

    let varint_end = loc.cell_offset + n;
    page[loc.cell_offset..varint_end].copy_from_slice(&encoded_len[..n]);

    let new_end = loc.payload_offset + new_len;
    let old_end = loc.payload_offset + loc.payload_len;
    page[loc.payload_offset..new_end].copy_from_slice(new_payload);
    // Empty range when the lengths are equal; otherwise clears the stale tail.
    page[new_end..old_end].fill(0);
}
/// Overwrites the byte that follows the first serial-type varint in the record
/// header of the page's first cell, i.e. the first byte of the second column's
/// serial type.
///
/// Returns `false` without modifying the page when the record header is too
/// small to hold a second serial type or claims to be longer than the payload.
fn tamper_table_leaf_value_serial_type(page: &mut [u8], page_no: u32, new_serial_type: u8) -> bool {
    let loc = table_leaf_first_cell_loc(page, page_no);
    let record = &mut page[loc.payload_offset..loc.payload_offset + loc.payload_len];

    let (declared_header_len, header_varint_len) = read_varint(record).unwrap();
    let header_len = declared_header_len as usize;
    let header_is_usable = header_len >= header_varint_len + 2 && header_len <= record.len();
    if !header_is_usable {
        return false;
    }

    // Skip the first column's serial type to reach the second one.
    let (_, first_serial_len) = read_varint(&record[header_varint_len..header_len]).unwrap();
    let second_serial_at = header_varint_len + first_serial_len;
    if second_serial_at >= header_len {
        return false;
    }

    record[second_serial_at] = new_serial_type;
    true
}
/// Rewrites a table-leaf page header so the page holds no cells.
///
/// Returns `false`, leaving the page untouched, when the page is too short or
/// its page-type byte is not `0x0D`.
fn wipe_table_leaf_cells(page: &mut [u8], page_no: u32) -> bool {
    let hdr_off = page_header_offset(page_no);
    let is_table_leaf = page.len() > hdr_off + 8 && page[hdr_off] == 0x0D;
    if !is_table_leaf {
        return false;
    }

    let page_size = page.len();
    // Cell count = 0.
    page[hdr_off + 3..hdr_off + 5].copy_from_slice(&0u16.to_be_bytes());
    // Cell content area starts at the end of the page. A 65536-byte page
    // truncates to 0 here, which SQLite's file format reads as 65536.
    page[hdr_off + 5..hdr_off + 7].copy_from_slice(&(page_size as u16).to_be_bytes());
    // Zero the byte at header offset 7 (the fragmented-free-bytes count).
    page[hdr_off + 7] = 0;
    true
}
/// Looks up the root page of the `__turso_internal_mvcc_meta` table in
/// `sqlite_schema`. Panics unless exactly one such table row exists.
fn metadata_root_page(conn: &Arc<Connection>) -> u32 {
    // The continuation line of the literal stays at column 0 so the query text is unchanged.
    let query = "SELECT rootpage FROM sqlite_schema
WHERE type = 'table' AND name = '__turso_internal_mvcc_meta'";
    let matches = get_rows(conn, query);
    assert_eq!(matches.len(), 1, "expected exactly one metadata table row");
    let root_page = matches[0][0].as_int().unwrap();
    root_page as u32
}
/// Rewrites, directly in the database file, the first cell of the metadata
/// page so its record keeps the same text key but carries `new_value`.
///
/// Panics if the key column is missing or not text, or if the new record does
/// not fit in place.
fn tamper_db_metadata_row_value(db_path: &str, metadata_root_page: u32, new_value: i64) {
    let page_size = read_db_page_size(db_path);
    let mut page = read_db_page(db_path, metadata_root_page, page_size);
    let first_cell = table_leaf_first_cell_loc(&page, metadata_root_page);

    let payload_end = first_cell.payload_offset + first_cell.payload_len;
    let existing =
        ImmutableRecord::from_bin_record(page[first_cell.payload_offset..payload_end].to_vec());
    let key_text = match existing
        .get_value_opt(0)
        .expect("metadata key column missing")
    {
        ValueRef::Text(text) => text,
        _ => panic!("metadata key must be text"),
    };

    let replacement = ImmutableRecord::from_values(
        &[
            Value::Text(Text::new(key_text.as_str().to_string())),
            Value::from_i64(new_value),
        ],
        2,
    );
    rewrite_table_leaf_cell_payload(&mut page, first_cell, replacement.as_blob());
    write_db_page(db_path, metadata_root_page, page_size, &page);
}
/// Rewrites, directly in the database file, every metadata-page cell whose
/// text key equals `target_key` so that its value becomes `new_value`.
///
/// Panics if no cell has that key, if a key column is missing or not text, or
/// if a new record does not fit in place.
fn tamper_db_metadata_row_value_by_key(
    db_path: &str,
    metadata_root_page: u32,
    target_key: &str,
    new_value: i64,
) {
    let page_size = read_db_page_size(db_path);
    let mut page = read_db_page(db_path, metadata_root_page, page_size);
    let mut updated = false;

    for loc in table_leaf_cell_locs(&page, metadata_root_page) {
        let payload_end = loc.payload_offset + loc.payload_len;
        let existing = ImmutableRecord::from_bin_record(page[loc.payload_offset..payload_end].to_vec());
        let key_matches = match existing
            .get_value_opt(0)
            .expect("metadata key column missing")
        {
            ValueRef::Text(text) => text.as_str() == target_key,
            _ => panic!("metadata key must be text"),
        };

        if key_matches {
            let replacement = ImmutableRecord::from_values(
                &[
                    Value::Text(Text::new(target_key.to_string())),
                    Value::from_i64(new_value),
                ],
                2,
            );
            rewrite_table_leaf_cell_payload(&mut page, loc, replacement.as_blob());
            updated = true;
        }
    }

    assert!(updated, "expected metadata key {target_key} to exist");
    write_db_page(db_path, metadata_root_page, page_size, &page);
}
/// Patches, directly in the database file, the value column's serial type in
/// the first cell of the metadata page. Panics if the page cannot be patched.
fn tamper_db_metadata_value_serial_type(
    db_path: &str,
    metadata_root_page: u32,
    new_serial_type: u8,
) {
    let page_size = read_db_page_size(db_path);
    let mut page = read_db_page(db_path, metadata_root_page, page_size);

    let tampered =
        tamper_table_leaf_value_serial_type(&mut page, metadata_root_page, new_serial_type);
    assert!(tampered, "expected metadata serial-type tamper to succeed");

    write_db_page(db_path, metadata_root_page, page_size, &page);
}
/// Rewrites, directly in the database file, the first cell of the metadata
/// page so its record keeps the same integer value but carries `new_key`.
///
/// Panics if the value column is missing or not an integer, or if the new
/// record does not fit in place.
fn tamper_db_metadata_row_key(db_path: &str, metadata_root_page: u32, new_key: &str) {
    let page_size = read_db_page_size(db_path);
    let mut page = read_db_page(db_path, metadata_root_page, page_size);
    let first_cell = table_leaf_first_cell_loc(&page, metadata_root_page);

    let payload_end = first_cell.payload_offset + first_cell.payload_len;
    let existing =
        ImmutableRecord::from_bin_record(page[first_cell.payload_offset..payload_end].to_vec());
    let current_value = match existing
        .get_value_opt(1)
        .expect("metadata value column missing")
    {
        ValueRef::Numeric(Numeric::Integer(int)) => int,
        _ => panic!("metadata value must be integer"),
    };

    let replacement = ImmutableRecord::from_values(
        &[
            Value::Text(Text::new(new_key.to_string())),
            Value::from_i64(current_value),
        ],
        2,
    );
    rewrite_table_leaf_cell_payload(&mut page, first_cell, replacement.as_blob());
    write_db_page(db_path, metadata_root_page, page_size, &page);
}
/// Patches the value column's serial type in every WAL frame that carries the
/// metadata root page, then rewrites the whole WAL with recomputed frame
/// checksums so the file still validates.
///
/// Returns `true` if at least one frame was modified, `false` if none was or if
/// the file is shorter than a WAL header. The file is rewritten (with fresh
/// checksums) even when nothing was modified, unless it was too short.
/// Panics on I/O errors.
fn tamper_wal_metadata_value_serial_type(
wal_path: &std::path::Path,
metadata_root_page: u32,
new_serial_type: u8,
) -> bool {
use std::io::{Read, Seek, SeekFrom, Write};
let mut file = std::fs::OpenOptions::new()
.read(true)
.write(true)
.open(wal_path)
.unwrap();
// Work on an in-memory copy of the whole file; it is written back at the end.
let mut bytes = Vec::new();
file.read_to_end(&mut bytes).unwrap();
if bytes.len() < WAL_HEADER_SIZE {
return false;
}
// Decode the WAL header: eight big-endian u32 fields in the first 32 bytes.
let header = WalHeader {
magic: u32::from_be_bytes(bytes[0..4].try_into().unwrap()),
file_format: u32::from_be_bytes(bytes[4..8].try_into().unwrap()),
page_size: u32::from_be_bytes(bytes[8..12].try_into().unwrap()),
checkpoint_seq: u32::from_be_bytes(bytes[12..16].try_into().unwrap()),
salt_1: u32::from_be_bytes(bytes[16..20].try_into().unwrap()),
salt_2: u32::from_be_bytes(bytes[20..24].try_into().unwrap()),
checksum_1: u32::from_be_bytes(bytes[24..28].try_into().unwrap()),
checksum_2: u32::from_be_bytes(bytes[28..32].try_into().unwrap()),
};
// True when the magic's low bit being set coincides with a big-endian host.
let use_native_endian = cfg!(target_endian = "big") == ((header.magic & 1) != 0);
let frame_size = WAL_FRAME_HEADER_SIZE + header.page_size as usize;
let mut frame_offset = WAL_HEADER_SIZE;
// Frame checksums are chained: each frame is seeded with the previous frame's
// checksum, and the first frame with the header's checksum.
let mut prev_checksums = (header.checksum_1, header.checksum_2);
let mut mutated = false;
// Only complete frames are visited; a trailing partial frame is left as is.
while frame_offset + frame_size <= bytes.len() {
let frame = &mut bytes[frame_offset..frame_offset + frame_size];
let page_no = u32::from_be_bytes(frame[0..4].try_into().unwrap());
if page_no == metadata_root_page {
let page_image = &mut frame
[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + header.page_size as usize];
let hdr_off = page_header_offset(metadata_root_page);
// Only touch images that are table-leaf pages with at least one cell; the
// helper called below panics on other page types and on empty pages.
if page_image.len() > hdr_off + 5 && page_image[hdr_off] == 0x0D {
let cell_count =
u16::from_be_bytes([page_image[hdr_off + 3], page_image[hdr_off + 4]]);
if cell_count > 0
&& tamper_table_leaf_value_serial_type(
page_image,
metadata_root_page,
new_serial_type,
)
{
mutated = true;
}
}
}
// Recompute this frame's checksum over the first 8 header bytes and then the
// page image, and store it in frame header bytes 16..24. This is done for every
// frame because a change in one frame invalidates all later chained checksums.
let header_checksum =
checksum_wal(&frame[0..8], &header, prev_checksums, use_native_endian);
let final_checksum = checksum_wal(
&frame[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + header.page_size as usize],
&header,
header_checksum,
use_native_endian,
);
frame[16..20].copy_from_slice(&final_checksum.0.to_be_bytes());
frame[20..24].copy_from_slice(&final_checksum.1.to_be_bytes());
prev_checksums = final_checksum;
frame_offset += frame_size;
}
// Write the (possibly modified) bytes back over the original contents.
file.seek(SeekFrom::Start(0)).unwrap();
file.write_all(&bytes).unwrap();
file.sync_all().unwrap();
mutated
}
/// Empties the metadata root page (via `wipe_table_leaf_cells`) in every WAL
/// frame that carries it, then rewrites the whole WAL with recomputed frame
/// checksums so the file still validates.
///
/// Returns `true` if at least one frame was modified, `false` if none was or if
/// the file is shorter than a WAL header. Panics on I/O errors.
fn tamper_wal_metadata_page_empty(wal_path: &std::path::Path, metadata_root_page: u32) -> bool {
use std::io::{Read, Seek, SeekFrom, Write};
let mut file = std::fs::OpenOptions::new()
.read(true)
.write(true)
.open(wal_path)
.unwrap();
// Work on an in-memory copy of the whole file; it is written back at the end.
let mut bytes = Vec::new();
file.read_to_end(&mut bytes).unwrap();
if bytes.len() < WAL_HEADER_SIZE {
return false;
}
// Decode the WAL header: eight big-endian u32 fields in the first 32 bytes.
let header = WalHeader {
magic: u32::from_be_bytes(bytes[0..4].try_into().unwrap()),
file_format: u32::from_be_bytes(bytes[4..8].try_into().unwrap()),
page_size: u32::from_be_bytes(bytes[8..12].try_into().unwrap()),
checkpoint_seq: u32::from_be_bytes(bytes[12..16].try_into().unwrap()),
salt_1: u32::from_be_bytes(bytes[16..20].try_into().unwrap()),
salt_2: u32::from_be_bytes(bytes[20..24].try_into().unwrap()),
checksum_1: u32::from_be_bytes(bytes[24..28].try_into().unwrap()),
checksum_2: u32::from_be_bytes(bytes[28..32].try_into().unwrap()),
};
// True when the magic's low bit being set coincides with a big-endian host.
let use_native_endian = cfg!(target_endian = "big") == ((header.magic & 1) != 0);
let frame_size = WAL_FRAME_HEADER_SIZE + header.page_size as usize;
let mut frame_offset = WAL_HEADER_SIZE;
// Frame checksums are chained, seeded from the header's checksum.
let mut prev_checksums = (header.checksum_1, header.checksum_2);
let mut mutated = false;
// Only complete frames are visited; a trailing partial frame is left as is.
while frame_offset + frame_size <= bytes.len() {
let frame = &mut bytes[frame_offset..frame_offset + frame_size];
let page_no = u32::from_be_bytes(frame[0..4].try_into().unwrap());
if page_no == metadata_root_page {
let page_image = &mut frame
[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + header.page_size as usize];
// The helper checks the page type itself and reports whether it changed anything.
if wipe_table_leaf_cells(page_image, metadata_root_page) {
mutated = true;
}
}
// Recompute and store this frame's checksum (frame header bytes 16..24) for every
// frame, since a change in one frame invalidates all later chained checksums.
let header_checksum =
checksum_wal(&frame[0..8], &header, prev_checksums, use_native_endian);
let final_checksum = checksum_wal(
&frame[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + header.page_size as usize],
&header,
header_checksum,
use_native_endian,
);
frame[16..20].copy_from_slice(&final_checksum.0.to_be_bytes());
frame[20..24].copy_from_slice(&final_checksum.1.to_be_bytes());
prev_checksums = final_checksum;
frame_offset += frame_size;
}
// Write the (possibly modified) bytes back over the original contents.
file.seek(SeekFrom::Start(0)).unwrap();
file.write_all(&bytes).unwrap();
file.sync_all().unwrap();
mutated
}
/// Zeroes bytes 4..8 of every frame header in the WAL at `path` and recomputes
/// the chained frame checksums, so that no frame is marked as a commit frame
/// while the checksums remain valid.
///
/// In the SQLite WAL format those bytes hold the database size after commit and
/// are zero for non-commit frames. Panics on I/O errors or if the file is
/// shorter than a WAL header.
fn rewrite_wal_frames_as_non_commit(path: &std::path::Path) {
use std::io::{Read, Seek, SeekFrom, Write};
let mut file = std::fs::OpenOptions::new()
.read(true)
.write(true)
.open(path)
.unwrap();
// Work on an in-memory copy of the whole file; it is written back at the end.
let mut bytes = Vec::new();
file.read_to_end(&mut bytes).unwrap();
assert!(bytes.len() >= WAL_HEADER_SIZE);
// Decode the WAL header: eight big-endian u32 fields in the first 32 bytes.
let header = WalHeader {
magic: u32::from_be_bytes(bytes[0..4].try_into().unwrap()),
file_format: u32::from_be_bytes(bytes[4..8].try_into().unwrap()),
page_size: u32::from_be_bytes(bytes[8..12].try_into().unwrap()),
checkpoint_seq: u32::from_be_bytes(bytes[12..16].try_into().unwrap()),
salt_1: u32::from_be_bytes(bytes[16..20].try_into().unwrap()),
salt_2: u32::from_be_bytes(bytes[20..24].try_into().unwrap()),
checksum_1: u32::from_be_bytes(bytes[24..28].try_into().unwrap()),
checksum_2: u32::from_be_bytes(bytes[28..32].try_into().unwrap()),
};
// True when the magic's low bit being set coincides with a big-endian host.
let use_native_endian = cfg!(target_endian = "big") == ((header.magic & 1) != 0);
let frame_size = WAL_FRAME_HEADER_SIZE + header.page_size as usize;
let mut frame_offset = WAL_HEADER_SIZE;
// Frame checksums are chained, seeded from the header's checksum.
let mut prev_checksums = (header.checksum_1, header.checksum_2);
// Only complete frames are visited; a trailing partial frame is left as is.
while frame_offset + frame_size <= bytes.len() {
let frame = &mut bytes[frame_offset..frame_offset + frame_size];
// Clear the commit marker before the checksum is computed, since the checksum
// covers the first 8 bytes of the frame header.
frame[4..8].copy_from_slice(&0u32.to_be_bytes());
let header_checksum =
checksum_wal(&frame[0..8], &header, prev_checksums, use_native_endian);
let final_checksum = checksum_wal(
&frame[WAL_FRAME_HEADER_SIZE..WAL_FRAME_HEADER_SIZE + header.page_size as usize],
&header,
header_checksum,
use_native_endian,
);
frame[16..20].copy_from_slice(&final_checksum.0.to_be_bytes());
frame[20..24].copy_from_slice(&final_checksum.1.to_be_bytes());
prev_checksums = final_checksum;
frame_offset += frame_size;
}
// Write the modified bytes back over the original contents.
file.seek(SeekFrom::Start(0)).unwrap();
file.write_all(&bytes).unwrap();
file.sync_all().unwrap();
}
/// After a restart, a new transaction's `begin_ts` must be greater than the
/// last commit timestamp observed before the restart.
#[test]
fn test_recovery_clock_monotonicity() {
    let mut db = MvccTestDbNoConn::new_with_random_db();

    // The connection and store handle are scoped so they are dropped before the restart.
    let ts_before_restart = {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE test(id INTEGER PRIMARY KEY, data TEXT)",
            "INSERT INTO test(id, data) VALUES (1, 'foo')",
        ] {
            conn.execute(sql).unwrap();
        }
        let store = db.get_mvcc_store();
        store.last_committed_tx_ts.load(Ordering::SeqCst)
    };

    db.restart();

    let conn = db.connect();
    let pager = conn.pager.load().clone();
    let store = db.get_mvcc_store();
    let tx_id = store.begin_tx(pager).unwrap();
    let entry = store.txs.get(&tx_id).expect("transaction should exist");
    let tx = entry.value();
    assert!(
        tx.begin_ts > ts_before_restart,
        "expected begin_ts {} to be > max_commit_ts {}",
        tx.begin_ts,
        ts_before_restart
    );
}
/// A logical log that contains only a single byte must not be treated as
/// recoverable: `maybe_recover_logical_log` reports that nothing was recovered.
#[test]
fn test_recover_logical_log_short_file_ignored() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    let store = db.get_mvcc_store();
    let log_file = store.get_logical_log_file();

    // Shrink the log to one byte...
    let truncate_done = log_file
        .truncate(1, Completion::new_write(|_| {}))
        .unwrap();
    conn.db.io.wait_for_completion(truncate_done).unwrap();

    // ...and make that byte 0xAB.
    let junk = Arc::new(Buffer::new(vec![0xAB]));
    let write_done = log_file
        .pwrite(0, junk, Completion::new_write(|_| {}))
        .unwrap();
    conn.db.io.wait_for_completion(write_done).unwrap();
    assert_eq!(log_file.size().unwrap(), 1);

    let recovered = store.maybe_recover_logical_log(conn).unwrap();
    assert!(!recovered);
}
/// Switching a fresh MVCC database back to WAL mode must succeed and report
/// `wal` (case-insensitively) as the resulting journal mode.
#[test]
fn test_journal_mode_switch_from_mvcc_to_wal_without_log_frames() {
    let harness = MvccTestDb::new();
    let result = get_rows(&harness.conn, "PRAGMA journal_mode = 'wal'");
    assert_eq!(result.len(), 1);
    let reported_mode = result[0][0].to_string().to_lowercase();
    assert_eq!(reported_mode, "wal");
}
// Rows written both before and after a TRUNCATE checkpoint must all be visible
// after a restart. `encrypted` is not declared in this function; it is
// presumably supplied by the `turso_macros::test(encryption)` attribute.
#[turso_macros::test(encryption)]
fn test_recovery_checkpoint_then_more_writes() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        // Scoped so the connection is dropped before the restart.
        let conn = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
            "INSERT INTO t VALUES (2, 'b')",
            "INSERT INTO t VALUES (3, 'c')",
        ] {
            conn.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    let rows = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    let expected = [(1_i64, "a"), (2_i64, "b"), (3_i64, "c")];
    assert_eq!(rows.len(), 3);
    for (row, (id, v)) in rows.iter().zip(expected) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].to_string(), v);
    }
}
/// After a restart, selecting each column of a table that has both a TEXT
/// primary key and a UNIQUE column must still return that column's own value.
#[test]
fn test_restart_preserves_autoindex_to_column_mapping() {
    let mut db = MvccTestDbNoConn::new_with_random_db_with_opts(DatabaseOpts::new());
    {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE dummy(x)",
            "CREATE TABLE t(a TEXT PRIMARY KEY, b TEXT UNIQUE)",
            "INSERT INTO t VALUES('aa', 'bb')",
        ] {
            conn.execute(sql).unwrap();
        }
        conn.close().unwrap();
    }
    db.restart();

    let conn = db.connect();

    let column_a = get_rows(&conn, "SELECT a FROM t");
    assert_eq!(column_a.len(), 1);
    assert_eq!(column_a[0][0].to_string(), "aa");

    let column_b = get_rows(&conn, "SELECT b FROM t");
    assert_eq!(column_b.len(), 1);
    assert_eq!(column_b[0][0].to_string(), "bb");
}
/// Interleaves an UPDATE commit on one connection with a DELETE of the same row
/// on another, and checks that the committed UPDATE is what survives, both
/// before and after a restart.
#[test]
fn test_concurrent_update_then_delete_serializes_correctly_across_restart() {
let mut db = MvccTestDbNoConn::new_with_random_db_with_opts(DatabaseOpts::new());
{
// Seed the table with the single row both transactions will target.
let conn = db.connect();
conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT UNIQUE)")
.unwrap();
conn.execute("INSERT INTO t(id, v) VALUES (1, 'initial')")
.unwrap();
conn.close().unwrap();
}
{
let conn_a = db.connect();
let conn_b = db.connect();
conn_a.execute("BEGIN CONCURRENT").unwrap();
conn_a
.execute("UPDATE t SET v = 'a_value' WHERE id = 1")
.unwrap();
// Make connection A's COMMIT yield once at the commit-validation yield point.
conn_a.set_yield_injector(Some(FixedYieldInjector::new([
CommitYieldPoint::CommitValidation.point(),
])));
let mut commit_stmt = conn_a.prepare("COMMIT").unwrap();
let mut yielded = false;
// Step the COMMIT until it reports IO; the assertion below treats that as the
// injected yield. The loop is bounded so a missing yield fails the test.
for _ in 0..100 {
match commit_stmt.step().unwrap() {
StepResult::IO => {
yielded = true;
break;
}
StepResult::Done => break,
_ => {}
}
}
assert!(
yielded,
"tx_a's COMMIT should yield at CommitYieldPoint::CommitValidation"
);
// While A's commit is paused, B deletes the same row in its own transaction.
conn_b.execute("BEGIN CONCURRENT").unwrap();
conn_b.execute("DELETE FROM t WHERE id = 1").unwrap();
// Drive A's COMMIT to completion.
commit_stmt.run_collect_rows().unwrap();
drop(commit_stmt);
let rows = get_rows(&conn_a, "SELECT id, v FROM t");
assert_eq!(rows.len(), 1);
// B never commits its delete.
conn_b.execute("ROLLBACK").unwrap();
conn_a.close().unwrap();
conn_b.close().unwrap();
}
db.restart();
// After recovery only A's committed update may be visible.
let conn = db.connect();
let rows = get_rows(&conn, "SELECT id, v FROM t");
assert_eq!(
rows.len(),
1,
"tx_a's committed row must survive recovery, got {rows:?}"
);
assert_eq!(rows[0][0].as_int().unwrap(), 1);
assert_eq!(rows[0][1].to_string(), "a_value");
}
/// Defines an AFTER DELETE trigger that writes to an audit table, restarts the
/// database, then deletes a row and expects exactly one `deleted` audit entry.
#[test]
fn test_restart_with_trigger_rootpage_zero() {
    let mut db = MvccTestDbNoConn::new_with_random_db_with_opts(DatabaseOpts::new());
    {
        let setup = db.connect();
        let statements = [
            "CREATE TABLE t1(id INTEGER PRIMARY KEY, a TEXT)",
            "CREATE TABLE audit(id INTEGER PRIMARY KEY, action TEXT)",
            "CREATE TRIGGER trg_del AFTER DELETE ON t1 \
             BEGIN INSERT INTO audit VALUES (NULL, 'deleted'); END;",
            "INSERT INTO t1 VALUES (1, 'x')",
        ];
        for sql in statements {
            setup.execute(sql).unwrap();
        }
        setup.close().unwrap();
    }
    db.restart();
    {
        let conn = db.connect();
        // The trigger must still fire after the restart.
        conn.execute("DELETE FROM t1 WHERE id = 1").unwrap();
        let audit_rows = get_rows(&conn, "SELECT action FROM audit ORDER BY id");
        assert_eq!(audit_rows.len(), 1);
        assert_eq!(audit_rows[0][0].to_string(), "deleted");
    }
}
/// Checkpoints two rows, deletes one of them on a later connection, restarts,
/// and verifies the deleted row stays absent both before and after a further
/// checkpoint. Finishes by expecting `PRAGMA integrity_check` to report `ok`.
#[turso_macros::test(encryption)]
fn test_btree_resident_recovery_then_checkpoint_delete_stays_deleted() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        let seeder = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'keep')",
            "INSERT INTO t VALUES (2, 'gone')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            seeder.execute(sql).unwrap();
        }
    }
    {
        let deleter = db.connect();
        deleter.execute("DELETE FROM t WHERE id = 2").unwrap();
    }
    db.restart();
    {
        let conn = db.connect();
        let surviving_ids = || get_rows(&conn, "SELECT id FROM t ORDER BY id");

        let before_checkpoint = surviving_ids();
        assert_eq!(before_checkpoint.len(), 1);
        assert_eq!(before_checkpoint[0][0].as_int().unwrap(), 1);

        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();

        let after_checkpoint = surviving_ids();
        assert_eq!(after_checkpoint.len(), 1);
        assert_eq!(after_checkpoint[0][0].as_int().unwrap(), 1);

        let integrity = get_rows(&conn, "PRAGMA integrity_check");
        assert_eq!(integrity.len(), 1);
        assert_eq!(&integrity[0][0].to_string(), "ok");
    }
}
/// Chops the final byte off the logical log after two inserts, restarts, and
/// expects only row 1 to be visible. It then inserts row 3, restarts again,
/// and expects rows 1 and 3.
#[test]
fn test_recovery_overwrites_torn_tail_on_next_append() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let writer = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "INSERT INTO t VALUES (2, 'b')",
        ] {
            writer.execute(sql).unwrap();
        }
    }
    {
        let conn = db.connect();
        let log = db.get_mvcc_store().get_logical_log_file();
        let original_len = log.size().unwrap();
        assert!(original_len > 1);
        // Drop exactly one byte from the end of the log.
        let pending = log
            .truncate(original_len - 1, Completion::new_trunc(|_| {}))
            .unwrap();
        conn.db.io.wait_for_completion(pending).unwrap();
    }
    db.restart();
    {
        let conn = db.connect();
        let ids = get_rows(&conn, "SELECT id FROM t ORDER BY id");
        assert_eq!(ids.len(), 1);
        assert_eq!(ids[0][0].as_int().unwrap(), 1);
        conn.execute("INSERT INTO t VALUES (3, 'c')").unwrap();
    }
    db.restart();
    {
        let conn = db.connect();
        let recovered = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
        assert_eq!(recovered.len(), 2);
        for (row, (id, v)) in recovered.iter().zip([(1, "a"), (3, "c")]) {
            assert_eq!(row[0].as_int().unwrap(), id);
            assert_eq!(row[1].to_string(), v);
        }
    }
}
/// Ignored scenario (see the `ignore` reason): calls `overwrite_file_with_junk`
/// on the `db-log` file with `LOG_HDR_SIZE / 2` and `0xAB`, switches the
/// database to MVCC journal mode, and reopens it. The final open expects
/// `persistent_tx_ts_max` to be 0 and the log file to be at least
/// `LOG_HDR_SIZE` bytes long.
#[test]
#[ignore = "Needs a dedicated bootstrap harness that can create header=MVCC + missing metadata + torn short log atomically"]
fn test_bootstrap_repairs_torn_short_log_before_metadata_init() {
    let temp_dir = tempfile::TempDir::new().unwrap();
    let file_name = format!("bootstrap_torn_{}", rand::random::<u64>());
    let db_path = temp_dir.path().join(file_name);
    let db_path_str = db_path.to_str().unwrap().to_string();

    // First open: create a table through a plain file-backed database.
    {
        let io = Arc::new(PlatformIO::new().unwrap());
        let db = Database::open_file(io, &db_path_str).unwrap();
        let conn = db.connect().unwrap();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.close().unwrap();
    }

    let log_path = std::path::Path::new(&db_path_str).with_extension("db-log");
    overwrite_file_with_junk(&log_path, LOG_HDR_SIZE / 2, 0xAB);
    DATABASE_MANAGER.lock().clear();

    // Second open: switch the journal mode to MVCC.
    {
        let io = Arc::new(PlatformIO::new().unwrap());
        let db = Database::open_file(io, &db_path_str).unwrap();
        let conn = db.connect().unwrap();
        conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        conn.close().unwrap();
    }
    DATABASE_MANAGER.lock().clear();

    // Third open: inspect the metadata row and the on-disk log length.
    let io = Arc::new(PlatformIO::new().unwrap());
    let db = Database::open_file(io, &db_path_str).unwrap();
    let conn = db.connect().unwrap();
    let meta = get_rows(
        &conn,
        "SELECT v FROM __turso_internal_mvcc_meta WHERE k = 'persistent_tx_ts_max'",
    );
    assert_eq!(meta.len(), 1);
    assert_eq!(meta[0][0].as_int().unwrap(), 0);
    let log_len = std::fs::metadata(&log_path).map_or(0, |m| m.len());
    assert!(
        log_len >= LOG_HDR_SIZE as u64,
        "expected bootstrap to rewrite durable logical-log header"
    );
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`,
/// confirms the WAL has frames and the logical log is larger than its header,
/// then restarts. After restart both rows must be readable, the logical log
/// must be at least `LOG_HDR_SIZE` bytes, and the WAL file must be empty.
#[test]
fn test_bootstrap_completes_interrupted_checkpoint_with_committed_wal() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "INSERT INTO t VALUES (2, 'b')",
        ] {
            conn.execute(sql).unwrap();
        }
        advance_checkpoint_until_wal_has_commit_frame(db.get_mvcc_store(), &conn);

        let pager = conn.pager.load().clone();
        let wal = pager.wal.as_ref().expect("wal must exist");
        assert!(wal.get_max_frame_in_wal() > 0);

        let log_file = db.get_mvcc_store().get_logical_log_file();
        assert!(log_file.size().unwrap() > LOG_HDR_SIZE as u64);
    }
    db.restart();
    let conn = db.connect();
    let recovered = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(recovered.len(), 2);
    for (row, (id, v)) in recovered.iter().zip([(1, "a"), (2, "b")]) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].to_string(), v);
    }
    let log_size = db.get_mvcc_store().get_logical_log_file().size().unwrap();
    assert!(
        log_size >= LOG_HDR_SIZE as u64,
        "logical log must be at least {LOG_HDR_SIZE} bytes after interrupted-checkpoint reconciliation"
    );
    // A missing WAL file counts as length 0.
    let wal_len = wal_path_for_db(&db_path)
        .metadata()
        .map_or(0, |m| m.len());
    assert_eq!(wal_len, 0);
}
/// Drives a `CheckpointStateMachine` step by step and checks the ordering of
/// its truncation states: the WAL file must still be non-empty when the
/// machine is in `TruncateLogicalLog`, and by the time it is in `TruncateWal`
/// the logical log must already have size 0. After completion both the WAL
/// and the logical log are expected to be empty.
#[test]
fn test_checkpoint_truncates_wal_last() {
let db = MvccTestDbNoConn::new_with_random_db();
let db_path = db.path.as_ref().unwrap().clone();
let wal_path = wal_path_for_db(&db_path);
let conn = db.connect();
conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
conn.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
let mvcc_store = db.get_mvcc_store();
let pager = conn.pager.load().clone();
// NOTE(review): the meaning of the `true` argument is not visible in this
// chunk — confirm against CheckpointStateMachine::new.
let mut checkpoint_sm = CheckpointStateMachine::new(
pager.clone(),
mvcc_store.clone(),
conn.clone(),
true,
conn.get_sync_mode(),
);
let mut saw_truncate_log_state_with_wal = false;
let mut finished = false;
// Bounded stepping loop: the state is inspected *before* each step so the
// assertions observe the files as they are on entry to that state.
for _ in 0..50_000 {
let state = checkpoint_sm.state_for_test();
if state == CheckpointState::TruncateLogicalLog {
// A missing WAL file is treated as length 0 and fails the assertion.
let wal_len = wal_path.metadata().map(|m| m.len()).unwrap_or(0);
assert!(wal_len > 0, "WAL must still exist before log truncation");
saw_truncate_log_state_with_wal = true;
}
if state == CheckpointState::TruncateWal {
assert!(
saw_truncate_log_state_with_wal,
"must truncate logical log before truncating WAL"
);
assert_eq!(
mvcc_store.get_logical_log_file().size().unwrap(),
0,
"logical log should be truncated to 0"
);
}
match checkpoint_sm.step(&()).unwrap() {
// Block on the pending IO before taking the next step.
TransitionResult::Io(io) => io.wait(pager.io.as_ref()).unwrap(),
TransitionResult::Continue => {}
TransitionResult::Done(_) => {
finished = true;
break;
}
}
}
// Fail if the step budget ran out or the TruncateLogicalLog state was never seen.
assert!(finished, "checkpoint state machine did not finish");
assert!(saw_truncate_log_state_with_wal);
let final_wal_len = wal_path.metadata().map(|m| m.len()).unwrap_or(0);
assert_eq!(final_wal_len, 0);
assert_eq!(
mvcc_store.get_logical_log_file().size().unwrap(),
0,
"logical log should be truncated to 0 after checkpoint"
);
}
/// Renames an indexed column between two `wal_checkpoint(TRUNCATE)` calls and
/// checks the row is still readable under the new column name.
#[test]
fn test_checkpoint_allows_index_schema_update_after_rename_column() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(a INTEGER, b INTEGER)",
        "CREATE INDEX idx_t_a ON t(a)",
        "INSERT INTO t VALUES (1, 2)",
        "PRAGMA wal_checkpoint(TRUNCATE)",
        "ALTER TABLE t RENAME COLUMN a TO c",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }
    let result = get_rows(&conn, "SELECT c, b FROM t");
    assert_eq!(result.len(), 1);
    let only_row = &result[0];
    assert_eq!(only_row[0].as_int().unwrap(), 1);
    assert_eq!(only_row[1].as_int().unwrap(), 2);
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`,
/// deletes the `db-log` file, and expects reopening (open or connect) to fail
/// with `LimboError::Corrupt`.
#[test]
fn test_bootstrap_rejects_committed_wal_without_log_file() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    {
        let conn = db.connect();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.execute("INSERT INTO t VALUES (1, 'x')").unwrap();
        advance_checkpoint_until_wal_has_commit_frame(db.get_mvcc_store(), &conn);
    }
    DATABASE_MANAGER.lock().clear();

    let log_path = std::path::Path::new(&db_path).with_extension("db-log");
    std::fs::remove_file(&log_path).unwrap();

    let io = Arc::new(PlatformIO::new().unwrap());
    // Either stage may report the failure; both must be Corrupt.
    let failure = match Database::open_file(io, &db_path) {
        Ok(reopened) => match reopened.connect() {
            Ok(_) => panic!("expected connect to fail with Corrupt"),
            Err(err) => err,
        },
        Err(err) => err,
    };
    assert!(matches!(failure, LimboError::Corrupt(_)));
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`,
/// calls `overwrite_log_header_byte(&db_path, 0, 0x00)`, and expects reopening
/// to fail with `LimboError::Corrupt`. The WAL file must still be non-empty
/// after the failed bootstrap.
#[test]
fn test_bootstrap_rejects_torn_log_header_with_committed_wal() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let wal_path = wal_path_for_db(&db_path);
    {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'x')",
            "INSERT INTO t VALUES (2, 'y')",
        ] {
            conn.execute(sql).unwrap();
        }
        advance_checkpoint_until_wal_has_commit_frame(db.get_mvcc_store(), &conn);
    }
    overwrite_log_header_byte(&db_path, 0, 0x00);
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    let failure = match Database::open_file(io, &db_path) {
        Ok(reopened) => match reopened.connect() {
            Ok(_) => panic!("expected connect to fail with Corrupt"),
            Err(err) => err,
        },
        Err(err) => err,
    };
    assert!(matches!(failure, LimboError::Corrupt(_)));

    let wal_len = wal_path.metadata().map_or(0, |m| m.len());
    assert!(
        wal_len > 0,
        "failed bootstrap must not truncate WAL before header validation"
    );
}
/// Calls `overwrite_log_header_byte(&db_path, 0, 0x00)`, removes the WAL file
/// and re-creates it through `overwrite_file_with_junk(.., 0, 0x00)`, then
/// expects reopening to fail with `LimboError::Corrupt`.
#[test]
fn test_bootstrap_rejects_corrupt_log_header_without_wal() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    {
        let conn = db.connect();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.execute("INSERT INTO t VALUES (1, 'x')").unwrap();
    }
    overwrite_log_header_byte(&db_path, 0, 0x00);
    {
        let wal_path = wal_path_for_db(&db_path);
        // Removal is best-effort: the WAL may not exist at this point.
        let _ = std::fs::remove_file(&wal_path);
        overwrite_file_with_junk(&wal_path, 0, 0x00);
    }
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    let failure = match Database::open_file(io, &db_path) {
        Ok(reopened) => match reopened.connect() {
            Ok(_) => panic!("expected connect to fail with Corrupt"),
            Err(err) => err,
        },
        Err(err) => err,
    };
    assert!(matches!(failure, LimboError::Corrupt(_)));
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`,
/// truncates the logical log to `LOG_HDR_SIZE` bytes, and restarts. Both rows
/// must still be readable, the log must be exactly `LOG_HDR_SIZE` bytes, and
/// the WAL file must be empty.
#[test]
fn test_bootstrap_handles_committed_wal_when_log_truncated() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "INSERT INTO t VALUES (2, 'b')",
        ] {
            conn.execute(sql).unwrap();
        }
        let store = db.get_mvcc_store();
        advance_checkpoint_until_wal_has_commit_frame(store.clone(), &conn);
        // Cut the logical log down to just its header-sized prefix.
        let pending = store
            .get_logical_log_file()
            .truncate(LOG_HDR_SIZE as u64, Completion::new_trunc(|_| {}))
            .unwrap();
        conn.db.io.wait_for_completion(pending).unwrap();
    }
    db.restart();
    let conn = db.connect();
    let recovered = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(recovered.len(), 2);
    for (row, (id, v)) in recovered.iter().zip([(1, "a"), (2, "b")]) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].to_string(), v);
    }
    let log_size = db.get_mvcc_store().get_logical_log_file().size().unwrap();
    assert_eq!(log_size, LOG_HDR_SIZE as u64);
    let wal_len = wal_path_for_db(&db_path)
        .metadata()
        .map_or(0, |m| m.len());
    assert_eq!(wal_len, 0);
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`,
/// passes the WAL through `rewrite_wal_frames_as_non_commit`, and reopens the
/// database. Open and connect must succeed and the inserted row must be
/// readable.
#[test]
fn test_bootstrap_ignores_wal_frames_without_commit_marker() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let wal_path = wal_path_for_db(&db_path);
    {
        let conn = db.connect();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.execute("INSERT INTO t VALUES (1, 'x')").unwrap();
        advance_checkpoint_until_wal_has_commit_frame(db.get_mvcc_store(), &conn);
    }
    rewrite_wal_frames_as_non_commit(&wal_path);
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    let reopened = Database::open_file(io, &db_path).expect("open should succeed");
    let reopened_conn = reopened.connect().expect("connect should succeed");
    let recovered = get_rows(&reopened_conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(recovered.len(), 1);
    let only_row = &recovered[0];
    assert_eq!(only_row[0].as_int().unwrap(), 1);
    assert_eq!(only_row[1].to_string(), "x");
}
/// Records `persistent_tx_ts_max` after a truncating checkpoint (which must
/// leave the logical log at size 0), restarts, and checks that a newly begun
/// transaction gets a `begin_ts` strictly greater than the recorded value.
#[test]
fn test_empty_log_recovery_loads_checkpoint_watermark() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let persistent_tx_ts_max = {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "INSERT INTO t VALUES (2, 'b')",
        ] {
            conn.execute(sql).unwrap();
        }
        let store = db.get_mvcc_store();
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        let log_size = store.get_logical_log_file().size().unwrap();
        assert_eq!(
            log_size, 0,
            "logical log should be truncated to 0 after checkpoint"
        );
        let watermark_rows = get_rows(
            &conn,
            "SELECT v FROM __turso_internal_mvcc_meta WHERE k = 'persistent_tx_ts_max'",
        );
        assert_eq!(watermark_rows.len(), 1);
        watermark_rows[0][0].as_int().unwrap() as u64
    };
    db.restart();
    let conn = db.connect();
    let pager = conn.pager.load().clone();
    let store = db.get_mvcc_store();
    let tx_id = store.begin_tx(pager).unwrap();
    let tx_entry = store.txs.get(&tx_id).expect("transaction should exist");
    assert!(
        tx_entry.value().begin_ts > persistent_tx_ts_max,
        "expected begin_ts {} > persistent_tx_ts_max {}",
        tx_entry.value().begin_ts,
        persistent_tx_ts_max
    );
}
/// On a fresh database, and again after a restart, the metadata table must
/// hold exactly one row: `persistent_tx_ts_max` = 0. After the restart the WAL
/// file must be empty and the `db-log` file must be exactly `LOG_HDR_SIZE`
/// bytes.
#[test]
fn test_meta_recovery_case_1_no_wal_no_log_metadata_present_clean_boot() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let wal_path = wal_path_for_db(&db_path);
    let log_path = std::path::Path::new(&db_path).with_extension("db-log");
    {
        let conn = db.connect();
        let meta = get_rows(
            &conn,
            "SELECT k, v FROM __turso_internal_mvcc_meta ORDER BY rowid",
        );
        assert_eq!(meta.len(), 1);
        let entry = &meta[0];
        assert_eq!(entry[0].to_string(), "persistent_tx_ts_max");
        assert_eq!(entry[1].as_int().unwrap(), 0);
    }
    db.restart();
    let conn = db.connect();
    let meta = get_rows(
        &conn,
        "SELECT k, v FROM __turso_internal_mvcc_meta ORDER BY rowid",
    );
    assert_eq!(meta.len(), 1);
    let entry = &meta[0];
    assert_eq!(entry[0].to_string(), "persistent_tx_ts_max");
    assert_eq!(entry[1].as_int().unwrap(), 0);

    // Missing files are treated as length 0 for both checks below.
    let wal_len = wal_path.metadata().map_or(0, |m| m.len());
    assert_eq!(
        wal_len, 0,
        "expected no committed WAL tail after clean boot"
    );
    let log_len = std::fs::metadata(&log_path).map_or(0, |m| m.len());
    assert_eq!(
        log_len, LOG_HDR_SIZE as u64,
        "expected logical log to be {LOG_HDR_SIZE} bytes (bootstrap header) on clean boot"
    );
}
/// Checkpoints two rows (expecting `persistent_tx_ts_max` >= 2 afterwards),
/// inserts a third row, restarts, and expects all three rows to be readable.
#[turso_macros::test(encryption)]
fn test_meta_recovery_case_2_no_wal_replay_above_metadata_boundary() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "INSERT INTO t VALUES (2, 'b')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            conn.execute(sql).unwrap();
        }
        let meta = get_rows(
            &conn,
            "SELECT v FROM __turso_internal_mvcc_meta WHERE k = 'persistent_tx_ts_max'",
        );
        assert_eq!(meta.len(), 1);
        let boundary = meta[0][0].as_int().unwrap();
        assert!(
            boundary >= 2,
            "expected metadata boundary >= 2 after checkpoint, got {boundary}"
        );
        // Written after the checkpoint.
        conn.execute("INSERT INTO t VALUES (3, 'c')").unwrap();
    }
    db.restart();
    let conn = db.connect();
    let recovered = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(recovered.len(), 3);
    for (row, (id, v)) in recovered.iter().zip([(1, "a"), (2, "b"), (3, "c")]) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].to_string(), v);
    }
}
/// Sets `PRAGMA user_version = 42` and checks the value is still 42 after a
/// restart, and again after a truncating checkpoint followed by a second
/// restart.
#[turso_macros::test(encryption)]
fn test_header_only_mutation_is_replayed_and_checkpointed() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        let conn = db.connect();
        conn.execute("PRAGMA user_version = 42").unwrap();
        let version = get_rows(&conn, "PRAGMA user_version");
        assert_eq!(version.len(), 1);
        assert_eq!(version[0][0].as_int().unwrap(), 42);
    }
    db.restart();
    {
        let conn = db.connect();
        let version = get_rows(&conn, "PRAGMA user_version");
        assert_eq!(version.len(), 1);
        let observed = version[0][0].as_int().unwrap();
        assert_eq!(
            observed, 42,
            "header mutation should recover from logical log before checkpoint",
        );
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
    }
    db.restart();
    {
        let conn = db.connect();
        let version = get_rows(&conn, "PRAGMA user_version");
        assert_eq!(version.len(), 1);
        let observed = version[0][0].as_int().unwrap();
        assert_eq!(
            observed, 42,
            "header mutation should persist in DB header after checkpoint truncates logical log",
        );
    }
}
/// `PRAGMA user_version = …` inside `BEGIN CONCURRENT` must fail with an error
/// mentioning "exclusive transaction". The same pragma inside a plain `BEGIN`
/// … `COMMIT` must succeed and be readable afterwards.
#[test]
fn test_mvcc_header_updates_require_exclusive_transaction() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();

    conn.execute("BEGIN CONCURRENT").unwrap();
    let err = conn.execute("PRAGMA user_version = 42").unwrap_err();
    let mentions_exclusive = err.to_string().contains("exclusive transaction");
    assert!(
        mentions_exclusive,
        "expected exclusive-transaction error, got: {err:?}"
    );
    conn.execute("ROLLBACK").unwrap();

    for sql in ["BEGIN", "PRAGMA user_version = 7", "COMMIT"] {
        conn.execute(sql).unwrap();
    }
    let version = get_rows(&conn, "PRAGMA user_version");
    assert_eq!(version.len(), 1);
    assert_eq!(version[0][0].as_int().unwrap(), 7);
}
/// Outside any explicit transaction, `PRAGMA user_version = 19` must succeed
/// and be readable back as 19.
#[test]
fn test_mvcc_header_updates_allow_autocommit_statement_tx() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("PRAGMA user_version = 19").unwrap();
    let version = get_rows(&conn, "PRAGMA user_version");
    assert_eq!(version.len(), 1);
    let observed = version[0][0].as_int().unwrap();
    assert_eq!(observed, 19);
}
/// Leaves an insert after the last checkpoint, calls
/// `tamper_db_metadata_row_value(.., -1)` on the database file, and replaces
/// the WAL via `overwrite_file_with_junk(.., 0, 0)`. Reopening must then fail
/// with `LimboError::Corrupt` at either the open or the connect step.
#[test]
#[cfg_attr(
    feature = "checksum",
    ignore = "byte-level tamper caught by checksum layer"
)]
fn test_meta_recovery_case_3_no_wal_log_frames_without_valid_metadata_fails_closed() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let meta_root = {
        let conn = db.connect();
        for sql in [
            "PRAGMA wal_checkpoint(TRUNCATE)",
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
            "INSERT INTO t VALUES (2, 'b')",
        ] {
            conn.execute(sql).unwrap();
        }
        metadata_root_page(&conn)
    };
    force_close_for_artifact_tamper(&mut db);
    tamper_db_metadata_row_value(&db_path, meta_root, -1);

    let wal_path = wal_path_for_db(&db_path);
    // Removal is best-effort: the WAL may not exist at this point.
    let _ = std::fs::remove_file(&wal_path);
    overwrite_file_with_junk(&wal_path, 0, 0);
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    match Database::open_file(io, &db_path) {
        Err(err) => assert!(
            matches!(err, LimboError::Corrupt(_)),
            "unexpected open error: {err:?}"
        ),
        Ok(reopened) => match reopened.connect() {
            Err(err) => assert!(
                matches!(err, LimboError::Corrupt(_)),
                "unexpected connect error: {err:?}"
            ),
            Ok(_) => panic!("expected connect to fail with Corrupt"),
        },
    }
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`
/// and restarts. Both rows must be visible, `persistent_tx_ts_max` must be
/// >= 2, and the WAL file must be empty.
#[test]
fn test_meta_recovery_case_4_committed_wal_reconcile_before_metadata_boundary_replay() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let wal_path = wal_path_for_db(&db_path);
    {
        let conn = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "INSERT INTO t VALUES (2, 'b')",
        ] {
            conn.execute(sql).unwrap();
        }
        advance_checkpoint_until_wal_has_commit_frame(db.get_mvcc_store(), &conn);
    }
    db.restart();
    let conn = db.connect();
    let recovered = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(recovered.len(), 2);
    for (row, id) in recovered.iter().zip([1, 2]) {
        assert_eq!(row[0].as_int().unwrap(), id);
    }
    let meta = get_rows(
        &conn,
        "SELECT v FROM __turso_internal_mvcc_meta WHERE k = 'persistent_tx_ts_max'",
    );
    assert_eq!(meta.len(), 1);
    let boundary = meta[0][0].as_int().unwrap();
    assert!(
        boundary >= 2,
        "expected replay boundary to advance after committed-WAL reconciliation",
    );
    let wal_len = wal_path.metadata().map_or(0, |m| m.len());
    assert_eq!(wal_len, 0, "reconciliation must truncate WAL at the end");
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`,
/// then calls `tamper_wal_metadata_page_empty` on the WAL (which must report
/// that it mutated something). Reopening must fail with `LimboError::Corrupt`.
#[test]
#[cfg_attr(
    feature = "checksum",
    ignore = "byte-level tamper caught by checksum layer"
)]
fn test_meta_recovery_case_5_committed_wal_missing_metadata_fails_closed() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let wal_path = wal_path_for_db(&db_path);
    let meta_root = {
        let conn = db.connect();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
        let store = db.get_mvcc_store();
        // Capture the root page before the checkpoint is advanced.
        let root = metadata_root_page(&conn);
        advance_checkpoint_until_wal_has_commit_frame(store, &conn);
        root
    };
    force_close_for_artifact_tamper(&mut db);
    let tampered = tamper_wal_metadata_page_empty(&wal_path, meta_root);
    assert!(
        tampered,
        "expected metadata WAL frame to be mutated into missing-row shape"
    );
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    let failure = match Database::open_file(io, &db_path) {
        Ok(reopened) => match reopened.connect() {
            Ok(_) => panic!("expected connect to fail closed"),
            Err(err) => err,
        },
        Err(err) => err,
    };
    assert!(matches!(failure, LimboError::Corrupt(_)));
}
/// Advances a checkpoint via `advance_checkpoint_until_wal_has_commit_frame`,
/// then calls `tamper_wal_metadata_value_serial_type(.., 0)` on the WAL (which
/// must report a mutation). Reopening must not yield a working connection;
/// the error kind is not checked.
#[test]
fn test_meta_recovery_case_6_committed_wal_corrupt_metadata_fails_closed() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let wal_path = wal_path_for_db(&db_path);
    let meta_root = {
        let conn = db.connect();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
        let store = db.get_mvcc_store();
        // Capture the root page before the checkpoint is advanced.
        let root = metadata_root_page(&conn);
        advance_checkpoint_until_wal_has_commit_frame(store, &conn);
        root
    };
    force_close_for_artifact_tamper(&mut db);
    let tampered = tamper_wal_metadata_value_serial_type(&wal_path, meta_root, 0);
    assert!(
        tampered,
        "expected at least one metadata WAL frame to be mutated"
    );
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    let connected = match Database::open_file(io, &db_path) {
        Ok(reopened) => reopened.connect().is_ok(),
        Err(_) => false,
    };
    assert!(!connected, "expected connect to fail closed");
}
/// Calls `tamper_db_metadata_value_serial_type(.., 0)` on the database file
/// and replaces the WAL via `overwrite_file_with_junk(.., 0, 0)`. Reopening
/// must not yield a working connection; the error kind is not checked.
#[test]
fn test_meta_recovery_case_7_metadata_table_shape_violation_fails_closed() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let meta_root = {
        let conn = db.connect();
        for sql in [
            "PRAGMA wal_checkpoint(TRUNCATE)",
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
        ] {
            conn.execute(sql).unwrap();
        }
        metadata_root_page(&conn)
    };
    force_close_for_artifact_tamper(&mut db);
    tamper_db_metadata_value_serial_type(&db_path, meta_root, 0);

    let wal_path = wal_path_for_db(&db_path);
    // Removal is best-effort: the WAL may not exist at this point.
    let _ = std::fs::remove_file(&wal_path);
    overwrite_file_with_junk(&wal_path, 0, 0);
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    let connected = match Database::open_file(io, &db_path) {
        Ok(reopened) => reopened.connect().is_ok(),
        Err(_) => false,
    };
    assert!(!connected, "expected connect to fail closed");
}
/// Calls `tamper_db_metadata_row_key` with the key `persistent_tx_ts_may` on
/// the database file, replaces the WAL via `overwrite_file_with_junk(.., 0, 0)`,
/// and expects reopening to fail with `LimboError::Corrupt`.
#[test]
#[cfg_attr(
    feature = "checksum",
    ignore = "byte-level tamper caught by checksum layer"
)]
fn test_meta_recovery_case_9_metadata_row_deleted_fails_closed() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let meta_root = {
        let conn = db.connect();
        for sql in [
            "PRAGMA wal_checkpoint(TRUNCATE)",
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
            "INSERT INTO t VALUES (2, 'b')",
        ] {
            conn.execute(sql).unwrap();
        }
        metadata_root_page(&conn)
    };
    force_close_for_artifact_tamper(&mut db);
    tamper_db_metadata_row_key(&db_path, meta_root, "persistent_tx_ts_may");

    let wal_path = wal_path_for_db(&db_path);
    // Removal is best-effort: the WAL may not exist at this point.
    let _ = std::fs::remove_file(&wal_path);
    overwrite_file_with_junk(&wal_path, 0, 0);
    DATABASE_MANAGER.lock().clear();

    let io = Arc::new(PlatformIO::new().unwrap());
    let failure = match Database::open_file(io, &db_path) {
        Ok(reopened) => match reopened.connect() {
            Ok(_) => panic!("expected connect to fail closed"),
            Err(err) => err,
        },
        Err(err) => err,
    };
    assert!(matches!(failure, LimboError::Corrupt(_)));
}
/// Steps a `CheckpointStateMachine` until it reports
/// `CheckpointState::CheckpointWal`, abandons it there, and restarts. The
/// inserted row must be visible and `persistent_tx_ts_max` must be >= 1.
///
/// The stepping loop is bounded; the test now fails explicitly if the budget
/// runs out before the stop state is reached, instead of silently continuing
/// with a checkpoint that never got to the intended window.
#[test]
fn test_meta_checkpoint_case_10_metadata_upsert_is_atomic_with_pager_commit() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let conn = db.connect();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
        let mvcc_store = db.get_mvcc_store();
        let committed_ts = mvcc_store.last_committed_tx_ts.load(Ordering::SeqCst);
        assert!(committed_ts > 0);
        let pager = conn.pager.load().clone();
        // NOTE(review): the meaning of the `true` argument is not visible in
        // this chunk — confirm against CheckpointStateMachine::new.
        let mut checkpoint_sm = CheckpointStateMachine::new(
            pager.clone(),
            mvcc_store,
            conn.clone(),
            true,
            conn.get_sync_mode(),
        );
        let mut reached_stop_window = false;
        for _ in 0..50_000 {
            // Stop on entry to CheckpointWal, before that state is stepped.
            if checkpoint_sm.state_for_test() == CheckpointState::CheckpointWal {
                reached_stop_window = true;
                break;
            }
            match checkpoint_sm.step(&()).unwrap() {
                TransitionResult::Io(io) => io.wait(pager.io.as_ref()).unwrap(),
                TransitionResult::Continue => {}
                TransitionResult::Done(_) => {
                    panic!("checkpoint finished before expected stop window")
                }
            }
        }
        assert!(
            reached_stop_window,
            "checkpoint did not reach CheckpointState::CheckpointWal within the step budget"
        );
    }
    db.restart();
    let conn = db.connect();
    let rows = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0][0].as_int().unwrap(), 1);
    assert_eq!(rows[0][1].to_string(), "a");
    let meta = get_rows(
        &conn,
        "SELECT v FROM __turso_internal_mvcc_meta WHERE k = 'persistent_tx_ts_max'",
    );
    assert_eq!(meta.len(), 1);
    assert!(
        meta[0][0].as_int().unwrap() >= 1,
        "expected metadata boundary to persist with pager commit"
    );
}
/// Prepares a SELECT before the table's first insert and first checkpoint.
/// Running it afterwards must return the row and bump the statement's
/// `Reprepare` counter from 0 to 1.
#[test]
fn test_prepared_select_reprepares_after_checkpoint_root_publish() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    let mut select = conn.prepare("SELECT id, v FROM t ORDER BY id").unwrap();
    assert_eq!(select.stmt_status(StatementStatusCounter::Reprepare), 0);

    for sql in [
        "INSERT INTO t VALUES (1, 'a')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let fetched = select.run_collect_rows().unwrap();
    assert_eq!(fetched.len(), 1);
    assert_eq!(fetched[0][0].as_int().unwrap(), 1);
    assert_eq!(&fetched[0][1].to_string(), "a");
    assert_eq!(select.stmt_status(StatementStatusCounter::Reprepare), 1);
}
/// Prepares a SELECT after the table has already been checkpointed once. A
/// further insert and checkpoint must leave the statement's `Reprepare`
/// counter at 0 while both rows are returned.
#[test]
fn test_prepared_select_does_not_reprepare_after_data_only_checkpoint() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
        "INSERT INTO t VALUES (1, 'a')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let mut select = conn.prepare("SELECT id, v FROM t ORDER BY id").unwrap();
    assert_eq!(select.stmt_status(StatementStatusCounter::Reprepare), 0);

    for sql in [
        "INSERT INTO t VALUES (2, 'b')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let fetched = select.run_collect_rows().unwrap();
    assert_eq!(fetched.len(), 2);
    for (row, (id, v)) in fetched.iter().zip([(1, "a"), (2, "b")]) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(&row[1].to_string(), v);
    }
    assert_eq!(select.stmt_status(StatementStatusCounter::Reprepare), 0);
}
/// Prepares an `INDEXED BY` lookup before any data or checkpoint exists.
/// After two inserts and a checkpoint it must return the matching row and
/// show a `Reprepare` counter of 1.
#[test]
fn test_prepared_index_lookup_reprepares_after_checkpoint_root_publish() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT, payload TEXT)",
        "CREATE INDEX idx_t_v ON t(v)",
    ] {
        conn.execute(sql).unwrap();
    }

    let mut lookup = conn
        .prepare("SELECT id, payload FROM t INDEXED BY idx_t_v WHERE v = 'b'")
        .unwrap();
    assert_eq!(lookup.stmt_status(StatementStatusCounter::Reprepare), 0);

    for sql in [
        "INSERT INTO t VALUES (1, 'a', 'one')",
        "INSERT INTO t VALUES (2, 'b', 'two')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let fetched = lookup.run_collect_rows().unwrap();
    assert_eq!(fetched.len(), 1);
    let hit = &fetched[0];
    assert_eq!(hit[0].as_int().unwrap(), 2);
    assert_eq!(&hit[1].to_string(), "two");
    assert_eq!(lookup.stmt_status(StatementStatusCounter::Reprepare), 1);
}
/// Steps an INSERT through two injected events: a yield at
/// `CheckpointYieldPoint::BeforeAcquireLock`, then a failure at
/// `CheckpointYieldPoint::AfterDurableBoundaryAdvanced`. Per the assertion messages, both
/// fire inside the INSERT's auto-checkpoint.
///
/// Afterwards the test checks, using only SQL-level APIs, that:
/// - the INSERT still finishes with `Done` and no error,
/// - `PRAGMA schema_version` is unchanged,
/// - `PRAGMA integrity_check` returns a single "ok" row for fresh statements and for
///   statements prepared before the checkpoint, on both connections,
/// - the inserted row is readable.
#[test]
fn test_integrity_check_after_checkpoint_io_yield_then_post_durable_failure_uses_user_apis() {
let db = MvccTestDbNoConn::new_with_random_db();
let conn = db.connect();
conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT UNIQUE)")
.unwrap();
// Second connection with an integrity_check prepared before any checkpoint runs.
let stale_schema_conn = db.connect();
let mut stale_integrity_check = stale_schema_conn.prepare("PRAGMA integrity_check").unwrap();
// Record the schema version so it can be compared after the failed checkpoint.
let schema_version = get_rows(&conn, "PRAGMA schema_version");
assert_eq!(schema_version.len(), 1);
let schema_version_before_checkpoint = schema_version[0][0].as_int().unwrap();
// NOTE(review): presumably a threshold of 0 makes the next commit trigger an
// auto-checkpoint — confirm against the pragma's implementation.
conn.execute("PRAGMA mvcc_checkpoint_threshold = 0")
.unwrap();
// One-shot yield before the checkpoint lock is acquired.
let injector = FixedYieldInjector::new([CheckpointYieldPoint::BeforeAcquireLock.point()]);
conn.set_yield_injector(Some(injector.clone()));
// One-shot synthetic failure at the AfterDurableBoundaryAdvanced point.
let failure_injector = FixedFailureInjector::new([(
CheckpointYieldPoint::AfterDurableBoundaryAdvanced.point(),
LimboError::TxError("synthetic checkpoint failure after pager commit".to_string()),
)]);
conn.set_failure_injector(Some(failure_injector.clone()));
// Integrity check prepared on the same connection that will run the checkpoint.
let mut same_conn_stale_integrity_check = conn.prepare("PRAGMA integrity_check").unwrap();
let mut insert_stmt = conn.prepare("INSERT INTO t VALUES (1, 'a')").unwrap();
// Phase 1: step until an IO result is seen with the yield injector drained, i.e. the
// BeforeAcquireLock yield has been consumed. The loop is bounded so a regression cannot hang.
let mut yielded_before_checkpoint_lock = false;
for _ in 0..10_000 {
match insert_stmt.step().unwrap() {
crate::StepResult::IO if injector.is_empty() => {
yielded_before_checkpoint_lock = true;
break;
}
crate::StepResult::IO => {}
crate::StepResult::Done => {
panic!("INSERT completed before checkpoint acquire-lock yield fired")
}
other => panic!("unexpected INSERT step result before yield: {other:?}"),
}
}
assert!(
yielded_before_checkpoint_lock,
"expected INSERT auto-checkpoint to yield before acquiring the checkpoint lock"
);
// Phase 2: keep stepping. The INSERT must finish with Done, without surfacing an error,
// and only after the failure injector has been consumed.
let mut completed_after_durable_boundary_failure = false;
for _ in 0..10_000 {
match insert_stmt.step() {
Ok(crate::StepResult::Done) if failure_injector.is_empty() => {
completed_after_durable_boundary_failure = true;
break;
}
Ok(crate::StepResult::Done) => {
panic!("INSERT completed before checkpoint durable-boundary failure fired")
}
Err(err) => panic!("unexpected INSERT error after yield: {err:?}"),
Ok(crate::StepResult::IO) => {}
Ok(other) => panic!("unexpected INSERT step result after yield: {other:?}"),
}
}
assert!(
completed_after_durable_boundary_failure,
"expected INSERT auto-checkpoint to observe durable-boundary failure and finish"
);
// Remove both injectors so the verification queries below run undisturbed.
conn.set_yield_injector(None);
conn.set_failure_injector(None);
// The schema version must be unchanged by the partially failed checkpoint.
let schema_version = get_rows(&conn, "PRAGMA schema_version");
assert_eq!(schema_version.len(), 1);
assert_eq!(
schema_version[0][0].as_int().unwrap(),
schema_version_before_checkpoint
);
// Fresh integrity check on the checkpointing connection.
let rows = get_rows(&conn, "PRAGMA integrity_check");
assert_eq!(rows.len(), 1);
assert_eq!(&rows[0][0].to_string(), "ok");
// Integrity check prepared earlier on the checkpointing connection.
let rows = same_conn_stale_integrity_check.run_collect_rows().unwrap();
assert_eq!(rows.len(), 1);
assert_eq!(&rows[0][0].to_string(), "ok");
// Integrity check prepared earlier on the other connection.
let rows = stale_integrity_check.run_collect_rows().unwrap();
assert_eq!(rows.len(), 1);
assert_eq!(&rows[0][0].to_string(), "ok");
// Fresh integrity check on the other connection.
let rows = get_rows(&stale_schema_conn, "PRAGMA integrity_check");
assert_eq!(rows.len(), 1);
assert_eq!(&rows[0][0].to_string(), "ok");
// The inserted row must be visible.
let rows = get_rows(&conn, "SELECT id, v FROM t");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 1);
assert_eq!(&rows[0][1].to_string(), "a");
}
/// An `integrity_check` that has already yielded at `TransactionYieldPoint::BeforeStart`
/// must, once another connection inserts a row and runs a TRUNCATE checkpoint, still
/// return a single "ok" row and record exactly one reprepare.
#[test]
fn test_running_integrity_check_reprepares_after_checkpoint_root_publish() {
    let test_db = MvccTestDbNoConn::new_with_random_db();
    let writer_conn = test_db.connect();
    writer_conn
        .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT UNIQUE)")
        .unwrap();

    // Park the integrity check at its first yield point.
    let reader_conn = test_db.connect();
    let yield_once = FixedYieldInjector::new([TransactionYieldPoint::BeforeStart.point()]);
    reader_conn.set_yield_injector(Some(yield_once.clone()));
    let mut check_stmt = reader_conn.prepare("PRAGMA integrity_check").unwrap();
    let yielded = matches!(check_stmt.step().unwrap(), crate::StepResult::IO);
    assert!(
        yielded && yield_once.is_empty(),
        "integrity_check should yield before opening its read transaction"
    );

    // Change the database and checkpoint it while the statement is parked.
    writer_conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    writer_conn
        .execute("PRAGMA wal_checkpoint(TRUNCATE)")
        .unwrap();

    // Resume the parked statement without further injected yields.
    reader_conn.set_yield_injector(None);
    let report = check_stmt.run_collect_rows().unwrap();
    assert_eq!(report.len(), 1);
    assert_eq!(&report[0][0].to_string(), "ok");
    let reprepares = check_stmt.stmt_status(StatementStatusCounter::Reprepare);
    assert_eq!(reprepares, 1);
}
/// Same scenario as the running-integrity-check test, except the reading connection has
/// issued `BEGIN` before preparing `PRAGMA integrity_check`. The statement yields at
/// `TransactionYieldPoint::BeforeStart`, a writer inserts and TRUNCATE-checkpoints, and the
/// resumed statement must return "ok" with one reprepare before the final `COMMIT`.
#[test]
fn test_deferred_begin_integrity_check_reprepares_after_checkpoint_root_publish() {
    let test_db = MvccTestDbNoConn::new_with_random_db();
    let writer_conn = test_db.connect();
    writer_conn
        .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT UNIQUE)")
        .unwrap();

    // Open the explicit transaction first, then install the one-shot yield.
    let reader_conn = test_db.connect();
    reader_conn.execute("BEGIN").unwrap();
    let yield_once = FixedYieldInjector::new([TransactionYieldPoint::BeforeStart.point()]);
    reader_conn.set_yield_injector(Some(yield_once.clone()));
    let mut check_stmt = reader_conn.prepare("PRAGMA integrity_check").unwrap();
    let yielded = matches!(check_stmt.step().unwrap(), crate::StepResult::IO);
    assert!(
        yielded && yield_once.is_empty(),
        "integrity_check should yield before opening its read transaction"
    );

    // Change the database and checkpoint it while the statement is parked.
    writer_conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    writer_conn
        .execute("PRAGMA wal_checkpoint(TRUNCATE)")
        .unwrap();

    // Resume the parked statement and verify its outcome.
    reader_conn.set_yield_injector(None);
    let report = check_stmt.run_collect_rows().unwrap();
    assert_eq!(report.len(), 1);
    assert_eq!(&report[0][0].to_string(), "ok");
    let reprepares = check_stmt.stmt_status(StatementStatusCounter::Reprepare);
    assert_eq!(reprepares, 1);

    reader_conn.execute("COMMIT").unwrap();
}
/// A parked `integrity_check` must be reprepared exactly once after an insert plus
/// TRUNCATE checkpoint, even though `PRAGMA schema_version` reads the same before and after.
#[test]
fn test_running_integrity_check_reprepares_without_schema_cookie_bump() {
    let test_db = MvccTestDbNoConn::new_with_random_db();
    let writer = test_db.connect();
    writer
        .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT UNIQUE)")
        .unwrap();
    // Reads the current schema version through the writer connection.
    let schema_cookie = || {
        get_rows(&writer, "PRAGMA schema_version")[0][0]
            .as_int()
            .unwrap()
    };

    let reader_conn = test_db.connect();
    let yield_once = FixedYieldInjector::new([TransactionYieldPoint::BeforeStart.point()]);
    reader_conn.set_yield_injector(Some(yield_once.clone()));
    let mut check_stmt = reader_conn.prepare("PRAGMA integrity_check").unwrap();
    let reprepares_at_start = check_stmt.stmt_status(StatementStatusCounter::Reprepare);
    assert_eq!(reprepares_at_start, 0);

    let cookie_before = schema_cookie();
    // Park the integrity check at its first yield point.
    let yielded = matches!(check_stmt.step().unwrap(), crate::StepResult::IO);
    assert!(
        yielded && yield_once.is_empty(),
        "integrity_check should yield before opening its read transaction"
    );

    writer.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    writer.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
    let cookie_after = schema_cookie();
    assert_eq!(
        cookie_after, cookie_before,
        "checkpoint root publication must not change SQLite's schema cookie"
    );

    // Resume the parked statement; it must succeed and count one reprepare.
    reader_conn.set_yield_injector(None);
    let report = check_stmt.run_collect_rows().unwrap();
    assert_eq!(report.len(), 1);
    assert_eq!(&report[0][0].to_string(), "ok");
    let reprepares_at_end = check_stmt.stmt_status(StatementStatusCounter::Reprepare);
    assert_eq!(reprepares_at_end, 1);
}
/// Steps a `CheckpointStateMachine` by hand until it reports
/// `CheckpointState::TruncateLogicalLog`, stops driving it there, and then checks that:
/// - `durable_txid_max` is already at least the first insert's commit timestamp, and
/// - a newly constructed state machine reports an old boundary at least that large.
#[test]
fn test_meta_checkpoint_case_11_auto_checkpoint_failure_after_commit_remains_recoverable() {
let db = MvccTestDbNoConn::new_with_random_db();
let conn = db.connect();
conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
let mvcc_store = db.get_mvcc_store();
// Commit timestamp of the insert above; the durable boundary is compared against it later.
let ts1 = mvcc_store.last_committed_tx_ts.load(Ordering::SeqCst);
assert!(ts1 > 0, "expected committed timestamp for first insert");
let pager = conn.pager.load().clone();
// NOTE(review): the meaning of the `true` argument is not visible here — confirm
// against `CheckpointStateMachine::new`.
let mut checkpoint_sm = CheckpointStateMachine::new(
pager.clone(),
mvcc_store.clone(),
conn.clone(),
true,
conn.get_sync_mode(),
);
// Drive the state machine, waiting on any IO it returns, until it is about to truncate
// the logical log. The iteration cap keeps a regression from hanging the test.
let mut reached_truncate = false;
for _ in 0..50_000 {
if checkpoint_sm.state_for_test() == CheckpointState::TruncateLogicalLog {
reached_truncate = true;
break; }
match checkpoint_sm.step(&()).unwrap() {
TransitionResult::Io(io) => io.wait(pager.io.as_ref()).unwrap(),
TransitionResult::Continue => {}
TransitionResult::Done(_) => {
panic!("checkpoint finished before reaching truncate state")
}
}
}
assert!(
reached_truncate,
"expected to reach TruncateLogicalLog state"
);
// The in-memory durable boundary must already cover the insert at this point.
let durable_boundary = mvcc_store.durable_txid_max.load(Ordering::SeqCst);
assert!(
durable_boundary >= ts1,
"expected in-memory durable checkpoint boundary to advance after pager commit: boundary={durable_boundary} ts1={ts1}"
);
// A second state machine stands in for the retry; only its starting bounds are inspected.
let sync_mode = conn.get_sync_mode();
let checkpoint_sm2 = CheckpointStateMachine::new(pager, mvcc_store, conn, true, sync_mode);
let (old_boundary, _) = checkpoint_sm2.checkpoint_bounds_for_test();
assert!(
old_boundary.unwrap_or_default() >= ts1,
"expected retry checkpoint to start from durable boundary: old={old_boundary:?} ts1={ts1}"
);
}
/// Regression scenario for a checkpoint state machine that was constructed with an older
/// durable boundary and only runs after another checkpoint has advanced that boundary.
///
/// Sequence: insert and checkpoint, then update. A "delayed" state machine is constructed
/// and observes the first boundary. An "interrupted" state machine is stepped until
/// `CheckpointState::CheckpointWal`, by which time `durable_txid_max` equals the update's
/// timestamp, and is then cleaned up. Finally the delayed machine is run to completion.
/// The updated row must be readable and `PRAGMA integrity_check` must return "ok".
#[test]
fn test_checkpoint_resamples_boundary_before_starting() {
let db = MvccTestDbNoConn::new_with_random_db();
let conn = db.connect();
// NOTE(review): presumably -1 disables auto-checkpointing so the test controls every
// checkpoint itself — confirm against the pragma's implementation.
conn.execute("PRAGMA mvcc_checkpoint_threshold = -1")
.unwrap();
conn.execute(
"CREATE TABLE dry_floor_846 (
sour_sand_972 BLOB UNIQUE,
sour_river_140 REAL,
sweet_wall_518 BLOB,
fast_grass_379 TEXT,
dark_wave_139 REAL UNIQUE,
sad_wind_216 INTEGER UNIQUE PRIMARY KEY
)",
)
.unwrap();
conn.execute(
"INSERT INTO dry_floor_846 (
sour_sand_972, sour_river_140, sweet_wall_518,
fast_grass_379, dark_wave_139, sad_wind_216
) VALUES (
zeroblob(16), 6.85, x'736d6172745f6c6561665f353637',
'wild_hill_714', 8.43, 788
)",
)
.unwrap();
// First checkpoint establishes a non-zero durable boundary.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
let mvcc_store = db.get_mvcc_store();
let first_boundary = mvcc_store.durable_txid_max.load(Ordering::SeqCst);
assert!(first_boundary > 0);
// Update every non-key column so the row has a committed change above the first boundary.
conn.execute(
"UPDATE dry_floor_846
SET sour_sand_972 = x'66756c6c5f737461725f333732',
sour_river_140 = 5.75,
sweet_wall_518 = zeroblob(32),
fast_grass_379 = 'old_moon_16',
dark_wave_139 = 2.90
WHERE sad_wind_216 = 788",
)
.unwrap();
let update_ts = mvcc_store.last_committed_tx_ts.load(Ordering::SeqCst);
assert!(update_ts > first_boundary);
// Construct the delayed checkpoint now, but do not step it yet. Its old boundary is
// the one left by the first checkpoint.
let delayed_conn = db.connect();
let delayed_pager = delayed_conn.pager.load().clone();
let mut delayed_checkpoint = CheckpointStateMachine::new(
delayed_pager.clone(),
mvcc_store.clone(),
delayed_conn.clone(),
true,
delayed_conn.get_sync_mode(),
);
let (old_boundary, _) = delayed_checkpoint.checkpoint_bounds_for_test();
assert_eq!(old_boundary, Some(first_boundary));
// A second checkpoint on another connection runs first and is stopped part-way.
let interrupted_conn = db.connect();
let interrupted_pager = interrupted_conn.pager.load().clone();
let mut interrupted_checkpoint = CheckpointStateMachine::new(
interrupted_pager.clone(),
mvcc_store.clone(),
interrupted_conn.clone(),
true,
interrupted_conn.get_sync_mode(),
);
// Step the interrupted checkpoint, waiting on returned IO, until it reaches CheckpointWal.
let mut reached_wal_checkpoint = false;
for _ in 0..50_000 {
if interrupted_checkpoint.state_for_test() == CheckpointState::CheckpointWal {
reached_wal_checkpoint = true;
break;
}
match interrupted_checkpoint.step(&()).unwrap() {
TransitionResult::Io(io) => io.wait(interrupted_pager.io.as_ref()).unwrap(),
TransitionResult::Continue => {}
TransitionResult::Done(_) => {
panic!("checkpoint finished before reaching WAL checkpoint")
}
}
}
assert!(
reached_wal_checkpoint,
"expected checkpoint to reach WAL checkpoint"
);
// By this state the durable boundary has already advanced to the update's timestamp.
assert_eq!(
mvcc_store.durable_txid_max.load(Ordering::SeqCst),
update_ts
);
// NOTE(review): presumably this releases whatever the interrupted checkpoint holds so
// the delayed one can proceed — confirm.
interrupted_checkpoint.cleanup_after_external_io_error();
// Now run the delayed checkpoint, constructed under the older boundary, to completion.
let mut finished = false;
for _ in 0..50_000 {
match delayed_checkpoint.step(&()).unwrap() {
TransitionResult::Io(io) => io.wait(delayed_pager.io.as_ref()).unwrap(),
TransitionResult::Continue => {}
TransitionResult::Done(_) => {
finished = true;
break;
}
}
}
assert!(finished, "delayed checkpoint did not finish");
// The updated values must be what is visible afterwards.
let rows = get_rows(
&conn,
"SELECT sad_wind_216, dark_wave_139, hex(sour_sand_972)
FROM dry_floor_846
WHERE sad_wind_216 = 788",
);
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 788);
assert_eq!(rows[0][1].to_string(), "2.9");
assert_eq!(&rows[0][2].to_string(), "66756C6C5F737461725F333732");
let integrity = get_rows(&conn, "PRAGMA integrity_check");
assert_eq!(integrity.len(), 1);
assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// SQL-level variant of the boundary-resampling scenario, driven with yield and failure
/// injectors instead of hand-stepped state machines.
///
/// A `PRAGMA journal_mode = 'wal'` statement is parked at
/// `CheckpointYieldPoint::BeforeAcquireLock`. Another connection then runs a TRUNCATE
/// checkpoint that fails at `AfterDurableBoundaryAdvanced`, leaving `durable_txid_max` at
/// the update's timestamp. The parked statement is resumed and must return "wal"; the
/// updated row must be readable and `PRAGMA integrity_check` must return "ok".
#[test]
fn test_checkpoint_resamples_boundary_before_starting_with_yield_injection() {
let db = MvccTestDbNoConn::new_with_random_db();
let conn = db.connect();
// NOTE(review): presumably -1 disables auto-checkpointing — confirm.
conn.execute("PRAGMA mvcc_checkpoint_threshold = -1")
.unwrap();
conn.execute(
"CREATE TABLE dry_floor_846 (
sour_sand_972 BLOB UNIQUE,
sour_river_140 REAL,
sweet_wall_518 BLOB,
fast_grass_379 TEXT,
dark_wave_139 REAL UNIQUE,
sad_wind_216 INTEGER UNIQUE PRIMARY KEY
)",
)
.unwrap();
conn.execute(
"INSERT INTO dry_floor_846 (
sour_sand_972, sour_river_140, sweet_wall_518,
fast_grass_379, dark_wave_139, sad_wind_216
) VALUES (
zeroblob(16), 6.85, x'736d6172745f6c6561665f353637',
'wild_hill_714', 8.43, 788
)",
)
.unwrap();
// First checkpoint establishes a non-zero durable boundary.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
let mvcc_store = db.get_mvcc_store();
let first_boundary = mvcc_store.durable_txid_max.load(Ordering::SeqCst);
assert!(first_boundary > 0);
// Commit an update above that boundary.
conn.execute(
"UPDATE dry_floor_846
SET sour_sand_972 = x'66756c6c5f737461725f333732',
sour_river_140 = 5.75,
sweet_wall_518 = zeroblob(32),
fast_grass_379 = 'old_moon_16',
dark_wave_139 = 2.90
WHERE sad_wind_216 = 788",
)
.unwrap();
let update_ts = mvcc_store.last_committed_tx_ts.load(Ordering::SeqCst);
assert!(update_ts > first_boundary);
// Park a statement at the BeforeAcquireLock yield point. Per the assertion message
// below, this journal_mode pragma runs a checkpoint.
let delayed_conn = db.connect();
delayed_conn.set_yield_injector(Some(FixedYieldInjector::new([
CheckpointYieldPoint::BeforeAcquireLock.point(),
])));
let mut delayed_checkpoint = delayed_conn.prepare("PRAGMA journal_mode = 'wal'").unwrap();
assert!(
matches!(delayed_checkpoint.step().unwrap(), StepResult::IO),
"first checkpoint should yield before acquiring the checkpoint lock"
);
// While it is parked, another connection checkpoints and is made to fail after the
// durable boundary has advanced.
let interleaving_conn = db.connect();
interleaving_conn.set_failure_injector(Some(FixedFailureInjector::new([(
CheckpointYieldPoint::AfterDurableBoundaryAdvanced.point(),
LimboError::TxError("synthetic checkpoint failure after pager commit".to_string()),
)])));
interleaving_conn
.execute("PRAGMA wal_checkpoint(TRUNCATE)")
.expect_err("interleaving checkpoint should fail after advancing durable boundary");
interleaving_conn.set_failure_injector(None);
// Despite the failure, the durable boundary now covers the update.
assert_eq!(
mvcc_store.durable_txid_max.load(Ordering::SeqCst),
update_ts
);
// Resume the parked statement; it must complete and report the journal mode.
let journal_mode_rows = delayed_checkpoint.run_collect_rows().unwrap();
assert_eq!(journal_mode_rows.len(), 1);
assert_eq!(&journal_mode_rows[0][0].to_string(), "wal");
// The updated values must be what is visible afterwards.
let rows = get_rows(
&conn,
"SELECT sad_wind_216, dark_wave_139, hex(sour_sand_972)
FROM dry_floor_846
WHERE sad_wind_216 = 788",
);
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 788);
assert_eq!(rows[0][1].to_string(), "2.9");
assert_eq!(&rows[0][2].to_string(), "66756C6C5F737461725F333732");
let integrity = get_rows(&conn, "PRAGMA integrity_check");
assert_eq!(integrity.len(), 1);
assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Walks one row (id 75) through a chain of unique-key values:
/// 'blue_river_906' -> 'old_path_352' -> 'empty_path_27' -> 'shy_cloud_434'.
///
/// A checkpoint is made to fail at `AfterDurableBoundaryAdvanced` in the middle of the
/// chain. The retry checkpoint must succeed, only the final key may be visible through the
/// unique index, and `PRAGMA integrity_check` must return "ok".
#[test]
fn test_checkpoint_retry_does_not_replay_checkpointed_btree_resident_unique_delete() {
let db = MvccTestDbNoConn::new_with_random_db();
let conn = db.connect();
// NOTE(review): presumably -1 disables auto-checkpointing — confirm.
conn.execute("PRAGMA mvcc_checkpoint_threshold = -1")
.unwrap();
conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
conn.execute("CREATE UNIQUE INDEX idx_t_v ON t(v)").unwrap();
conn.execute("INSERT INTO t VALUES (75, 'blue_river_906')")
.unwrap();
// Checkpoint the initial row before it is modified.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
// Delete the checkpointed row inside a transaction that is then rolled back.
conn.execute("BEGIN CONCURRENT").unwrap();
conn.execute("DELETE FROM t WHERE id = 75").unwrap();
conn.execute("ROLLBACK").unwrap();
// First real change of the unique key.
conn.execute("UPDATE t SET v = 'old_path_352' WHERE id = 75")
.unwrap();
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'blue_river_906'");
assert!(
rows.is_empty(),
"old unique key should no longer be visible"
);
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'old_path_352'");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 75);
// Checkpoint on a separate connection, forced to fail after the durable boundary advanced.
let ckpt = db.connect();
ckpt.set_failure_injector(Some(FixedFailureInjector::new([(
CheckpointYieldPoint::AfterDurableBoundaryAdvanced.point(),
LimboError::TxError("synthetic checkpoint failure after pager commit".to_string()),
)])));
let err = ckpt
.execute("PRAGMA wal_checkpoint(TRUNCATE)")
.expect_err("checkpoint should fail");
assert!(
matches!(err, LimboError::TxError(_)),
"expected injected checkpoint failure, got: {err:?}"
);
// Two more key changes after the failed checkpoint: one autocommit, one in BEGIN CONCURRENT.
conn.execute("UPDATE t SET v = 'empty_path_27' WHERE id = 75")
.unwrap();
conn.execute("BEGIN CONCURRENT").unwrap();
conn.execute("UPDATE t SET v = 'shy_cloud_434' WHERE id = 75")
.unwrap();
conn.execute("COMMIT").unwrap();
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'empty_path_27'");
assert!(
rows.is_empty(),
"intermediate unique key should not remain visible"
);
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'shy_cloud_434'");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 75);
// Retry the checkpoint from a fresh connection with no injector installed.
let retry_conn = db.connect();
retry_conn
.execute("PRAGMA wal_checkpoint(TRUNCATE)")
.expect("retry checkpoint should not replay the already-durable blue delete");
// After the retry, only the final key value may be visible.
let rows = get_rows(&conn, "SELECT id, v FROM t WHERE id = 75");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 75);
assert_eq!(rows[0][1].cast_text().unwrap(), "shy_cloud_434");
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'blue_river_906'");
assert!(
rows.is_empty(),
"blue key should stay absent after checkpoint retry"
);
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'old_path_352'");
assert!(
rows.is_empty(),
"old_path key should stay absent after checkpoint retry"
);
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'empty_path_27'");
assert!(
rows.is_empty(),
"empty_path key should stay absent after checkpoint retry"
);
let rows = get_rows(&conn, "SELECT id FROM t WHERE v = 'shy_cloud_434'");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 75);
let integrity = get_rows(&conn, "PRAGMA integrity_check");
assert_eq!(integrity.len(), 1);
assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Interleaves two commits: an "older" transaction's COMMIT is parked at
/// `CommitYieldPoint::LogRecordPrepared` while another transaction updates the unique key
/// of row 75 and commits. The older COMMIT is then finished. Two consecutive TRUNCATE
/// checkpoints must both succeed and row 75 must still carry the updated key.
#[test]
fn test_checkpoint_stale_unique_index_delete_with_out_of_order_commit_yield() {
let db = MvccTestDbNoConn::new_with_random_db();
let conn = db.connect();
// NOTE(review): presumably -1 disables auto-checkpointing — confirm.
conn.execute("PRAGMA mvcc_checkpoint_threshold = -1")
.unwrap();
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT UNIQUE)")
.unwrap();
conn.execute("CREATE TABLE s (id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
conn.execute("INSERT INTO s VALUES (1, 'first')").unwrap();
conn.execute("INSERT INTO t VALUES (1, 'first')").unwrap();
conn.execute("INSERT INTO t VALUES (2, 'second')").unwrap();
conn.execute("INSERT INTO t VALUES (75, 'blue_river_906')")
.unwrap();
// Checkpoint the initial rows.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
// The older transaction touches only table `s`, and its COMMIT is parked mid-way.
let older = db.connect();
older.execute("BEGIN CONCURRENT").unwrap();
older
.execute("UPDATE s SET v = 'older_commit' WHERE id = 1")
.unwrap();
older.set_yield_injector(Some(FixedYieldInjector::new([
CommitYieldPoint::LogRecordPrepared.point(),
])));
let mut older_commit = older.prepare("COMMIT").unwrap();
assert!(
matches!(older_commit.step().unwrap(), StepResult::IO),
"older commit should yield after taking its commit timestamp"
);
// While the older commit is parked, another transaction changes row 75's unique key and commits.
let updater = db.connect();
updater.execute("BEGIN CONCURRENT").unwrap();
updater
.execute("UPDATE t SET v = 'old_path_352' WHERE id = 75")
.unwrap();
updater.execute("COMMIT").unwrap();
// Finish the parked commit after the other transaction has committed.
older_commit.run_ignore_rows().unwrap();
drop(older_commit);
// Checkpoint twice in a row; both must succeed.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
let rows = get_rows(&conn, "SELECT id, v FROM t WHERE id = 75");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 75);
assert_eq!(&rows[0][1].to_string(), "old_path_352");
}
/// Parks an older COMMIT at `CommitYieldPoint::BeforeCommittedTimestampWatermarkUpdate`
/// while another connection creates and populates `created_after_yield`. The older commit
/// is then finished and a checkpoint is made to fail at `AfterDurableBoundaryAdvanced`.
/// After `db.restart()`, the new table and its row must be readable both before and after
/// a further TRUNCATE checkpoint.
#[test]
fn test_checkpoint_stale_boundary_does_not_replay_checkpointed_create_table_after_restart() {
let mut db = MvccTestDbNoConn::new_with_random_db();
// The block scopes every connection and statement so they are dropped before the restart.
{
let conn = db.connect();
// NOTE(review): presumably -1 disables auto-checkpointing — confirm.
conn.execute("PRAGMA mvcc_checkpoint_threshold = -1")
.unwrap();
conn.execute("CREATE TABLE s (id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
conn.execute("INSERT INTO s VALUES (1, 'first')").unwrap();
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
// The older transaction's COMMIT is parked before the committed-timestamp watermark update.
let older = db.connect();
older.execute("BEGIN CONCURRENT").unwrap();
older
.execute("UPDATE s SET v = 'older_commit' WHERE id = 1")
.unwrap();
older.set_yield_injector(Some(FixedYieldInjector::new([
CommitYieldPoint::BeforeCommittedTimestampWatermarkUpdate.point(),
])));
let mut older_commit = older.prepare("COMMIT").unwrap();
assert!(
matches!(older_commit.step().unwrap(), StepResult::IO),
"older commit should yield before updating the committed timestamp watermark"
);
// While it is parked, another connection creates a table and inserts a row.
let creator = db.connect();
creator
.execute("CREATE TABLE created_after_yield (id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
creator
.execute("INSERT INTO created_after_yield VALUES (1, 'persisted')")
.unwrap();
// Finish the parked commit.
older_commit.run_ignore_rows().unwrap();
drop(older_commit);
// Checkpoint, forced to fail after the durable boundary has advanced.
conn.set_failure_injector(Some(FixedFailureInjector::new([(
CheckpointYieldPoint::AfterDurableBoundaryAdvanced.point(),
LimboError::TxError("synthetic checkpoint failure after pager commit".to_string()),
)])));
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
.expect_err("checkpoint should fail after pager commit");
conn.set_failure_injector(None);
};
db.restart();
// After the restart the created table and its row must be present.
let conn = db.connect();
let rows = get_rows(&conn, "SELECT id, v FROM created_after_yield ORDER BY id");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 1);
assert_eq!(&rows[0][1].to_string(), "persisted");
// A further checkpoint must succeed and leave the same data visible.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
let rows = get_rows(&conn, "SELECT id, v FROM created_after_yield ORDER BY id");
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].as_int().unwrap(), 1);
assert_eq!(&rows[0][1].to_string(), "persisted");
}
/// Checkpoints two rows, commits a third without checkpointing, then rewrites the on-disk
/// `MVCC_META_KEY_PERSISTENT_TX_TS_MAX` metadata value to the third insert's timestamp.
/// After a restart only the two checkpointed rows must be visible, and the metadata table
/// must report the tampered boundary.
///
/// Ignored when the `checksum` feature is enabled; per the ignore message, the checksum
/// layer catches the byte-level tamper.
#[test]
#[cfg_attr(
feature = "checksum",
ignore = "byte-level tamper caught by checksum layer"
)]
fn test_meta_recovery_case_12_replay_gate_skips_at_or_below_metadata_boundary() {
let mut db = MvccTestDbNoConn::new_with_random_db();
let db_path = db.path.as_ref().unwrap().clone();
// The block returns the timestamp that is written into the metadata row.
let boundary = {
let conn = db.connect();
conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
conn.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
// Rows 1 and 2 are checkpointed into the database file.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
let root_page = metadata_root_page(&conn);
// Row 3 is committed after the checkpoint and is never checkpointed in this test.
conn.execute("INSERT INTO t VALUES (3, 'c')").unwrap();
let ts3 = db
.get_mvcc_store()
.last_committed_tx_ts
.load(Ordering::SeqCst);
drop(conn);
// Close the database so the file can be modified directly.
force_close_for_artifact_tamper(&mut db);
// Overwrite the persisted boundary with row 3's commit timestamp.
tamper_db_metadata_row_value_by_key(
&db_path,
root_page,
MVCC_META_KEY_PERSISTENT_TX_TS_MAX,
ts3 as i64,
);
ts3
};
db.restart();
let conn = db.connect();
// Row 3 must be absent; only the two checkpointed rows remain.
let rows = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
assert_eq!(rows.len(), 2);
assert_eq!(rows[0][0].as_int().unwrap(), 1);
assert_eq!(rows[1][0].as_int().unwrap(), 2);
// The metadata table must still hold the tampered boundary.
let meta = get_rows(
&conn,
"SELECT v FROM __turso_internal_mvcc_meta WHERE k = 'persistent_tx_ts_max'",
);
assert_eq!(meta.len(), 1);
assert_eq!(meta[0][0].as_int().unwrap() as u64, boundary);
}
/// `generate_series(1,3)` must be queryable on an `MvccTestDb` connection and yield 1, 2, 3.
#[test]
fn test_mvcc_memory_keeps_builtin_table_valued_functions() {
    let db = MvccTestDb::new();
    let series = get_rows(&db.conn, "SELECT value FROM generate_series(1,3)");
    assert_eq!(series.len(), 3);
    // Check each row's first column against its 1-based position.
    for (row, expected) in series.iter().zip(1..) {
        assert_eq!(row[0].as_int().unwrap(), expected);
    }
}
/// A row inserted in a transaction is readable inside that transaction and, after commit,
/// from a transaction started later.
#[test]
fn test_insert_read() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    // Every access in this test targets the same key.
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    let writer_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let inserted = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(writer_tx, inserted.clone()).unwrap();
    let seen_by_writer = store.read(writer_tx, &key()).unwrap().unwrap();
    assert_eq!(inserted, seen_by_writer);
    commit_tx(store.clone(), &db.conn, writer_tx).unwrap();

    // A transaction begun after the commit sees the committed row.
    let reader_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let seen_by_reader = store.read(reader_tx, &key()).unwrap().unwrap();
    assert_eq!(inserted, seen_by_reader);
}
/// Reading a key that was never inserted succeeds and yields `None`.
#[test]
fn test_read_nonexistent() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let missing_key = RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };
    let lookup = store.read(tx, &missing_key).unwrap();
    assert!(lookup.is_none());
}
/// A row inserted and then deleted in one transaction is invisible to that transaction
/// after the delete, and to a later transaction after the commit.
#[test]
fn test_delete() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    let deleter_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let original = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(deleter_tx, original.clone()).unwrap();
    let before_delete = store.read(deleter_tx, &key()).unwrap().unwrap();
    assert_eq!(original, before_delete);

    // Delete within the same transaction; the row disappears immediately.
    store.delete(deleter_tx, key()).unwrap();
    let after_delete = store.read(deleter_tx, &key()).unwrap();
    assert!(after_delete.is_none());
    commit_tx(store.clone(), &db.conn, deleter_tx).unwrap();

    // A later transaction does not see the row either.
    let later_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let after_commit = store.read(later_tx, &key()).unwrap();
    assert!(after_commit.is_none());
}
/// Deleting a key that was never inserted succeeds and returns `false`.
#[test]
fn test_delete_nonexistent() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let missing_key = RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };
    let deleted = store.delete(tx, missing_key).unwrap();
    assert!(!deleted);
}
/// Insert followed by update in one transaction: the transaction reads its own latest
/// write, and after commit a later transaction reads the updated row. Ends by calling
/// `drop_unused_row_versions`.
#[test]
fn test_commit() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    let writer_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let first_version = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(writer_tx, first_version.clone()).unwrap();
    let after_insert = store.read(writer_tx, &key()).unwrap().unwrap();
    assert_eq!(first_version, after_insert);

    // Overwrite within the same transaction and read the new value back.
    let second_version = generate_simple_string_row((-2).into(), 1, "World");
    store.update(writer_tx, second_version.clone()).unwrap();
    let after_update = store.read(writer_tx, &key()).unwrap().unwrap();
    assert_eq!(second_version, after_update);
    commit_tx(store.clone(), &db.conn, writer_tx).unwrap();

    // The reader commits before its result is compared, as in the original ordering.
    let reader_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let committed = store.read(reader_tx, &key()).unwrap().unwrap();
    commit_tx(store.clone(), &db.conn, reader_tx).unwrap();
    assert_eq!(second_version, committed);

    store.drop_unused_row_versions();
}
/// Writes made by a transaction that is rolled back are not visible to a transaction
/// started afterwards.
#[test]
fn test_rollback() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    let doomed_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let inserted = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(doomed_tx, inserted.clone()).unwrap();
    let after_insert = store.read(doomed_tx, &key()).unwrap().unwrap();
    assert_eq!(inserted, after_insert);

    let updated = generate_simple_string_row((-2).into(), 1, "World");
    store.update(doomed_tx, updated.clone()).unwrap();
    let after_update = store.read(doomed_tx, &key()).unwrap().unwrap();
    assert_eq!(updated, after_update);

    // Abandon both the insert and the update.
    store.rollback_tx(
        doomed_tx,
        db.conn.pager.load().clone(),
        &db.conn,
        crate::MAIN_DB_ID,
    );

    let fresh_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let after_rollback = store.read(fresh_tx, &key()).unwrap();
    assert_eq!(after_rollback, None);
}
/// A second transaction's `update` of a row that the first transaction inserted but has
/// not committed returns `false`, and the first transaction still reads its own row.
#[test]
fn test_dirty_write() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    let owner_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let owner_row = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(owner_tx, owner_row.clone()).unwrap();
    let before_intruder = store.read(owner_tx, &key()).unwrap().unwrap();
    assert_eq!(owner_row, before_intruder);

    // The intruder runs on its own connection and tries to overwrite the uncommitted row.
    let intruder_conn = db.db.connect().unwrap();
    let intruder_tx = store.begin_tx(intruder_conn.pager.load().clone()).unwrap();
    let intruder_row = generate_simple_string_row((-2).into(), 1, "World");
    let overwrote = store.update(intruder_tx, intruder_row).unwrap();
    assert!(!overwrote);

    let after_intruder = store.read(owner_tx, &key()).unwrap().unwrap();
    assert_eq!(owner_row, after_intruder);
}
/// A row inserted by a transaction that has not committed is not visible to a concurrent
/// transaction.
#[test]
fn test_dirty_read() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;

    let writer_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let uncommitted = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(writer_tx, uncommitted).unwrap();

    // The reader uses a separate connection and must not observe the pending insert.
    let reader_conn = db.db.connect().unwrap();
    let reader_tx = store.begin_tx(reader_conn.pager.load().clone()).unwrap();
    let target = RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };
    let observed = store.read(reader_tx, &target).unwrap();
    assert_eq!(observed, None);
}
/// A committed row that another transaction has deleted but not yet committed is still
/// visible to a third transaction.
#[test]
fn test_dirty_read_deleted() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    // Commit the row first.
    let setup_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let committed_row = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(setup_tx, committed_row.clone()).unwrap();
    commit_tx(store.clone(), &db.conn, setup_tx).unwrap();

    // Delete it in a transaction that stays open.
    let deleter_conn = db.db.connect().unwrap();
    let deleter_tx = store.begin_tx(deleter_conn.pager.load().clone()).unwrap();
    let deleted = store.delete(deleter_tx, key()).unwrap();
    assert!(deleted);

    // A third transaction still reads the committed version.
    let reader_conn = db.db.connect().unwrap();
    let reader_tx = store.begin_tx(reader_conn.pager.load().clone()).unwrap();
    let observed = store.read(reader_tx, &key()).unwrap().unwrap();
    assert_eq!(committed_row, observed);
}
/// A transaction keeps reading the version it first saw even after another transaction
/// commits an update to the same row, and its own later update of that row fails with
/// `LimboError::WriteWriteConflict`.
#[test]
fn test_fuzzy_read() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    // Commit the initial version.
    let setup_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let first_version = generate_simple_string_row((-2).into(), 1, "First");
    store.insert(setup_tx, first_version.clone()).unwrap();
    let own_read = store.read(setup_tx, &key()).unwrap().unwrap();
    assert_eq!(first_version, own_read);
    commit_tx(store.clone(), &db.conn, setup_tx).unwrap();

    // The long-running reader takes its first look.
    let reader_conn = db.db.connect().unwrap();
    let reader_tx = store.begin_tx(reader_conn.pager.load().clone()).unwrap();
    let first_look = store.read(reader_tx, &key()).unwrap().unwrap();
    assert_eq!(first_version, first_look);

    // A concurrent writer updates the row and commits.
    let writer_conn = db.db.connect().unwrap();
    let writer_tx = store.begin_tx(writer_conn.pager.load().clone()).unwrap();
    let second_version = generate_simple_string_row((-2).into(), 1, "Second");
    store.update(writer_tx, second_version).unwrap();
    commit_tx(store.clone(), &writer_conn, writer_tx).unwrap();

    // The reader still sees the version it first read.
    let second_look = store.read(reader_tx, &key()).unwrap().unwrap();
    assert_eq!(first_version, second_look);

    // The reader's own update of that row must be rejected.
    let third_version = generate_simple_string_row((-2).into(), 1, "Third");
    assert!(matches!(
        store.update(reader_tx, third_version),
        Err(LimboError::WriteWriteConflict)
    ));
}
/// Two transactions try to update the same committed row. The first update succeeds and
/// the second fails with `WriteWriteConflict`. After the loser is rolled back, committing
/// it again yields `TxTerminated`, and a later transaction reads the winner's value.
#[test]
fn test_lost_update() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    // Commit the initial row.
    let setup_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let initial_row = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(setup_tx, initial_row.clone()).unwrap();
    let own_read = store.read(setup_tx, &key()).unwrap().unwrap();
    assert_eq!(initial_row, own_read);
    commit_tx(store.clone(), &db.conn, setup_tx).unwrap();

    // The winner updates first.
    let winner_conn = db.db.connect().unwrap();
    let winner_tx = store.begin_tx(winner_conn.pager.load().clone()).unwrap();
    let winner_row = generate_simple_string_row((-2).into(), 1, "World");
    let winner_updated = store.update(winner_tx, winner_row.clone()).unwrap();
    assert!(winner_updated);

    // The loser's update conflicts with the winner's pending write.
    let loser_conn = db.db.connect().unwrap();
    let loser_tx = store.begin_tx(loser_conn.pager.load().clone()).unwrap();
    let loser_row = generate_simple_string_row((-2).into(), 1, "Hello, world!");
    assert!(matches!(
        store.update(loser_tx, loser_row),
        Err(LimboError::WriteWriteConflict)
    ));
    store.rollback_tx(
        loser_tx,
        loser_conn.pager.load().clone(),
        &loser_conn,
        crate::MAIN_DB_ID,
    );

    commit_tx(store.clone(), &winner_conn, winner_tx).unwrap();
    // Committing the rolled-back transaction must be refused.
    assert!(matches!(
        commit_tx(store.clone(), &loser_conn, loser_tx),
        Err(LimboError::TxTerminated)
    ));

    let observer_conn = db.db.connect().unwrap();
    let observer_tx = store.begin_tx(observer_conn.pager.load().clone()).unwrap();
    let final_row = store.read(observer_tx, &key()).unwrap().unwrap();
    assert_eq!(winner_row, final_row);
}
/// A transaction with an uncommitted update reads its own new value, while a concurrent
/// transaction keeps reading the last committed value.
#[test]
fn test_committed_visibility() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = || RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    // Commit the baseline value.
    let setup_tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let committed_row = generate_simple_string_row((-2).into(), 1, "10");
    store.insert(setup_tx, committed_row.clone()).unwrap();
    commit_tx(store.clone(), &db.conn, setup_tx).unwrap();

    // The updater changes the row but does not commit.
    let updater_conn = db.db.connect().unwrap();
    let updater_tx = store.begin_tx(updater_conn.pager.load().clone()).unwrap();
    let pending_row = generate_simple_string_row((-2).into(), 1, "20");
    let updated = store.update(updater_tx, pending_row.clone()).unwrap();
    assert!(updated);
    let updater_view = store.read(updater_tx, &key()).unwrap().unwrap();
    assert_eq!(updater_view, pending_row);

    // A concurrent observer sees the committed value only.
    let observer_conn = db.db.connect().unwrap();
    let observer_tx = store.begin_tx(observer_conn.pager.load().clone()).unwrap();
    let observer_view = store.read(observer_tx, &key()).unwrap().unwrap();
    assert_eq!(committed_row, observer_view);
}
/// A row inserted by a transaction that began after the reader stays invisible to the
/// reader, both before and after the inserting transaction commits.
#[test]
fn test_future_row() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;
    let key = RowID {
        table_id: (-2).into(),
        row_id: RowKey::Int(1),
    };

    // The reader (tx1) begins before the writer (tx2).
    let tx1 = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let conn2 = db.db.connect().unwrap();
    let tx2 = store.begin_tx(conn2.pager.load().clone()).unwrap();
    let inserted = generate_simple_string_row((-2).into(), 1, "Hello");
    store.insert(tx2, inserted).unwrap();

    // Uncommitted insert: not visible to tx1.
    let before_commit = store.read(tx1, &key).unwrap();
    assert_eq!(before_commit, None);

    commit_tx(db.mvcc_store.clone(), &conn2, tx2).unwrap();

    // Committed after tx1 began: still not visible to tx1.
    let after_commit = store.read(tx1, &key).unwrap();
    assert_eq!(after_commit, None);
}
use crate::mvcc::cursor::MvccLazyCursor;
use crate::mvcc::database::CommitYieldPoint::LogRecordPrepared;
use crate::mvcc::database::{MvStore, Row, RowID};
use crate::types::Text;
use crate::Value;
use crate::{Database, StepResult};
use crate::{MemoryIO, Statement};
use crate::{ValueRef, DATABASE_MANAGER};
/// Builds a database with table `mvcc_lazy_gap_test`, commits rows with ids
/// 5, 10, 15, 20 and 30 through the MVCC store, and opens a fresh transaction.
///
/// Returns `(db, tx_id, table_id, btree_root_page)`, where `tx_id` is the fresh
/// transaction and `btree_root_page` is the absolute value of the table's `rootpage`
/// as reported by `sqlite_schema`.
fn setup_test_db() -> (MvccTestDb, u64, MVTableId, i64) {
    let db = MvccTestDb::new();
    db.conn
        .execute("CREATE TABLE mvcc_lazy_gap_test(x INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    let schema_rows = get_rows(
        &db.conn,
        "SELECT rootpage FROM sqlite_schema WHERE type = 'table' AND name = 'mvcc_lazy_gap_test'",
    );
    let root_page = schema_rows[0][0].as_int().unwrap();
    let table_id = db.mvcc_store.get_table_id_from_root_page(root_page);
    let btree_root_page = root_page.abs();

    // Seed rows with deliberately non-contiguous ids.
    let seed_tx = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();
    let test_rows = [
        (5, "row5"),
        (10, "row10"),
        (15, "row15"),
        (20, "row20"),
        (30, "row30"),
    ];
    for &(row_id, data) in &test_rows {
        let id = RowID::new(table_id, RowKey::Int(row_id));
        let text = Value::Text(Text::new(data.to_string()));
        let record = ImmutableRecord::from_values(&[text], 1);
        let row = Row::new_table_row(id, record.as_blob().to_vec(), 1);
        db.mvcc_store.insert(seed_tx, row).unwrap();
    }
    commit_tx(db.mvcc_store.clone(), &db.conn, seed_tx).unwrap();

    // Hand the caller a new transaction that starts after the seed commit.
    let tx_id = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();
    (db, tx_id, table_id, btree_root_page)
}
/// Builds a database with table `mvcc_lazy_basic_test`, commits one row per entry of
/// `initial_keys` (payload text `row<key>`) through the MVCC store, and opens a fresh
/// transaction.
///
/// Returns `(db, tx_id, table_id, btree_root_page)`, where `tx_id` is the fresh
/// transaction and `btree_root_page` is the absolute value of the table's `rootpage`
/// as reported by `sqlite_schema`.
fn setup_lazy_db(initial_keys: &[i64]) -> (MvccTestDb, u64, MVTableId, i64) {
    let db = MvccTestDb::new();
    db.conn
        .execute("CREATE TABLE mvcc_lazy_basic_test(x INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    let schema_rows = get_rows(
        &db.conn,
        "SELECT rootpage FROM sqlite_schema WHERE type = 'table' AND name = 'mvcc_lazy_basic_test'",
    );
    let root_page = schema_rows[0][0].as_int().unwrap();
    let table_id = db.mvcc_store.get_table_id_from_root_page(root_page);
    let btree_root_page = root_page.abs();

    // Insert and commit the requested keys in a dedicated transaction.
    let seed_tx = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();
    for &i in initial_keys {
        let id = RowID::new(table_id, RowKey::Int(i));
        let text = Value::Text(Text::new(format!("row{i}")));
        let record = ImmutableRecord::from_values(&[text], 1);
        let row = Row::new_table_row(id, record.as_blob().to_vec(), 1);
        db.mvcc_store.insert(seed_tx, row).unwrap();
    }
    commit_tx(db.mvcc_store.clone(), &db.conn, seed_tx).unwrap();

    // Hand the caller a new transaction that starts after the seed commit.
    let tx_id = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();
    (db, tx_id, table_id, btree_root_page)
}
/// With a yield injector armed for `CursorYieldPoint::NextStart`, the first `next()`
/// on an MVCC cursor must return an explicit-yield IO result.
#[test]
fn test_mvcc_cursor_next_yields_with_injected_yield() {
    let db = MvccTestDb::new();
    db.conn
        .execute("CREATE TABLE cursor_yield_test(x INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    let schema_rows = get_rows(
        &db.conn,
        "SELECT rootpage FROM sqlite_schema WHERE type = 'table' AND name = 'cursor_yield_test'",
    );
    let root_page = schema_rows[0][0].as_int().unwrap();
    let table_id = db.mvcc_store.get_table_id_from_root_page(root_page);

    let tx_id = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();

    // Arm the injector before the cursor exists.
    db.conn.set_yield_injector(Some(FixedYieldInjector::new([
        CursorYieldPoint::NextStart.point()
    ])));

    let mut cursor = MvccLazyCursor::new(
        db.mvcc_store.clone(),
        &db.conn,
        tx_id,
        i64::from(table_id),
        MvccCursorType::Table,
        Box::new(BTreeCursor::new(
            db.conn.pager.load().clone(),
            root_page.abs(),
            1,
        )),
    )
    .unwrap();

    let saw_yield = matches!(
        cursor.next().unwrap(),
        IOResult::IO(io) if io.is_explicit_yield()
    );

    // Roll back before asserting so the transaction is released even when the check fails.
    // NOTE(review): the literal `0` is the database id; other call sites in this file pass
    // `crate::MAIN_DB_ID` — confirm they are the same value.
    db.mvcc_store
        .rollback_tx(tx_id, db.conn.pager.load().clone(), db.conn.as_ref(), 0);

    assert!(
        saw_yield,
        "MVCC cursor should inject an explicit yield on the first next() transition",
    );
}
/// Drives the commit state machine of `tx_id` on the main database to completion.
///
/// Each time the state machine reports pending IO, the IO is waited on using the
/// connection's IO backend before stepping again.
///
/// # Errors
/// Returns any error raised while stepping the state machine or waiting on IO.
///
/// # Panics
/// Panics if the commit state machine cannot be created, or if it is not finalized
/// once stepping reports completion.
pub(crate) fn commit_tx(
    mv_store: Arc<MvStore<MvccClock>>,
    conn: &Arc<Connection>,
    tx_id: u64,
) -> Result<()> {
    let mut sm = mv_store.commit_tx(tx_id, conn, crate::MAIN_DB_ID).unwrap();
    // `IOResult` is either pending IO or `Done`; keep stepping while IO is pending.
    while let IOResult::IO(io) = sm.step(&mv_store)? {
        io.wait(conn.db.io.as_ref())?;
    }
    assert!(sm.is_finalized());
    Ok(())
}
pub(crate) fn commit_tx_no_conn(
db: &MvccTestDbNoConn,
tx_id: u64,
conn: &Arc<Connection>,
) -> Result<(), LimboError> {
let mv_store = db.get_mvcc_store();
let mut sm = mv_store.commit_tx(tx_id, conn, crate::MAIN_DB_ID).unwrap();
loop {
let res = sm.step(&mv_store)?;
match res {
IOResult::IO(io) => {
io.wait(conn.db.io.as_ref())?;
}
IOResult::Done(_) => break,
}
}
assert!(sm.is_finalized());
Ok(())
}
/// Scans a table seeded with keys 1 through 5 using `MvccLazyCursor` and checks that
/// rows come back in ascending order and that the cursor reports exhaustion afterwards.
#[test]
fn test_lazy_scan_cursor_basic() {
    let (db, tx_id, table_id, btree_root_page) = setup_lazy_db(&[1, 2, 3, 4, 5]);
    let mut cursor = MvccLazyCursor::new(
        db.mvcc_store.clone(),
        &db.conn,
        tx_id,
        i64::from(table_id),
        MvccCursorType::Table,
        Box::new(BTreeCursor::new(
            db.conn.pager.load().clone(),
            btree_root_page,
            1,
        )),
    )
    .unwrap();

    // The first advance lands on row 1.
    let first_step = cursor.next().unwrap();
    assert!(matches!(first_step, IOResult::Done(())));
    assert!(cursor.has_record());
    assert!(!cursor.is_empty());
    let first_row = cursor.read_mvcc_current_row().unwrap().unwrap();
    assert_eq!(first_row.id.row_id.to_int_or_panic(), 1);

    // Every further advance must complete synchronously and yield the next id.
    let mut count = 1;
    loop {
        match cursor.next().unwrap() {
            IOResult::Done(()) => {}
            res => panic!("unexpected next result {res:?}"),
        }
        if !cursor.has_record() {
            break;
        }
        count += 1;
        let current = cursor.read_mvcc_current_row().unwrap().unwrap();
        assert_eq!(current.id.row_id.to_int_or_panic(), count);
    }
    assert_eq!(count, 5);

    // Advancing past the end keeps the cursor exhausted.
    let past_end = cursor.next().unwrap();
    assert!(matches!(past_end, IOResult::Done(())));
    assert!(!cursor.has_record());
    assert!(cursor.is_empty());
}
/// Scans the gap-seeded table (ids 5, 10, 15, 20, 30) and checks that `rowid()`
/// reports exactly those ids, in that order.
#[test]
fn test_lazy_scan_cursor_with_gaps() {
    let (db, tx_id, table_id, btree_root_page) = setup_test_db();
    let mut cursor = MvccLazyCursor::new(
        db.mvcc_store.clone(),
        &db.conn,
        tx_id,
        i64::from(table_id),
        MvccCursorType::Table,
        Box::new(BTreeCursor::new(
            db.conn.pager.load().clone(),
            btree_root_page,
            1,
        )),
    )
    .unwrap();

    // The first advance lands on the smallest id.
    let first_step = cursor.next().unwrap();
    assert!(matches!(first_step, IOResult::Done(())));
    assert!(cursor.has_record());
    assert!(!cursor.is_empty());
    let first_row = cursor.read_mvcc_current_row().unwrap().unwrap();
    assert_eq!(first_row.id.row_id.to_int_or_panic(), 5);

    let expected_ids = [5, 10, 15, 20, 30];
    let mut index = 0;
    let first_rowid = match cursor.rowid().unwrap() {
        IOResult::Done(rowid) => rowid.unwrap(),
        _ => unreachable!(),
    };
    assert_eq!(first_rowid, expected_ids[index]);

    loop {
        match cursor.next().unwrap() {
            IOResult::Done(()) => {}
            res => panic!("unexpected next result {res:?}"),
        }
        if !cursor.has_record() {
            break;
        }
        index += 1;
        // Guard the lookup; an over-long scan is caught by the final index assertion.
        if index < expected_ids.len() {
            let rowid = match cursor.rowid().unwrap() {
                IOResult::Done(rowid) => rowid.unwrap(),
                _ => unreachable!(),
            };
            assert_eq!(rowid, expected_ids[index]);
        }
    }
    assert_eq!(index, expected_ids.len() - 1);
}
/// Walks a five-row table (keys 1 through 5) with `MvccLazyCursor`, checking row order
/// and the cursor's state once the scan is exhausted.
#[test]
fn test_cursor_basic() {
    let (db, tx_id, table_id, btree_root_page) = setup_lazy_db(&[1, 2, 3, 4, 5]);
    let mut cursor = MvccLazyCursor::new(
        db.mvcc_store.clone(),
        &db.conn,
        tx_id,
        i64::from(table_id),
        MvccCursorType::Table,
        Box::new(BTreeCursor::new(
            db.conn.pager.load().clone(),
            btree_root_page,
            1,
        )),
    )
    .unwrap();

    // Position on the first row; the step result itself is not inspected here.
    let _ = cursor.next().unwrap();
    assert!(!cursor.is_empty());
    let first_row = cursor.read_mvcc_current_row().unwrap().unwrap();
    assert_eq!(first_row.id.row_id.to_int_or_panic(), 1);

    let mut count = 1;
    loop {
        match cursor.next().unwrap() {
            IOResult::Done(()) => {}
            res => panic!("unexpected next result {res:?}"),
        }
        if !cursor.has_record() {
            break;
        }
        count += 1;
        let current = cursor.read_mvcc_current_row().unwrap().unwrap();
        assert_eq!(current.id.row_id.to_int_or_panic(), count);
    }
    assert_eq!(count, 5);

    // Advancing past the end keeps the cursor exhausted.
    let past_end = cursor.next().unwrap();
    assert!(matches!(past_end, IOResult::Done(())));
    assert!(!cursor.has_record());
    assert!(cursor.is_empty());
}
/// A cursor opened on a table with no rows reports itself empty and has no rowid.
#[test]
fn test_cursor_with_empty_table() {
    let db = MvccTestDb::new();

    // Begin and commit a transaction that writes nothing.
    {
        let pager = db.conn.pager.load().clone();
        let empty_tx = db.mvcc_store.begin_tx(pager).unwrap();
        commit_tx(db.mvcc_store.clone(), &db.conn, empty_tx).unwrap();
    }

    let tx_id = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();
    let table_id = -1;
    // The B-tree cursor is given the negated table id as its root page.
    let btree_cursor = Box::new(BTreeCursor::new(db.conn.pager.load().clone(), -table_id, 1));
    let mut cursor = MvccLazyCursor::new(
        db.mvcc_store.clone(),
        &db.conn,
        tx_id,
        table_id,
        MvccCursorType::Table,
        btree_cursor,
    )
    .unwrap();

    assert!(cursor.is_empty());
    let rowid = cursor.rowid().unwrap();
    assert!(matches!(rowid, IOResult::Done(None)));
}
/// Inserts row 3 through the cursor after it has read row 1 of a table holding
/// 1, 2, 4, 5, and expects the rest of the scan to yield 2, 3, 4, 5.
#[test]
fn test_cursor_modification_during_scan() {
    let _ = tracing_subscriber::fmt::try_init();
    let (db, tx_id, table_id, btree_root_page) = setup_lazy_db(&[1, 2, 4, 5]);
    let mut cursor = MvccLazyCursor::new(
        db.mvcc_store.clone(),
        &db.conn,
        tx_id,
        i64::from(table_id),
        MvccCursorType::Table,
        Box::new(BTreeCursor::new(
            db.conn.pager.load().clone(),
            btree_root_page,
            1,
        )),
    )
    .unwrap();

    // Read the first row before mutating the table.
    let first_step = cursor.next().unwrap();
    assert!(matches!(first_step, IOResult::Done(())));
    assert!(cursor.has_record());
    let first_row = cursor.read_mvcc_current_row().unwrap().unwrap();
    assert_eq!(first_row.id.row_id.to_int_or_panic(), 1);

    // Insert key 3, which falls into the gap ahead of the cursor position.
    let new_row_id = RowID::new(table_id, RowKey::Int(3));
    let new_row = generate_simple_string_record("new_row");
    let insert_key = BTreeKey::TableRowId((new_row_id.row_id.to_int_or_panic(), Some(&new_row)));
    let _ = cursor.insert(&insert_key).unwrap();

    // Collect the ids seen by the remainder of the scan.
    let mut read_rowids = vec![];
    loop {
        match cursor.next().unwrap() {
            IOResult::Done(()) => {}
            res => panic!("unexpected next result {res:?}"),
        }
        if !cursor.has_record() {
            break;
        }
        let current = cursor.read_mvcc_current_row().unwrap().unwrap();
        read_rowids.push(current.id.row_id.to_int_or_panic());
    }
    assert_eq!(read_rowids, vec![2, 3, 4, 5]);

    // Advancing past the end keeps the cursor exhausted.
    let past_end = cursor.next().unwrap();
    assert!(matches!(past_end, IOResult::Done(())));
    assert!(!cursor.has_record());
    assert!(cursor.is_empty());
}
/// Builds a standalone `Transaction` for visibility and commit-dependency tests.
///
/// Only `tx_id`, `begin_ts` and `state` come from the caller. All other fields start
/// out empty, zeroed or `false`, and the header is `DatabaseHeader::default()`.
fn new_tx(tx_id: TxID, begin_ts: u64, state: TransactionState) -> Transaction {
    Transaction {
        state: state.into(),
        tx_id,
        begin_ts,
        write_set: Mutex::new(WriteSet::new()),
        read_set: SkipSet::new(),
        header: RwLock::new(DatabaseHeader::default()),
        header_dirty: AtomicBool::new(false),
        savepoint_stack: RwLock::new(Vec::new()),
        pager_commit_lock_held: AtomicBool::new(false),
        commit_dep_counter: AtomicU64::new(0),
        abort_now: AtomicBool::new(false),
        commit_dep_set: Mutex::new(HashSet::default()),
    }
}
/// Table-driven check of `RowVersion::is_visible_to` for a reader with `begin_ts = 4`
/// (itself `Preparing(7)`) against begin/end markers that name transactions in various
/// states.
///
/// Transactions referenced by the row versions:
///
/// | tx | begin_ts | state         |
/// |----|----------|---------------|
/// | 1  | 1        | Committed(2)  |
/// | 2  | 2        | Committed(5)  |
/// | 3  | 3        | Aborted       |
/// | 5  | 5        | Preparing(8)  |
/// | 6  | 6        | Committed(10) |
/// | 7  | 7        | Active        |
/// | 8  | 1        | Preparing(3)  |
#[test]
fn test_snapshot_isolation_tx_visible1() {
    let txs: SkipMap<TxID, Transaction> = SkipMap::from_iter([
        (1, new_tx(1, 1, TransactionState::Committed(2))),
        (2, new_tx(2, 2, TransactionState::Committed(5))),
        (3, new_tx(3, 3, TransactionState::Aborted)),
        (5, new_tx(5, 5, TransactionState::Preparing(8))),
        (6, new_tx(6, 6, TransactionState::Committed(10))),
        (7, new_tx(7, 7, TransactionState::Active)),
        (8, new_tx(8, 1, TransactionState::Preparing(3))),
    ]);
    let finalized_tx_states: SkipMap<TxID, TransactionState> = SkipMap::new();
    let current_tx = new_tx(4, 4, TransactionState::Preparing(7));

    // Builds a throwaway row version with the given markers and asks whether the
    // reader (`current_tx`) can see it.
    let rv_visible = |begin: Option<TxTimestampOrID>, end: Option<TxTimestampOrID>| {
        let row_version = RowVersion {
            id: 0,
            begin,
            end,
            row: generate_simple_string_row((-2).into(), 1, "testme"),
            btree_resident: false,
        };
        tracing::debug!("Testing visibility of {row_version:?}");
        row_version.is_visible_to(&current_tx, &txs, &finalized_tx_states)
    };

    // Begin marker names tx1 (committed at 2, before the reader began at 4): visible.
    assert!(rv_visible(Some(TxTimestampOrID::TxID(1)), None));
    // Begin marker names tx2 (committed at 5, after the reader began): not visible.
    assert!(!rv_visible(Some(TxTimestampOrID::TxID(2)), None));
    // Begin marker names the aborted tx3: not visible.
    assert!(!rv_visible(Some(TxTimestampOrID::TxID(3)), None));

    // End marker names tx1 (committed at 2): the version is already ended for the reader.
    assert!(!rv_visible(
        Some(TxTimestampOrID::Timestamp(0)),
        Some(TxTimestampOrID::TxID(1))
    ));
    // End marker names tx2 (committed at 5): the reader still sees the version.
    assert!(rv_visible(
        Some(TxTimestampOrID::Timestamp(0)),
        Some(TxTimestampOrID::TxID(2))
    ));
    // End marker names the aborted tx3: the reader still sees the version.
    assert!(rv_visible(
        Some(TxTimestampOrID::Timestamp(0)),
        Some(TxTimestampOrID::TxID(3))
    ));

    // Begin marker names tx5 (Preparing(8), later than the reader's begin): not visible.
    assert!(!rv_visible(Some(TxTimestampOrID::TxID(5)), None));
    // Begin marker names tx8 (Preparing(3), earlier than the reader's begin): visible,
    // and the read is recorded as a commit dependency on the reader.
    assert!(rv_visible(Some(TxTimestampOrID::TxID(8)), None));
    assert_eq!(
        current_tx.commit_dep_counter.load(Ordering::Acquire),
        1,
        "speculative read should register a commit dependency"
    );

    // tx6 (Committed(10)) and tx7 (Active) are not visible as begin markers. The pair is
    // asserted twice; the second pass is kept as in the original test.
    assert!(!rv_visible(Some(TxTimestampOrID::TxID(6)), None));
    assert!(!rv_visible(Some(TxTimestampOrID::TxID(7)), None));
    assert!(!rv_visible(Some(TxTimestampOrID::TxID(6)), None));
    assert!(!rv_visible(Some(TxTimestampOrID::TxID(7)), None));

    // End marker names tx5 (Preparing(8)): the reader still sees the version.
    assert!(rv_visible(
        Some(TxTimestampOrID::Timestamp(0)),
        Some(TxTimestampOrID::TxID(5))
    ));
    // Version begins at timestamp 6, after the reader began: not visible.
    assert!(!rv_visible(
        Some(TxTimestampOrID::Timestamp(6)),
        Some(TxTimestampOrID::TxID(6))
    ));
    // End marker names the active tx7: the reader still sees the version.
    assert!(rv_visible(
        Some(TxTimestampOrID::Timestamp(0)),
        Some(TxTimestampOrID::TxID(7))
    ));

    // A version with no begin marker at all is never visible.
    assert!(!rv_visible(None, None));
}
/// When a row version still names a transaction that is gone from the live transaction
/// map, visibility must be decided from the finalized-state map instead.
#[test]
fn test_visibility_uses_finalized_state_for_removed_committed_tx() {
    // tx 42 exists only in the finalized map, recorded as committed at ts 5.
    let txs: SkipMap<TxID, Transaction> = SkipMap::new();
    let finalized_tx_states: SkipMap<TxID, TransactionState> =
        SkipMap::from_iter([(42, TransactionState::Committed(5))]);
    let reader = new_tx(7, 10, TransactionState::Active);

    // Inserted by tx 42: committed before the reader began, so it is visible.
    let inserted_row = RowVersion {
        id: 1,
        begin: Some(TxTimestampOrID::TxID(42)),
        end: None,
        row: generate_simple_string_row((-2).into(), 1, "x"),
        btree_resident: false,
    };
    let inserted_visible = inserted_row.is_visible_to(&reader, &txs, &finalized_tx_states);
    assert!(
        inserted_visible,
        "stale begin=TxID should resolve via finalized committed state"
    );

    // Deleted by tx 42: the delete is committed, so the version is hidden.
    let deleted_row = RowVersion {
        id: 2,
        begin: Some(TxTimestampOrID::Timestamp(1)),
        end: Some(TxTimestampOrID::TxID(42)),
        row: generate_simple_string_row((-2).into(), 2, "y"),
        btree_resident: false,
    };
    let deleted_visible = deleted_row.is_visible_to(&reader, &txs, &finalized_tx_states);
    assert!(
        !deleted_visible,
        "stale end=TxID should resolve via finalized committed state"
    );
}
/// Committing a transaction that only read must leave the number of entries in
/// `finalized_tx_states` unchanged.
#[test]
fn test_read_only_commit_does_not_cache_finalized_state() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    let mvcc_store = db.get_mvcc_store();
    conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v INTEGER)")
        .unwrap();

    // Run a GC pass first, then record the entry count to compare against.
    mvcc_store.drop_unused_row_versions();
    let baseline = mvcc_store.finalized_tx_states.len();

    // A read-only BEGIN CONCURRENT .. COMMIT cycle.
    conn.execute("BEGIN CONCURRENT").unwrap();
    let _ = get_rows(&conn, "SELECT 1");
    conn.execute("COMMIT").unwrap();

    let after_commit = mvcc_store.finalized_tx_states.len();
    assert_eq!(
        after_commit, baseline,
        "read-only commit should not add finalized tx cache entries"
    );
}
/// A write commit adds entries to `finalized_tx_states`, and a later
/// `drop_unused_row_versions` pass brings the count back to the baseline.
#[test]
fn test_drop_unused_row_versions_prunes_unreferenced_finalized_tx_states() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    let mvcc_store = db.get_mvcc_store();
    conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v INTEGER)")
        .unwrap();

    // Run a GC pass first, then record the entry count to compare against.
    mvcc_store.drop_unused_row_versions();
    let baseline = mvcc_store.finalized_tx_states.len();

    // An auto-committed write grows the cache.
    conn.execute("INSERT INTO t VALUES (1, 1)").unwrap();
    let after_write = mvcc_store.finalized_tx_states.len();
    assert!(
        after_write > baseline,
        "write commit should add at least one finalized tx cache entry"
    );

    // Another GC pass restores the baseline count.
    mvcc_store.drop_unused_row_versions();
    let after_gc = mvcc_store.finalized_tx_states.len();
    assert_eq!(
        after_gc, baseline,
        "GC scan should prune finalized tx cache entries with no remaining TxID references"
    );
}
/// Reading a version created by a `Preparing` transaction succeeds, bumps the reader's
/// `commit_dep_counter` to 1, and records the reader's id in the writer's
/// `commit_dep_set`.
#[test]
fn test_commit_dependency_speculative_read() {
    // Writer tx 1 is Preparing with end ts 5; the reader began at ts 10.
    let txs: SkipMap<TxID, Transaction> =
        SkipMap::from_iter([(1, new_tx(1, 1, TransactionState::Preparing(5)))]);
    let finalized_tx_states: SkipMap<TxID, TransactionState> = SkipMap::new();
    let reader = new_tx(2, 10, TransactionState::Active);
    let version = RowVersion {
        id: 0,
        begin: Some(TxTimestampOrID::TxID(1)),
        end: None,
        row: generate_simple_string_row((-2).into(), 1, "test"),
        btree_resident: false,
    };

    // No dependency exists before the read.
    assert_eq!(reader.commit_dep_counter.load(Ordering::Acquire), 0);
    let visible = version.is_visible_to(&reader, &txs, &finalized_tx_states);
    assert!(visible);
    assert_eq!(reader.commit_dep_counter.load(Ordering::Acquire), 1);

    // The writer now lists the reader (tx 2) as a dependent.
    let writer_entry = txs.get(&1).unwrap();
    assert_eq!(
        *writer_entry.value().commit_dep_set.lock(),
        HashSet::from_iter([2])
    );
}
/// Emulates an abort cascade by hand: after a speculative read registers a dependency,
/// the writer is marked `Aborted` and its dependents are flagged `abort_now` with their
/// counters decremented.
#[test]
fn test_commit_dependency_cascade_abort() {
    let txs: SkipMap<TxID, Transaction> =
        SkipMap::from_iter([(1, new_tx(1, 1, TransactionState::Preparing(5)))]);
    let finalized_tx_states: SkipMap<TxID, TransactionState> = SkipMap::new();
    let reader = new_tx(2, 10, TransactionState::Active);
    let version = RowVersion {
        id: 0,
        begin: Some(TxTimestampOrID::TxID(1)),
        end: None,
        row: generate_simple_string_row((-2).into(), 1, "test"),
        btree_resident: false,
    };

    // The speculative read registers one dependency and does not abort the reader yet.
    let visible = version.is_visible_to(&reader, &txs, &finalized_tx_states);
    assert!(visible);
    assert_eq!(reader.commit_dep_counter.load(Ordering::Acquire), 1);
    assert!(!reader.abort_now.load(Ordering::Acquire));

    // Abort the writer, then publish the reader in the map so the cascade can find it.
    let writer_entry = txs.get(&1).unwrap();
    let writer = writer_entry.value();
    writer.state.store(TransactionState::Aborted);
    txs.insert(2, reader);

    // Hand-rolled cascade over the writer's dependents.
    for dep_tx_id in writer.commit_dep_set.lock().drain() {
        let Some(dep_tx_entry) = txs.get(&dep_tx_id) else {
            continue;
        };
        let dependent = dep_tx_entry.value();
        dependent.abort_now.store(true, Ordering::Release);
        dependent.commit_dep_counter.fetch_sub(1, Ordering::AcqRel);
    }

    // The reader is flagged for abort and no longer waits on anything.
    let reader_entry = txs.get(&2).unwrap();
    let reader = reader_entry.value();
    assert!(reader.abort_now.load(Ordering::Acquire));
    assert_eq!(reader.commit_dep_counter.load(Ordering::Acquire), 0);
}
/// Registering a dependency on a transaction that is already `Committed` leaves the
/// reader's counter at 0 and its `abort_now` flag clear.
#[test]
fn test_commit_dependency_already_committed() {
    let committed_writer = new_tx(1, 1, TransactionState::Committed(5));
    let txs: SkipMap<TxID, Transaction> = SkipMap::from_iter([(1, committed_writer)]);
    let reader = new_tx(2, 10, TransactionState::Active);

    register_commit_dependency(&txs, &reader, 1);

    let pending_deps = reader.commit_dep_counter.load(Ordering::Acquire);
    assert_eq!(pending_deps, 0);
    let must_abort = reader.abort_now.load(Ordering::Acquire);
    assert!(!must_abort);
}
/// Registering a dependency on a transaction that is already `Aborted` sets the
/// reader's `abort_now` flag and leaves its counter at 0.
#[test]
fn test_commit_dependency_already_aborted() {
    let aborted_writer = new_tx(1, 1, TransactionState::Aborted);
    let txs: SkipMap<TxID, Transaction> = SkipMap::from_iter([(1, aborted_writer)]);
    let reader = new_tx(2, 10, TransactionState::Active);

    register_commit_dependency(&txs, &reader, 1);

    let pending_deps = reader.commit_dep_counter.load(Ordering::Acquire);
    assert_eq!(pending_deps, 0);
    let must_abort = reader.abort_now.load(Ordering::Acquire);
    assert!(must_abort);
}
/// A version whose end marker names a `Preparing` transaction is treated as invisible,
/// and that decision is recorded as a commit dependency on the reader.
#[test]
fn test_commit_dependency_speculative_ignore() {
    // tx 1 is committed at ts 2; tx 3 is Preparing with end ts 5. The reader began at 10.
    let txs: SkipMap<TxID, Transaction> = SkipMap::from_iter([
        (1, new_tx(1, 1, TransactionState::Committed(2))),
        (3, new_tx(3, 3, TransactionState::Preparing(5))),
    ]);
    let finalized_tx_states: SkipMap<TxID, TransactionState> = SkipMap::new();
    let reader = new_tx(4, 10, TransactionState::Active);

    // Version began at ts 2 and is being ended by the preparing tx 3.
    let version = RowVersion {
        id: 0,
        begin: Some(TxTimestampOrID::Timestamp(2)),
        end: Some(TxTimestampOrID::TxID(3)),
        row: generate_simple_string_row((-2).into(), 1, "test"),
        btree_resident: false,
    };

    let visible = version.is_visible_to(&reader, &txs, &finalized_tx_states);
    assert!(!visible);
    assert_eq!(
        reader.commit_dep_counter.load(Ordering::Acquire),
        1,
        "speculative ignore should register a commit dependency"
    );
}
/// Three speculative reads of versions created by the same `Preparing` transaction
/// register a single dependency: the reader's counter is 1 and the writer's
/// `commit_dep_set` holds one entry.
#[test]
fn test_commit_dependency_multiple_reads_dedup() {
    let txs: SkipMap<TxID, Transaction> =
        SkipMap::from_iter([(1, new_tx(1, 1, TransactionState::Preparing(5)))]);
    let finalized_tx_states: SkipMap<TxID, TransactionState> = SkipMap::new();
    let reader = new_tx(2, 10, TransactionState::Active);

    // Each version differs only in its row id; all are created by tx 1.
    let version_for = |row_id: i64| RowVersion {
        id: row_id as u64,
        begin: Some(TxTimestampOrID::TxID(1)),
        end: None,
        row: generate_simple_string_row((-2).into(), row_id, "test"),
        btree_resident: false,
    };
    for row_id in 1..=3 {
        assert!(version_for(row_id).is_visible_to(&reader, &txs, &finalized_tx_states));
    }

    assert_eq!(reader.commit_dep_counter.load(Ordering::Acquire), 1);
    let writer_entry = txs.get(&1).unwrap();
    let dependents = writer_entry.value().commit_dep_set.lock().len();
    assert_eq!(dependents, 1);
}
/// A reader thread speculatively reads a row written by a transaction that was forced
/// into `Preparing`, writes a row of its own, and then tries to commit. The main thread
/// rolls the writer back, and the reader's COMMIT is expected to fail with
/// `CommitDependencyAborted`, leaving neither transaction's changes behind.
#[test]
fn test_commit_dep_threaded_abort_cascades() {
let db = MvccTestDbNoConn::new_with_random_db();
// Seed the table with one committed row.
{
let conn = db.connect();
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'initial')").unwrap();
conn.close().unwrap();
}
let mvcc_store = db.get_mvcc_store();
// The writer updates the row but never issues COMMIT.
let writer_conn = db.connect();
writer_conn.execute("BEGIN CONCURRENT").unwrap();
writer_conn
.execute("UPDATE t SET value = 'modified' WHERE id = 1")
.unwrap();
let writer_tx_id = writer_conn.get_mv_tx_id().unwrap();
// Put the writer into Preparing(ts) by hand, from inside the commit-timestamp callback.
let _end_ts = mvcc_store.get_commit_timestamp(|ts| {
mvcc_store
.txs
.get(&writer_tx_id)
.unwrap()
.value()
.state
.store(TransactionState::Preparing(ts));
});
let (signal_tx, signal_rx) = std::sync::mpsc::channel();
let db_arc = db.get_db();
let reader_handle = std::thread::spawn(move || {
let reader_conn = db_arc.connect().unwrap();
reader_conn.execute("BEGIN CONCURRENT").unwrap();
let mut stmt = reader_conn
.prepare("SELECT value FROM t WHERE id = 1")
.unwrap();
let rows = stmt.run_collect_rows().unwrap();
reader_conn
.execute("INSERT INTO t VALUES (2, 'reader_data')")
.unwrap();
// Signal only after the read and the write are done; COMMIT is issued after the signal.
signal_tx.send(()).unwrap();
let commit_result = reader_conn.execute("COMMIT");
let _ = reader_conn.close(); (rows, commit_result)
});
// Wait for the reader's signal, then roll the writer back while the reader commits.
signal_rx.recv().unwrap();
mvcc_store.rollback_tx(
writer_tx_id,
writer_conn.pager.load().clone(),
&writer_conn,
crate::MAIN_DB_ID,
);
let (rows, commit_result) = reader_handle.join().unwrap();
assert_eq!(rows.len(), 1);
assert_eq!(
rows[0][0].to_text().unwrap(),
"modified",
"reader should have speculatively read the Preparing writer's value"
);
assert!(
matches!(commit_result, Err(LimboError::CommitDependencyAborted)),
"expected CommitDependencyAborted, got: {commit_result:?}",
);
// A fresh connection sees the original value and no row from the reader.
{
let conn = db.connect();
let mut stmt = conn.prepare("SELECT value FROM t WHERE id = 1").unwrap();
let rows = stmt.run_collect_rows().unwrap();
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].to_text().unwrap(), "initial");
let mut stmt = conn.prepare("SELECT * FROM t WHERE id = 2").unwrap();
let rows = stmt.run_collect_rows().unwrap();
assert!(
rows.is_empty(),
"reader's write should not be visible after cascade abort"
);
}
}
/// Four reader threads each speculatively read a row written by a transaction that was
/// forced into `Preparing`, insert a row of their own, and then try to commit. After the
/// writer is rolled back, every reader's COMMIT is expected to fail with
/// `CommitDependencyAborted`, and only the seed row remains.
#[test]
fn test_commit_dep_threaded_multiple_dependents_abort() {
let db = MvccTestDbNoConn::new_with_random_db();
// Seed the table with one committed row.
{
let conn = db.connect();
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'initial')").unwrap();
conn.close().unwrap();
}
let mvcc_store = db.get_mvcc_store();
// The writer updates the row but never issues COMMIT.
let writer_conn = db.connect();
writer_conn.execute("BEGIN CONCURRENT").unwrap();
writer_conn
.execute("UPDATE t SET value = 'modified' WHERE id = 1")
.unwrap();
let writer_tx_id = writer_conn.get_mv_tx_id().unwrap();
// Put the writer into Preparing(ts) by hand, from inside the commit-timestamp callback.
let _end_ts = mvcc_store.get_commit_timestamp(|ts| {
mvcc_store
.txs
.get(&writer_tx_id)
.unwrap()
.value()
.state
.store(TransactionState::Preparing(ts));
});
let num_readers = 4;
// One barrier slot per reader plus one for the main thread.
let barrier = std::sync::Arc::new(std::sync::Barrier::new(num_readers + 1));
let mut handles = Vec::new();
for i in 0..num_readers {
let db_arc = db.get_db();
let barrier_clone = barrier.clone();
handles.push(std::thread::spawn(move || {
let conn = db_arc.connect().unwrap();
conn.execute("BEGIN CONCURRENT").unwrap();
let mut stmt = conn.prepare("SELECT value FROM t WHERE id = 1").unwrap();
let rows = stmt.run_collect_rows().unwrap();
// Each reader inserts a distinct id (10 + i).
conn.execute(format!("INSERT INTO t VALUES ({}, 'reader_{i}')", i + 10,))
.unwrap();
// Rendezvous: every reader has finished its read and write before anyone commits.
barrier_clone.wait();
let commit_result = conn.execute("COMMIT");
let _ = conn.close();
(rows, commit_result)
}));
}
// The main thread passes the barrier together with the readers, then rolls the writer back.
barrier.wait();
mvcc_store.rollback_tx(
writer_tx_id,
writer_conn.pager.load().clone(),
&writer_conn,
crate::MAIN_DB_ID,
);
for handle in handles {
let (rows, commit_result) = handle.join().unwrap();
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].to_text().unwrap(), "modified");
assert!(
matches!(commit_result, Err(LimboError::CommitDependencyAborted)),
"expected CommitDependencyAborted, got: {commit_result:?}",
);
}
// Only the seed row is left: none of the readers' inserts were committed.
{
let conn = db.connect();
let mut stmt = conn.prepare("SELECT count(*) FROM t").unwrap();
let rows = stmt.run_collect_rows().unwrap();
assert_eq!(rows[0][0].as_int().unwrap(), 1);
}
}
/// A reader thread speculatively reads a row written by a transaction that was forced
/// into `Preparing`, writes a row of its own, and then tries to commit. The main thread
/// finishes the writer's commit by hand, and the reader's COMMIT is expected to succeed
/// with both transactions' rows visible afterwards.
#[test]
fn test_commit_dep_threaded_commit_resolves() {
let db = MvccTestDbNoConn::new_with_random_db();
// Seed the table with one committed row.
{
let conn = db.connect();
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'initial')").unwrap();
conn.close().unwrap();
}
let mvcc_store = db.get_mvcc_store();
// The writer updates the row but never issues COMMIT.
let writer_conn = db.connect();
writer_conn.execute("BEGIN CONCURRENT").unwrap();
writer_conn
.execute("UPDATE t SET value = 'committed' WHERE id = 1")
.unwrap();
let writer_tx_id = writer_conn.get_mv_tx_id().unwrap();
// Put the writer into Preparing(ts) by hand; end_ts is reused for the manual commit below.
let end_ts = mvcc_store.get_commit_timestamp(|ts| {
mvcc_store
.txs
.get(&writer_tx_id)
.unwrap()
.value()
.state
.store(TransactionState::Preparing(ts));
});
let (signal_tx, signal_rx) = std::sync::mpsc::channel();
let db_arc = db.get_db();
let reader_handle = std::thread::spawn(move || {
let reader_conn = db_arc.connect().unwrap();
reader_conn.execute("BEGIN CONCURRENT").unwrap();
let mut stmt = reader_conn
.prepare("SELECT value FROM t WHERE id = 1")
.unwrap();
let rows = stmt.run_collect_rows().unwrap();
reader_conn
.execute("INSERT INTO t VALUES (2, 'reader_data')")
.unwrap();
// Signal only after the read and the write are done; COMMIT is issued after the signal.
signal_tx.send(()).unwrap();
let commit_result = reader_conn.execute("COMMIT");
let _ = reader_conn.close();
(rows, commit_result)
});
signal_rx.recv().unwrap();
// Finish the writer's commit by hand while the reader is committing.
{
let writer_tx = mvcc_store.txs.get(&writer_tx_id).unwrap();
let writer_tx = writer_tx.value();
// Rewrite every begin/end marker that names the writer's TxID into its commit timestamp.
for entry in mvcc_store.rows.iter() {
let mut rvs = entry.value().write();
for rv in rvs.iter_mut() {
if rv.begin == Some(TxTimestampOrID::TxID(writer_tx_id)) {
rv.begin = Some(TxTimestampOrID::Timestamp(end_ts));
}
if rv.end == Some(TxTimestampOrID::TxID(writer_tx_id)) {
rv.end = Some(TxTimestampOrID::Timestamp(end_ts));
}
}
}
writer_tx.state.store(TransactionState::Committed(end_ts));
// Decrement the dependency counter of every transaction registered on the writer.
for dep_tx_id in writer_tx.commit_dep_set.lock().drain() {
if let Some(dep_tx_entry) = mvcc_store.txs.get(&dep_tx_id) {
dep_tx_entry
.value()
.commit_dep_counter
.fetch_sub(1, Ordering::AcqRel);
}
}
}
let (rows, commit_result) = reader_handle.join().unwrap();
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].to_text().unwrap(), "committed");
assert!(
commit_result.is_ok(),
"expected reader COMMIT to succeed, got: {commit_result:?}",
);
// A fresh connection sees the writer's update and the reader's insert.
{
let conn = db.connect();
let mut stmt = conn.prepare("SELECT value FROM t ORDER BY id").unwrap();
let rows = stmt.run_collect_rows().unwrap();
assert_eq!(rows.len(), 2);
assert_eq!(rows[0][0].to_text().unwrap(), "committed");
assert_eq!(rows[1][0].to_text().unwrap(), "reader_data");
}
}
/// Like the write-path cascade test, but the reader thread only reads. After the
/// writer (forced into `Preparing`) is rolled back, the reader's COMMIT is still
/// expected to fail with `CommitDependencyAborted`.
#[test]
fn test_commit_dep_threaded_readonly_abort_cascades() {
let db = MvccTestDbNoConn::new_with_random_db();
// Seed the table with one committed row.
{
let conn = db.connect();
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'initial')").unwrap();
conn.close().unwrap();
}
let mvcc_store = db.get_mvcc_store();
// The writer updates the row but never issues COMMIT.
let writer_conn = db.connect();
writer_conn.execute("BEGIN CONCURRENT").unwrap();
writer_conn
.execute("UPDATE t SET value = 'modified' WHERE id = 1")
.unwrap();
let writer_tx_id = writer_conn.get_mv_tx_id().unwrap();
// Put the writer into Preparing(ts) by hand, from inside the commit-timestamp callback.
let _end_ts = mvcc_store.get_commit_timestamp(|ts| {
mvcc_store
.txs
.get(&writer_tx_id)
.unwrap()
.value()
.state
.store(TransactionState::Preparing(ts));
});
let (signal_tx, signal_rx) = std::sync::mpsc::channel();
let db_arc = db.get_db();
let reader_handle = std::thread::spawn(move || {
let reader_conn = db_arc.connect().unwrap();
reader_conn.execute("BEGIN CONCURRENT").unwrap();
let mut stmt = reader_conn
.prepare("SELECT value FROM t WHERE id = 1")
.unwrap();
let rows = stmt.run_collect_rows().unwrap();
// Signal after the read; this reader performs no writes before COMMIT.
signal_tx.send(()).unwrap();
let commit_result = reader_conn.execute("COMMIT");
let _ = reader_conn.close();
(rows, commit_result)
});
// Wait for the reader's signal, then roll the writer back while the reader commits.
signal_rx.recv().unwrap();
mvcc_store.rollback_tx(
writer_tx_id,
writer_conn.pager.load().clone(),
&writer_conn,
crate::MAIN_DB_ID,
);
let (rows, commit_result) = reader_handle.join().unwrap();
assert_eq!(rows.len(), 1);
assert_eq!(rows[0][0].to_text().unwrap(), "modified");
assert!(
matches!(commit_result, Err(LimboError::CommitDependencyAborted)),
"read-only tx should fail with CommitDependencyAborted, got: {commit_result:?}",
);
}
/// Registering one dependency and then decrementing once leaves the counter at
/// exactly 0.
#[test]
fn test_commit_dependency_counter_no_underflow() {
    let preparing_writer = new_tx(1, 1, TransactionState::Preparing(5));
    let txs: SkipMap<TxID, Transaction> = SkipMap::from_iter([(1, preparing_writer)]);
    let reader = new_tx(2, 10, TransactionState::Active);

    // One registration raises the counter to 1.
    register_commit_dependency(&txs, &reader, 1);
    let counter = &reader.commit_dep_counter;
    assert_eq!(counter.load(Ordering::Acquire), 1);

    // One decrement brings it back to 0.
    counter.fetch_sub(1, Ordering::AcqRel);
    assert_eq!(
        counter.load(Ordering::Acquire),
        0,
        "counter should be exactly 0, not u64::MAX (underflow)"
    );
}
/// Registering a dependency on a `Terminated` transaction sets the reader's
/// `abort_now` flag without incrementing its dependency counter.
#[test]
fn test_commit_dependency_terminated_tx_sets_abort() {
    let terminated_writer = new_tx(1, 1, TransactionState::Terminated);
    let txs: SkipMap<TxID, Transaction> = SkipMap::from_iter([(1, terminated_writer)]);
    let reader = new_tx(2, 10, TransactionState::Active);

    register_commit_dependency(&txs, &reader, 1);

    let must_abort = reader.abort_now.load(Ordering::Acquire);
    assert!(
        must_abort,
        "dependency on Terminated tx should set abort_now"
    );
    assert_eq!(
        reader.commit_dep_counter.load(Ordering::Acquire),
        0,
        "no counter increment for aborted/terminated dependency"
    );
}
/// Registering a dependency on a transaction id that is absent from the map leaves the
/// reader untouched: `abort_now` stays clear and the counter stays at 0.
#[test]
fn test_commit_dependency_missing_tx_assumes_committed() {
    // Empty map: tx 99 is not tracked.
    let txs: SkipMap<TxID, Transaction> = SkipMap::new();
    let reader = new_tx(2, 10, TransactionState::Active);

    register_commit_dependency(&txs, &reader, 99);

    let must_abort = reader.abort_now.load(Ordering::Acquire);
    assert!(
        !must_abort,
        "missing tx (committed+removed) should not set abort_now"
    );
    let pending_deps = reader.commit_dep_counter.load(Ordering::Acquire);
    assert_eq!(pending_deps, 0);
}
/// A read-only transaction that depended on a `Preparing` writer commits successfully
/// once the writer's commit is finished by hand, and `last_committed_tx_ts` is the same
/// before and after.
#[test]
fn test_commit_dep_readonly_does_not_advance_timestamp() {
let db = MvccTestDbNoConn::new_with_random_db();
// Seed the table with one committed row.
{
let conn = db.connect();
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'initial')").unwrap();
conn.close().unwrap();
}
let mvcc_store = db.get_mvcc_store();
// Watermark captured after the seed commit and before any further activity.
let ts_before = mvcc_store.last_committed_tx_ts.load(Ordering::Acquire);
// The writer updates the row but never issues COMMIT.
let writer_conn = db.connect();
writer_conn.execute("BEGIN CONCURRENT").unwrap();
writer_conn
.execute("UPDATE t SET value = 'modified' WHERE id = 1")
.unwrap();
let writer_tx_id = writer_conn.get_mv_tx_id().unwrap();
// Put the writer into Preparing(ts) by hand; end_ts is reused for the manual commit below.
let end_ts = mvcc_store.get_commit_timestamp(|ts| {
mvcc_store
.txs
.get(&writer_tx_id)
.unwrap()
.value()
.state
.store(TransactionState::Preparing(ts));
});
let (signal_tx, signal_rx) = std::sync::mpsc::channel();
let db_arc = db.get_db();
// Second handle to the store, used for the final watermark read.
let mvcc_clone = mvcc_store.clone();
let reader_handle = std::thread::spawn(move || {
let reader_conn = db_arc.connect().unwrap();
reader_conn.execute("BEGIN CONCURRENT").unwrap();
let mut stmt = reader_conn
.prepare("SELECT value FROM t WHERE id = 1")
.unwrap();
let _rows = stmt.run_collect_rows().unwrap();
// Signal after the read; this reader performs no writes before COMMIT.
signal_tx.send(()).unwrap();
let commit_result = reader_conn.execute("COMMIT");
let _ = reader_conn.close();
commit_result
});
signal_rx.recv().unwrap();
// Finish the writer's commit by hand. This block never stores to last_committed_tx_ts.
{
let writer_tx = mvcc_store.txs.get(&writer_tx_id).unwrap();
let writer_tx = writer_tx.value();
// Rewrite every begin/end marker that names the writer's TxID into its commit timestamp.
for entry in mvcc_store.rows.iter() {
let mut rvs = entry.value().write();
for rv in rvs.iter_mut() {
if rv.begin == Some(TxTimestampOrID::TxID(writer_tx_id)) {
rv.begin = Some(TxTimestampOrID::Timestamp(end_ts));
}
if rv.end == Some(TxTimestampOrID::TxID(writer_tx_id)) {
rv.end = Some(TxTimestampOrID::Timestamp(end_ts));
}
}
}
writer_tx.state.store(TransactionState::Committed(end_ts));
// Decrement the dependency counter of every transaction registered on the writer.
for dep_tx_id in writer_tx.commit_dep_set.lock().drain() {
if let Some(dep_tx_entry) = mvcc_store.txs.get(&dep_tx_id) {
dep_tx_entry
.value()
.commit_dep_counter
.fetch_sub(1, Ordering::AcqRel);
}
}
}
let commit_result = reader_handle.join().unwrap();
assert!(
commit_result.is_ok(),
"read-only tx with resolved dependency should commit: {commit_result:?}",
);
let ts_after = mvcc_clone.last_committed_tx_ts.load(Ordering::Acquire);
assert_eq!(
ts_before, ts_after,
"read-only tx should NOT advance last_committed_tx_ts (was {ts_before}, now {ts_after})"
);
}
/// Commit A is paused by an injected yield at `LogRecordPrepared`, commit B then runs
/// to completion with a newer timestamp, and finally commit A is resumed. The
/// `last_committed_tx_ts` watermark must still equal B's value afterwards.
#[test]
fn test_last_committed_timestamp_is_monotonic_for_out_of_order_commits() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let setup = db.connect();
    setup
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();
    setup.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    setup.execute("INSERT INTO t VALUES (2, 'b')").unwrap();

    let mvcc_store = db.get_mvcc_store();
    let conn_a = db.connect();
    let conn_b = db.connect();

    // tx_a updates row 1 and starts committing, but yields partway through.
    conn_a.execute("BEGIN CONCURRENT").unwrap();
    conn_a
        .execute("UPDATE t SET v = 'a1' WHERE id = 1")
        .unwrap();
    let tx_a_id = conn_a.get_mv_tx_id().expect("tx_a should be active");
    conn_a.set_yield_injector(Some(FixedYieldInjector::new([
        CommitYieldPoint::LogRecordPrepared.point(),
    ])));
    let mut commit_a = conn_a.prepare("COMMIT").unwrap();
    assert!(
        matches!(commit_a.step().unwrap(), StepResult::IO),
        "tx_a should yield after getting its commit timestamp"
    );

    // While paused, tx_a must be in Preparing; remember its commit timestamp.
    let tx_a_state = mvcc_store
        .txs
        .get(&tx_a_id)
        .expect("tx_a should still be tracked")
        .value()
        .state
        .load();
    let TransactionState::Preparing(tx_a_end_ts) = tx_a_state else {
        let state = tx_a_state;
        panic!("expected tx_a to be Preparing, got {state:?}");
    };

    // tx_b updates a different row and commits fully while tx_a is paused.
    conn_b.execute("BEGIN CONCURRENT").unwrap();
    conn_b
        .execute("UPDATE t SET v = 'b1' WHERE id = 2")
        .unwrap();
    conn_b.execute("COMMIT").unwrap();
    let tx_b_committed = mvcc_store.last_committed_tx_ts.load(Ordering::Acquire);
    assert!(
        tx_b_committed > tx_a_end_ts,
        "tx_b should commit at a newer timestamp than the yielded tx_a"
    );

    // Resume tx_a's commit; the watermark must not move backwards.
    commit_a.run_ignore_rows().unwrap();
    let final_watermark = mvcc_store.last_committed_tx_ts.load(Ordering::Acquire);
    assert_eq!(
        final_watermark, tx_b_committed,
        "finishing an older commit must not lower the committed timestamp watermark"
    );
}
/// Scenario: a reader commits after the writer it read from has been
/// finalized by hand; afterwards `acquire_exclusive_tx` on an unrelated
/// transaction must succeed (the assertion message names `Busy` as the
/// failure being guarded against).
#[test]
fn test_commit_dep_readonly_does_not_cause_spurious_busy() {
let db = MvccTestDbNoConn::new_with_random_db();
{
let conn = db.connect();
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, value TEXT)")
.unwrap();
conn.execute("INSERT INTO t VALUES (1, 'initial')").unwrap();
conn.close().unwrap();
}
let mvcc_store = db.get_mvcc_store();
// Writer: modifies row 1 inside a concurrent transaction and is left uncommitted.
let writer_conn = db.connect();
writer_conn.execute("BEGIN CONCURRENT").unwrap();
writer_conn
.execute("UPDATE t SET value = 'modified' WHERE id = 1")
.unwrap();
let writer_tx_id = writer_conn.get_mv_tx_id().unwrap();
// Allocate a commit timestamp by hand and publish the writer as `Preparing(ts)`
// from inside the timestamp callback.
let end_ts = mvcc_store.get_commit_timestamp(|ts| {
mvcc_store
.txs
.get(&writer_tx_id)
.unwrap()
.value()
.state
.store(TransactionState::Preparing(ts));
});
// A third transaction, used at the end to probe `acquire_exclusive_tx`.
let exclusive_conn = db.connect();
exclusive_conn.execute("BEGIN CONCURRENT").unwrap();
let exclusive_tx_id = exclusive_conn.get_mv_tx_id().unwrap();
let (signal_tx, signal_rx) = std::sync::mpsc::channel();
let db_arc = db.get_db();
// Reader thread: reads row 1 while the writer is `Preparing`, signals the main
// thread, then attempts to COMMIT and returns the commit result.
let reader_handle = std::thread::spawn(move || {
let reader_conn = db_arc.connect().unwrap();
reader_conn.execute("BEGIN CONCURRENT").unwrap();
let mut stmt = reader_conn
.prepare("SELECT value FROM t WHERE id = 1")
.unwrap();
let _rows = stmt.run_collect_rows().unwrap();
signal_tx.send(()).unwrap();
let commit_result = reader_conn.execute("COMMIT");
let _ = reader_conn.close();
commit_result
});
// Wait until the reader has finished its SELECT before finalizing the writer.
signal_rx.recv().unwrap();
{
// Finalize the writer by hand instead of running COMMIT on `writer_conn`.
let writer_tx = mvcc_store.txs.get(&writer_tx_id).unwrap();
let writer_tx = writer_tx.value();
// Replace the writer's tx id with its commit timestamp on every row version it touched.
for entry in mvcc_store.rows.iter() {
let mut rvs = entry.value().write();
for rv in rvs.iter_mut() {
if rv.begin == Some(TxTimestampOrID::TxID(writer_tx_id)) {
rv.begin = Some(TxTimestampOrID::Timestamp(end_ts));
}
if rv.end == Some(TxTimestampOrID::TxID(writer_tx_id)) {
rv.end = Some(TxTimestampOrID::Timestamp(end_ts));
}
}
}
writer_tx.state.store(TransactionState::Committed(end_ts));
// Decrement the dependency counter of every transaction registered in the
// writer's `commit_dep_set`, emptying the set as we go.
for dep_tx_id in writer_tx.commit_dep_set.lock().drain() {
if let Some(dep_tx_entry) = mvcc_store.txs.get(&dep_tx_id) {
dep_tx_entry
.value()
.commit_dep_counter
.fetch_sub(1, Ordering::AcqRel);
}
}
}
let commit_result = reader_handle.join().unwrap();
assert!(commit_result.is_ok());
// With the reader committed, acquiring the exclusive transaction must succeed.
let acquire_result = mvcc_store.acquire_exclusive_tx(&exclusive_tx_id);
assert!(
acquire_result.is_ok(),
"acquire_exclusive_tx should not return Busy after a read-only dependent committed: {acquire_result:?}",
);
mvcc_store.release_exclusive_tx(&exclusive_tx_id);
}
/// A plain (non-concurrent) INSERT issued while another connection's
/// concurrent COMMIT is paused must report `Busy`, and the paused commit must
/// still be able to run to completion afterwards.
#[test]
fn test_exclusive_tx_does_not_deadlock_behind_preparing_concurrent_commit() {
    let db = MvccTestDbNoConn::new_with_random_db();

    // Connection A: stage one row and park its COMMIT at the yield point.
    let conn_a = db.connect();
    for sql in [
        "CREATE TABLE t (key TEXT PRIMARY KEY, value BLOB)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES ('a', zeroblob(16))",
    ] {
        conn_a.execute(sql).unwrap();
    }
    conn_a.set_yield_injector(Some(FixedYieldInjector::new([
        CommitYieldPoint::LogRecordPrepared.point(),
    ])));
    let mut commit_a = conn_a.prepare("COMMIT").unwrap();
    assert!(
        matches!(commit_a.step().unwrap(), StepResult::IO),
        "first commit must pause after publishing Preparing and before taking the log lock",
    );

    // Connection B: step the insert a bounded number of times, expecting Busy.
    let conn_b = db.connect();
    let mut insert_b = conn_b
        .prepare("INSERT INTO t VALUES ('b', zeroblob(16))")
        .unwrap();
    let saw_busy = (0..64).any(|_| match insert_b.step() {
        Ok(StepResult::IO) => false,
        Ok(StepResult::Busy) | Err(LimboError::Busy) => true,
        Ok(StepResult::Done) => {
            panic!("exclusive insert started while another tx was Preparing")
        }
        Ok(other) => panic!("unexpected insert step result: {other:?}"),
        Err(err) => panic!("unexpected insert error: {err:?}"),
    });
    assert!(
        saw_busy,
        "exclusive insert should return Busy instead of waiting while holding the log lock",
    );
    insert_b.reset().unwrap();

    // Resume A's commit; it has a bounded number of steps to finish.
    let committed = (0..1024).any(|_| match commit_a.step().unwrap() {
        StepResult::Done => true,
        StepResult::IO => false,
        other => panic!("unexpected commit step result: {other:?}"),
    });
    assert!(
        committed,
        "paused concurrent commit should finish after Busy"
    );

    // Only A's row may be present.
    conn_a.set_yield_injector(None);
    let keys = get_rows(&conn_a, "SELECT key FROM t ORDER BY key");
    assert_eq!(keys.len(), 1);
    assert_eq!(keys[0][0].to_text().unwrap(), "a");
    conn_a.close().unwrap();
    conn_b.close().unwrap();
}
/// Inserts a hand-built `sqlite_schema` entry for a table named `test` plus one
/// data row holding `value`, directly through the MVCC store, and commits.
///
/// The synthetic table uses the negative root `-(max_root_page + 100)`, where
/// `max_root_page` is the largest positive `rootpage` in `sqlite_schema`
/// (0 when there is none).
fn write_synthetic_row(db: &MvccTestDbNoConn, value: &str) {
    let conn = db.connect();
    let mvcc_store = db.get_mvcc_store();

    // Runs a query that yields a single integer and returns it.
    let scalar = |sql: &str| get_rows(&conn, sql)[0][0].as_int().unwrap();
    let max_root_page =
        scalar("SELECT COALESCE(MAX(rootpage), 0) FROM sqlite_schema WHERE rootpage > 0");
    let next_schema_rowid = scalar("SELECT COALESCE(MAX(rowid), 0) + 1 FROM sqlite_schema");

    let synthetic_root = -(max_root_page + 100);
    let synthetic_table_id = MVTableId::new(synthetic_root);
    let tx_id = mvcc_store.begin_tx(conn.pager.load().clone()).unwrap();

    // Five-column schema record: type, name, tbl_name, rootpage, sql.
    let schema_record = ImmutableRecord::from_values(
        &[
            Value::Text(Text::new("table")),
            Value::Text(Text::new("test")),
            Value::Text(Text::new("test")),
            Value::from_i64(synthetic_root),
            Value::Text(Text::new(
                "CREATE TABLE test(id INTEGER PRIMARY KEY, data TEXT)",
            )),
        ],
        5,
    );
    let schema_row = Row::new_table_row(
        RowID::new((-1).into(), RowKey::Int(next_schema_rowid)),
        schema_record.as_blob().to_vec(),
        5,
    );
    mvcc_store.insert(tx_id, schema_row).unwrap();

    let data_row = generate_simple_string_row(synthetic_table_id, 1, value);
    mvcc_store.insert(tx_id, data_row).unwrap();

    commit_tx(mvcc_store, &conn, tx_id).unwrap();
    conn.close().unwrap();
}
/// Writes a synthetic row, restarts the database, then checks that a new row
/// can be inserted into the same synthetic table and read back.
#[test]
fn test_restart() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    write_synthetic_row(&db, "foo");
    db.restart();

    let conn = db.connect();
    let mvcc_store = db.get_mvcc_store();

    // Recompute the synthetic table id with the same formula the writer used.
    let max_root_page = get_rows(
        &conn,
        "SELECT COALESCE(MAX(rootpage), 0) FROM sqlite_schema WHERE rootpage > 0",
    )[0][0]
        .as_int()
        .unwrap();
    let synthetic_table_id = MVTableId::new(-(max_root_page + 100));

    // Insert rowid 2 in its own transaction.
    let write_tx = mvcc_store.begin_tx(conn.pager.load().clone()).unwrap();
    let new_row = generate_simple_string_row(synthetic_table_id, 2, "bar");
    mvcc_store.insert(write_tx, new_row).unwrap();
    commit_tx(mvcc_store.clone(), &conn, write_tx).unwrap();

    // Read it back from a fresh transaction.
    let read_tx = mvcc_store.begin_tx(conn.pager.load().clone()).unwrap();
    let target = RowID::new(synthetic_table_id, RowKey::Int(2));
    let fetched = mvcc_store.read(read_tx, &target).unwrap().unwrap();
    let record = get_record_value(&fetched);
    match record.get_value(0).unwrap() {
        ValueRef::Text(text) => assert_eq!(text.as_str(), "bar"),
        _ => panic!("Expected Text value"),
    }
    conn.close().unwrap();
}
/// A row inserted through one connection must be visible to a query issued on
/// another connection of the same database.
#[test]
fn test_connection_sees_other_connection_changes() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn0 = db.connect();
    conn0
        .execute("CREATE TABLE IF NOT EXISTS test_table (id INTEGER PRIMARY KEY, text TEXT)")
        .unwrap();
    let conn1 = db.connect();
    conn1
        .execute("CREATE TABLE IF NOT EXISTS test_table (id INTEGER PRIMARY KEY, text TEXT)")
        .unwrap();
    conn0
        .execute("INSERT INTO test_table (id, text) VALUES (965, 'text_877')")
        .unwrap();

    let mut stmt = conn1.query("SELECT * FROM test_table").unwrap().unwrap();
    // Count the rows: the per-row assertion alone would pass vacuously if the
    // query returned nothing.
    let mut rows_seen = 0usize;
    stmt.run_with_row_callback(|row| {
        let text = row.get_value(1).to_text().unwrap();
        assert_eq!(text, "text_877");
        rows_seen += 1;
        Ok(())
    })
    .unwrap();
    assert_eq!(
        rows_seen, 1,
        "conn1 should see exactly the one row inserted by conn0"
    );
}
/// Inserts seven rows, deletes one, and checks that a full scan returns
/// exactly the remaining rows in insertion order.
#[test]
fn test_delete_with_conn() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn0 = db.connect();
    conn0.execute("CREATE TABLE test(t)").unwrap();

    let mut inserts = vec![1, 2, 3, 4, 5, 6, 7];
    for t in &inserts {
        conn0
            .execute(format!("INSERT INTO test(t) VALUES ({t})"))
            .unwrap();
    }

    conn0.execute("DELETE FROM test WHERE t = 5").unwrap();
    // Index 4 holds the value 5 that was just deleted.
    inserts.remove(4);

    let mut stmt = conn0.prepare("SELECT * FROM test").unwrap();
    let mut pos = 0;
    stmt.run_with_row_callback(|row| {
        let t = row.get_value(0).as_int().unwrap();
        assert_eq!(t, inserts[pos]);
        pos += 1;
        Ok(())
    })
    .unwrap();

    // Without this check a scan that returns too few rows (or none at all)
    // would pass, because the per-row assertion would simply never run.
    assert_eq!(
        pos,
        inserts.len(),
        "scan should return every row that was not deleted"
    );
}
/// Builds an `ImmutableRecord` (created with `ImmutableRecord::new(1024)`) and
/// feeds it the payload of `row` via `start_serialization`.
fn get_record_value(row: &Row) -> ImmutableRecord {
    let mut rec = ImmutableRecord::new(1024);
    let payload = row.payload();
    rec.start_serialization(payload);
    rec
}
/// DDL and inserts executed inside an explicit `BEGIN` … `COMMIT` must be
/// visible to a query after the commit.
#[test]
fn test_interactive_transaction() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "BEGIN",
        "CREATE TABLE test (x)",
        "INSERT INTO test (x) VALUES (1)",
        "INSERT INTO test (x) VALUES (2)",
        "COMMIT",
    ] {
        conn.execute(sql).unwrap();
    }

    let rows = get_rows(&conn, "SELECT * FROM test");
    let expected: Vec<Vec<Value>> = (1..=2).map(|x| vec![Value::from_i64(x)]).collect();
    assert_eq!(rows, expected);
}
/// `COMMIT` with no open transaction must fail with a `TxError` carrying the
/// expected message.
#[test]
fn test_commit_without_tx() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in ["CREATE TABLE test (x)", "INSERT INTO test (x) VALUES (1)"] {
        conn.execute(sql).unwrap();
    }

    match conn.execute("COMMIT").unwrap_err() {
        LimboError::TxError(e) => {
            assert_eq!(e, "cannot commit - no transaction is active");
        }
        _ => panic!("Expected TxError"),
    }
}
/// Runs `query` on `conn` and returns every result row as an owned vector of
/// values. Panics if preparing or running the statement fails.
fn get_rows(conn: &Arc<Connection>, query: &str) -> Vec<Vec<Value>> {
    let mut collected: Vec<Vec<Value>> = Vec::new();
    let mut stmt = conn.prepare(query).unwrap();
    stmt.run_with_row_callback(|row| {
        collected.push(row.get_values().cloned().collect());
        Ok(())
    })
    .unwrap();
    collected
}
/// A concurrent transaction that inserts while a `CREATE INDEX` is paused
/// mid-commit must be rejected at COMMIT with a schema conflict/update error.
#[test]
fn test_insert_in_middle_commit_of_create_index_returns_err() {
    let _ = tracing_subscriber::fmt::try_init();
    let db = MvccTestDbNoConn::new_with_random_db();
    {
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t (id INTEGER PRIMARY KEY, c INTEGER)",
            "INSERT INTO t VALUES (1, 10)",
        ] {
            setup.execute(sql).unwrap();
        }
        setup.close().unwrap();
    }

    let conn_a = db.connect();
    let conn_b = db.connect();
    conn_a.set_yield_injector(Some(FixedYieldInjector::new([
        CommitYieldPoint::LogRecordPrepared.point(),
    ])));

    // Step CREATE INDEX until its first IO yield. Stop early on Done; any
    // other step result just consumes one of the 200 attempts.
    let mut create_idx = conn_a.prepare("CREATE INDEX i ON t(c)").unwrap();
    let yielded = (0..200)
        .find_map(|_| match create_idx.step().unwrap() {
            StepResult::IO => Some(true),
            StepResult::Done => Some(false),
            _ => None,
        })
        .unwrap_or(false);
    assert!(
        yielded,
        "CREATE INDEX should yield at CommitYieldPoint::LogRecordPrepared"
    );

    // tx_b begins and writes while the index creation is still in flight.
    conn_b.execute("BEGIN CONCURRENT").unwrap();
    conn_b.execute("INSERT INTO t VALUES (2, 20)").unwrap();

    // Finish the index creation, then try to commit tx_b.
    create_idx.run_ignore_rows().unwrap();
    drop(create_idx);
    let commit_result = conn_b.execute("COMMIT");
    let rejected = matches!(
        commit_result,
        Err(LimboError::SchemaConflict | LimboError::SchemaUpdated)
    );
    assert!(
        rejected,
        "BUG: tx_b's COMMIT returned {commit_result:?} but should have been \
         aborted with SchemaConflict/SchemaUpdated. tx_b began with a stale \
         schema (missing index `i`), so its INSERT silently skipped writing \
         to that index. Allowing the commit leaves `i` permanently short the \
         row tx_b wrote."
    );
}
/// Interleaves inserts from many connections by stepping one statement per
/// connection per round, then checks every inserted value is present.
/// Marked `#[ignore]`.
#[test]
#[ignore]
fn test_concurrent_writes() {
    struct ConnectionState {
        conn: Arc<Connection>,
        inserts: Vec<i64>,
        current_statement: Option<Statement>,
    }

    let db = MvccTestDbNoConn::new_with_random_db();
    {
        let conn = db.connect();
        conn.execute("CREATE TABLE test (x)").unwrap();
        conn.close().unwrap();
    }

    let num_connections = 20;
    let num_inserts_per_connection = 10000;

    // Each connection gets a disjoint range of values; the list is reversed so
    // that `pop()` hands them out in ascending order.
    let mut connections = Vec::new();
    for i in 0..num_connections {
        let conn = db.connect();
        let first = num_inserts_per_connection * i;
        let last = num_inserts_per_connection * (i + 1);
        let mut inserts = (first..last).collect::<Vec<i64>>();
        inserts.reverse();
        connections.push(ConnectionState {
            conn,
            inserts,
            current_statement: None,
        });
    }

    loop {
        // Completion is sampled before this round's stepping.
        let all_finished = connections
            .iter()
            .all(|state| state.inserts.is_empty() && state.current_statement.is_none());

        for (conn_id, state) in connections.iter_mut().enumerate() {
            // Start the next insert when the connection is idle and has work left.
            if state.current_statement.is_none() {
                if let Some(write) = state.inserts.pop() {
                    println!("inserting row {write} from connection {conn_id}");
                    let prepared = state
                        .conn
                        .prepare(format!("INSERT INTO test (x) VALUES ({write})"))
                        .unwrap();
                    state.current_statement = Some(prepared);
                }
            }
            let stmt = match state.current_statement.as_mut() {
                Some(stmt) => stmt,
                None => continue,
            };
            println!("connection step {conn_id}");
            match stmt.step().unwrap() {
                StepResult::Done => {
                    println!("connection {conn_id} done");
                    state.current_statement = None;
                }
                StepResult::IO => {}
                StepResult::Busy => {
                    println!("connection {conn_id} busy");
                    unreachable!();
                }
                _ => {
                    unreachable!()
                }
            }
        }

        db.get_db().io.step().unwrap();
        if all_finished {
            println!("all finished");
            break;
        }
    }

    // Sorted ascending, row N must hold the value N.
    let conn = db.connect();
    let rows = get_rows(&conn, "SELECT * FROM test ORDER BY x ASC");
    assert_eq!(
        rows.len() as i64,
        num_connections * num_inserts_per_connection
    );
    for (row_id, row) in rows.iter().enumerate() {
        assert_eq!(row[0].as_int().unwrap(), row_id as i64);
    }
    conn.close().unwrap();
}
/// Pins the `Display` output of `Transaction` for a hand-built instance.
#[test]
fn transaction_display() {
    // Every row id in this test belongs to table id -2.
    let row_id = |key: i64| RowID::new((-2).into(), RowKey::Int(key));
    let no_versions = || Arc::new(RwLock::new(Vec::new()));

    let mut written = WriteSet::new();
    written.insert(row_id(11), no_versions());
    written.insert(row_id(13), no_versions());

    let read_set = SkipSet::new();
    read_set.insert(row_id(17));
    read_set.insert(row_id(19));

    let tx = Transaction {
        state: AtomicTransactionState::from(TransactionState::Preparing(20250915)),
        tx_id: 42,
        begin_ts: 20250914,
        write_set: Mutex::new(written),
        read_set,
        header: RwLock::new(DatabaseHeader::default()),
        header_dirty: AtomicBool::new(false),
        savepoint_stack: RwLock::new(Vec::new()),
        pager_commit_lock_held: AtomicBool::new(false),
        commit_dep_counter: AtomicU64::new(0),
        abort_now: AtomicBool::new(false),
        commit_dep_set: Mutex::new(HashSet::default()),
    };

    let expected = "{ state: Preparing(20250915), id: 42, begin_ts: 20250914, write_set: [RowID { table_id: MVTableId(-2), row_id: Int(11) }, RowID { table_id: MVTableId(-2), row_id: Int(13) }], read_set: [RowID { table_id: MVTableId(-2), row_id: Int(17) }, RowID { table_id: MVTableId(-2), row_id: Int(19) }] }";
    let output = format!("{tx}");
    assert_eq!(output, expected);
}
/// `should_checkpoint()` is false on a fresh store and becomes true once the
/// checkpoint threshold is set to 0.
#[test]
fn test_should_checkpoint() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let store = db.get_mvcc_store();

    let before = store.storage.should_checkpoint();
    assert!(!before);

    store.set_checkpoint_threshold(0);
    let after = store.storage.should_checkpoint();
    assert!(after);
}
/// After a restart, `should_checkpoint()` must report true with a threshold of
/// 1 once some data had been written before the restart.
#[test]
fn test_should_checkpoint_after_recovery_uses_recovered_offset() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        // Scoped so the connection is dropped before the restart.
        let writer = db.connect();
        for sql in ["CREATE TABLE t(x)", "INSERT INTO t VALUES (1)"] {
            writer.execute(sql).unwrap();
        }
    }
    db.restart();

    let _conn = db.connect();
    let store = db.get_mvcc_store();
    store.set_checkpoint_threshold(1);
    let wants_checkpoint = store.storage.should_checkpoint();
    assert!(
        wants_checkpoint,
        "expected should_checkpoint() to reflect the recovered logical-log offset"
    );
}
/// With the checkpoint threshold at 0, an inserted row must still be readable.
#[test]
fn test_insert_with_checkpoint() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let store = db.get_mvcc_store();
    store.set_checkpoint_threshold(0);

    let conn = db.connect();
    for sql in ["CREATE TABLE t(x)", "INSERT INTO t VALUES (1)"] {
        conn.execute(sql).unwrap();
    }

    let rows = get_rows(&conn, "SELECT * FROM t");
    assert_eq!(rows.len(), 1);
    let only_row = &rows[0];
    assert_eq!(only_row.len(), 1);
    match &only_row[0] {
        Value::Numeric(crate::numeric::Numeric::Integer(i)) => assert_eq!(*i, 1),
        _ => unreachable!(),
    }
}
/// Commits one transaction while a second one is still open and the checkpoint
/// threshold is 0; the commit must succeed. The second transaction is rolled
/// back afterwards.
#[test]
fn test_auto_checkpoint_busy_is_ignored() {
    let db = MvccTestDb::new();
    db.mvcc_store.set_checkpoint_threshold(0);

    // Open both transactions before any write happens.
    let committing_tx = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();
    let lingering_tx = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();

    let hello_row = generate_simple_string_row((-2).into(), 1, "Hello");
    db.mvcc_store.insert(committing_tx, hello_row).unwrap();
    commit_tx(db.mvcc_store.clone(), &db.conn, committing_tx).unwrap();

    let pager = db.conn.pager.load().clone();
    db.mvcc_store
        .rollback_tx(lingering_tx, pager, &db.conn, crate::MAIN_DB_ID);
}
/// The WAL read lock is held after a SELECT inside an explicit transaction and
/// released once that transaction commits.
#[test]
fn test_mvcc_read_tx_lifecycle() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in ["CREATE TABLE t(x)", "BEGIN", "SELECT * FROM t"] {
        conn.execute(sql).unwrap();
    }

    let pager = conn.pager.load();
    let wal = pager.wal.as_ref().expect("wal should be enabled");
    let held_during_tx = wal.holds_read_lock();
    assert!(held_during_tx);

    conn.execute("COMMIT").unwrap();
    let held_after_commit = wal.holds_read_lock();
    assert!(!held_after_commit);
}
/// Dropping a connection whose pager has an open read transaction must release
/// the WAL read lock.
#[test]
fn test_mvcc_conn_drop_releases_read_tx() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("CREATE TABLE t(x)").unwrap();

    let pager = conn.pager.load();
    pager.begin_read_tx().unwrap();
    // Clone the WAL handle so it can still be inspected after `conn` is gone.
    let wal = pager.wal.as_ref().expect("wal should be enabled").clone();
    let held_before_drop = wal.holds_read_lock();
    assert!(held_before_drop);

    drop(conn);
    let held_after_drop = wal.holds_read_lock();
    assert!(!held_after_drop);
}
/// A range query on an empty table returns no rows (checkpoint threshold 0).
#[test]
fn test_select_empty_table() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let store = db.get_mvcc_store();
    store.set_checkpoint_threshold(0);

    let conn = db.connect();
    conn.execute("CREATE TABLE t(x integer primary key)")
        .unwrap();

    let matches = get_rows(&conn, "SELECT * FROM t where x > 100");
    assert!(matches.is_empty());
}
/// Rows checkpointed before a restart must be returned by a scan afterwards.
#[turso_macros::test(encryption)]
fn test_cursor_with_btree_and_mvcc() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        // Scoped so the connection is dropped before the restart.
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(x integer primary key)",
            "INSERT INTO t VALUES (1)",
            "INSERT INTO t VALUES (2)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    println!("getting rows");
    let rows = get_rows(&conn, "SELECT * FROM t");
    assert_eq!(rows.len(), 2);
    for (row, want) in rows.iter().zip([1, 2]) {
        assert_eq!(*row, vec![Value::from_i64(want)]);
    }
}
/// A row inserted after a restart must be ordered between two rows that were
/// checkpointed before it in a forward scan.
#[turso_macros::test(encryption)]
fn test_cursor_with_btree_and_mvcc_2() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        // Scoped so the connection is dropped before the restart.
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(x integer primary key)",
            "INSERT INTO t VALUES (1)",
            "INSERT INTO t VALUES (3)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    conn.execute("INSERT INTO t VALUES (2)").unwrap();
    println!("getting rows");
    let rows = get_rows(&conn, "SELECT * FROM t");
    dbg!(&rows);
    assert_eq!(rows.len(), 3);
    for (row, want) in rows.iter().zip([1, 2, 3]) {
        assert_eq!(*row, vec![Value::from_i64(want)]);
    }
}
/// Descending scan must interleave a post-restart insert with rows that were
/// checkpointed before the restart.
#[turso_macros::test(encryption)]
fn test_cursor_with_btree_and_mvcc_with_backward_cursor() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        // Scoped so the connection is dropped before the restart.
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(x integer primary key)",
            "INSERT INTO t VALUES (1)",
            "INSERT INTO t VALUES (3)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    conn.execute("INSERT INTO t VALUES (2)").unwrap();
    let rows = get_rows(&conn, "SELECT * FROM t order by x desc");
    dbg!(&rows);
    assert_eq!(rows.len(), 3);
    for (row, want) in rows.iter().zip([3, 2, 1]) {
        assert_eq!(*row, vec![Value::from_i64(want)]);
    }
}
/// Descending scan after a restart must include a new insert and exclude a
/// row deleted after the restart.
#[turso_macros::test(encryption)]
fn test_cursor_with_btree_and_mvcc_with_backward_cursor_with_delete() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        // Scoped so the connection is dropped before the restart.
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(x integer primary key)",
            "INSERT INTO t VALUES (1)",
            "INSERT INTO t VALUES (2)",
            "INSERT INTO t VALUES (4)",
            "INSERT INTO t VALUES (5)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    for sql in ["INSERT INTO t VALUES (3)", "DELETE FROM t WHERE x = 2"] {
        conn.execute(sql).unwrap();
    }
    println!("getting rows");
    let rows = get_rows(&conn, "SELECT * FROM t order by x desc");
    dbg!(&rows);
    assert_eq!(rows.len(), 4);
    for (row, want) in rows.iter().zip([5, 4, 3, 1]) {
        assert_eq!(*row, vec![Value::from_i64(want)]);
    }
}
/// Randomised model test (ignored by default). Applies 10000 randomly chosen
/// operations to table `t` and, for every query operation, compares the
/// result with an in-memory sorted model (`rows_in_db`).
///
/// The seed is printed at the start; `rng_from_time_or_env` reads it from the
/// `SEED` environment variable when that is set.
#[turso_macros::test(encryption)]
#[ignore] fn test_cursor_with_btree_and_mvcc_fuzz() {
let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
// Model of the table contents, kept sorted so it can be zipped against ordered queries.
let mut rows_in_db = sorted_vec::SortedVec::new();
// Values currently present, used to avoid inserting a duplicate primary key.
let mut seen = HashSet::default();
let (mut rng, _seed) = rng_from_time_or_env();
println!("seed: {_seed}");
let mut maybe_conn = Some(db.connect());
{
maybe_conn
.as_mut()
.unwrap()
.execute("CREATE TABLE t(x integer primary key)")
.unwrap();
}
// Operations the fuzzer can pick; discriminants are contiguous from 0 so a
// random integer in `0..=Checkpoint` maps onto a variant.
#[repr(u8)]
#[derive(Debug)]
enum Op {
Insert = 0,
Delete = 1,
SelectForward = 2,
SelectBackward = 3,
SeekForward = 4,
SeekBackward = 5,
Checkpoint = 6,
}
impl From<u8> for Op {
fn from(value: u8) -> Self {
match value {
0 => Op::Insert,
1 => Op::Delete,
2 => Op::SelectForward,
3 => Op::SelectBackward,
4 => Op::SeekForward,
5 => Op::SeekBackward,
6 => Op::Checkpoint,
_ => unreachable!(),
}
}
}
for i in 0..10000 {
let conn = maybe_conn.as_mut().unwrap();
let op = rng.random_range(0..=Op::Checkpoint as usize);
let op = Op::from(op as u8);
println!("tick: {i} op: {op:?} ");
match op {
Op::Insert => {
// Draw until we hit a value that is not currently in the table.
let value = loop {
let value = rng.random_range(0..10000);
if !seen.contains(&value) {
seen.insert(value);
break value;
}
};
let query = format!("INSERT INTO t VALUES ({value})");
println!("inserting: {query}");
conn.execute(query.as_str()).unwrap();
rows_in_db.push(value);
}
Op::Delete => {
// Nothing to delete yet; skip this tick.
if rows_in_db.is_empty() {
continue;
}
let index = rng.random_range(0..rows_in_db.len());
let value = rows_in_db[index];
let query = format!("DELETE FROM t WHERE x = {value}");
println!("deleting: {query}");
conn.execute(query.as_str()).unwrap();
rows_in_db.remove_index(index);
// Make the value eligible for a later re-insert.
seen.remove(&value);
}
Op::SelectForward => {
// Full ascending scan must match the model exactly.
let rows = get_rows(conn, "SELECT * FROM t order by x asc");
assert_eq!(
rows.len(),
rows_in_db.len(),
"expected {} rows, got {}",
rows_in_db.len(),
rows.len()
);
for (row, expected_rowid) in rows.iter().zip(rows_in_db.iter()) {
assert_eq!(
row[0].as_int().unwrap(),
*expected_rowid,
"expected row id {} got {}",
*expected_rowid,
row[0].as_int().unwrap()
);
}
}
Op::SelectBackward => {
// Full descending scan must match the model in reverse.
let rows = get_rows(conn, "SELECT * FROM t order by x desc");
assert_eq!(
rows.len(),
rows_in_db.len(),
"expected {} rows, got {}",
rows_in_db.len(),
rows.len()
);
for (row, expected_rowid) in rows.iter().zip(rows_in_db.iter().rev()) {
assert_eq!(
row[0].as_int().unwrap(),
*expected_rowid,
"expected row id {} got {}",
*expected_rowid,
row[0].as_int().unwrap()
);
}
}
Op::SeekForward => {
// Ascending range query `x > value` against the filtered model.
let value = rng.random_range(0..10000);
let rows = get_rows(
conn,
format!("SELECT * FROM t where x > {value} order by x asc").as_str(),
);
let filtered_rows_in_db = rows_in_db
.iter()
.filter(|&id| *id > value)
.cloned()
.collect::<Vec<i64>>();
assert_eq!(
rows.len(),
filtered_rows_in_db.len(),
"expected {} rows, got {}",
filtered_rows_in_db.len(),
rows.len()
);
for (row, expected_rowid) in rows.iter().zip(filtered_rows_in_db.iter()) {
assert_eq!(
row[0].as_int().unwrap(),
*expected_rowid,
"expected row id {} got {}",
*expected_rowid,
row[0].as_int().unwrap()
);
}
}
Op::SeekBackward => {
// Same `x > value` predicate as SeekForward, but requested in descending order.
let value = rng.random_range(0..10000);
let rows = get_rows(
conn,
format!("SELECT * FROM t where x > {value} order by x desc").as_str(),
);
let filtered_rows_in_db = rows_in_db
.iter()
.filter(|&id| *id > value)
.cloned()
.collect::<Vec<i64>>();
assert_eq!(
rows.len(),
filtered_rows_in_db.len(),
"expected {} rows, got {}",
filtered_rows_in_db.len(),
rows.len()
);
for (row, expected_rowid) in rows.iter().zip(filtered_rows_in_db.iter().rev()) {
assert_eq!(
row[0].as_int().unwrap(),
*expected_rowid,
"expected row id {} got {}",
*expected_rowid,
row[0].as_int().unwrap()
);
}
}
Op::Checkpoint => {
// Checkpoint, restart the database, and continue on a fresh connection.
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
db.restart();
maybe_conn = Some(db.connect());
}
}
}
}
/// Creates a deterministic `ChaCha8Rng` and returns it together with the seed
/// that was used, so a failing run can be reproduced.
///
/// The seed comes from the `SEED` environment variable when it is set (and
/// valid Unicode); otherwise it is the current time in milliseconds since the
/// Unix epoch.
///
/// # Panics
///
/// Panics if `SEED` is set but is not a valid `u64`, or if the system clock
/// reports a time before the Unix epoch.
pub fn rng_from_time_or_env() -> (ChaCha8Rng, u64) {
    let seed: u64 = match std::env::var("SEED") {
        // Parse as `u64` explicitly. Previously the target type was inferred
        // as `u128` from the time-based default, so out-of-range values were
        // accepted and then silently truncated.
        Ok(raw) => raw
            .parse()
            .expect("Failed to parse SEED environment variable as u64"),
        // Only consult the clock when no seed was supplied.
        Err(_) => {
            let millis = std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .expect("system clock is set before the Unix epoch")
                .as_millis();
            // Keeping only the low 64 bits is intentional: any value is an
            // acceptable seed.
            millis as u64
        }
    };
    (ChaCha8Rng::seed_from_u64(seed), seed)
}
/// Inserting a primary key that already exists in checkpointed data must fail
/// after a restart.
#[test]
fn test_cursor_with_btree_and_mvcc_insert_after_checkpoint_repeated_key() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        // Scoped so the connection is dropped before the restart.
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(x integer primary key)",
            "INSERT INTO t VALUES (1)",
            "INSERT INTO t VALUES (2)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    let duplicate_insert = conn.execute("INSERT INTO t VALUES (2)");
    assert!(
        duplicate_insert.is_err(),
        "Expected error because key 2 already exists"
    );
}
/// A point lookup must find a row that was checkpointed before a restart.
#[test]
fn test_cursor_with_btree_and_mvcc_seek_after_checkpoint() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        // Scoped so the connection is dropped before the restart.
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(x integer primary key)",
            "INSERT INTO t VALUES (1)",
            "INSERT INTO t VALUES (2)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    let hits = get_rows(&conn, "SELECT * FROM t WHERE x = 2");
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0][0].as_int().unwrap(), 2);
}
/// Deleting a checkpointed row after a restart must leave the table empty.
#[test]
fn test_cursor_with_btree_and_mvcc_delete_after_checkpoint() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        // Scoped so the connection is dropped before the restart.
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(x integer primary key)",
            "INSERT INTO t VALUES (1)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    conn.execute("DELETE FROM t WHERE x = 1").unwrap();
    let remaining = get_rows(&conn, "SELECT * FROM t order by x desc");
    assert_eq!(remaining.len(), 0);
}
/// Tracks the single `sqlite_sequence` row across an insert, a rowid-changing
/// update, and a second insert. Currently ignored (see the attribute reason).
#[test]
#[ignore = "AUTOINCREMENT not yet supported in MVCC mode"]
fn test_skips_updated_rowid() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT)")
        .unwrap();

    // Asserts that `sqlite_sequence` holds exactly one row whose second column
    // equals `expected_seq`.
    let assert_sequence = |expected_seq: i64| {
        let rows = get_rows(&conn, "SELECT * FROM sqlite_sequence");
        dbg!(&rows);
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0][1].as_int().unwrap(), expected_seq);
    };

    conn.execute("INSERT INTO t DEFAULT VALUES").unwrap();
    assert_sequence(1);

    conn.execute("UPDATE t SET a = a + 1").unwrap();
    assert_sequence(1);

    conn.execute("INSERT INTO t DEFAULT VALUES").unwrap();
    assert_sequence(3);
}
/// `PRAGMA integrity_check` must report "ok" both before and after a
/// truncating checkpoint.
#[test]
fn test_mvcc_integrity_check() {
    // Runs the integrity check and asserts it yields the single row "ok".
    fn assert_integrity_ok(conn: &Arc<Connection>) {
        let report = get_rows(conn, "PRAGMA integrity_check");
        assert_eq!(report.len(), 1);
        assert_eq!(&report[0][0].cast_text().unwrap(), "ok");
    }

    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("CREATE TABLE t(a INTEGER PRIMARY KEY)")
        .unwrap();
    conn.execute("INSERT INTO t values(1)").unwrap();

    assert_integrity_ok(&conn);
    conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
    assert_integrity_ok(&conn);
}
/// Builds a unique index B-tree with 600 keys directly through the pager,
/// picks a key for which an exact seek returns `SeekResult::TryAdvance`
/// (the test treats this as the key being stored in an interior node —
/// NOTE(review): confirm against `BTreeCursor::seek`), writes that same key
/// again via `MvStore::write_row_to_pager`, and asserts the entry count is
/// unchanged.
#[test]
fn test_checkpoint_index_writer_overwrites_existing_interior_key() {
// Drives an `IOResult`-returning action to completion, waiting on the
// pager's IO whenever the action yields.
fn run_pager_until_done<T>(
mut action: impl FnMut() -> Result<IOResult<T>>,
pager: &Pager,
) -> Result<T> {
loop {
match action()? {
IOResult::Done(value) => return Ok(value),
IOResult::IO(io) => io.wait(pager.io.as_ref())?,
}
}
}
let db = MvccTestDb::new();
let pager = db.conn.pager.load().clone();
// Single-column unique index definition; `root_page` is a placeholder here,
// the real root is created below and passed to the cursor separately.
let index = crate::schema::Index {
name: "testindex".to_string(),
table_name: "test".to_string(),
root_page: 0,
columns: vec![crate::schema::IndexColumn {
name: "id".to_string(),
order: turso_parser::ast::SortOrder::Asc,
pos_in_table: 0,
collation: None,
default: None,
expr: None,
}],
unique: true,
ephemeral: false,
has_rowid: true,
where_clause: None,
index_method: None,
on_conflict: None,
};
// Phase 1: inside a pager write transaction, create the index B-tree and
// insert keys 1..=600.
pager.begin_read_tx().unwrap();
run_pager_until_done(
|| pager.begin_write_tx(crate::storage::wal::WalAutoActions::all_enabled()),
pager.as_ref(),
)
.unwrap();
let root_page = pager
.io
.block(|| pager.btree_create(&crate::storage::pager::CreateBTreeFlags::new_index()))
.unwrap() as i64;
let cursor = Arc::new(RwLock::new(BTreeCursor::new_index(
pager.clone(),
root_page,
&index,
index.columns.len(),
)));
for key in 1..=600 {
// Each index record is (key, key).
let record = ImmutableRecord::from_values(&[Value::from_i64(key), Value::from_i64(key)], 2);
// Position the cursor before inserting; advance once if the seek asks for it.
let seek_result = run_pager_until_done(
|| {
cursor.write().seek(
crate::types::SeekKey::IndexKey(&record),
crate::types::SeekOp::GE { eq_only: true },
)
},
pager.as_ref(),
)
.unwrap();
if matches!(seek_result, SeekResult::TryAdvance) {
run_pager_until_done(|| cursor.write().next(), pager.as_ref()).unwrap();
}
run_pager_until_done(
|| cursor.write().insert(&BTreeKey::new_index_key(&record)),
pager.as_ref(),
)
.unwrap();
}
run_pager_until_done(|| pager.commit_tx(&db.conn, true), pager.as_ref()).unwrap();
// Phase 2: find the first key whose exact seek returns `TryAdvance`.
pager.begin_read_tx().unwrap();
let mut interior_key = None;
for key in 1..=600 {
let record = ImmutableRecord::from_values(&[Value::from_i64(key), Value::from_i64(key)], 2);
let seek_result = run_pager_until_done(
|| {
cursor.write().seek(
crate::types::SeekKey::IndexKey(&record),
crate::types::SeekOp::GE { eq_only: true },
)
},
pager.as_ref(),
)
.unwrap();
if matches!(seek_result, SeekResult::TryAdvance) {
interior_key = Some(key);
break;
}
}
let interior_key = interior_key.expect("test setup should create an index interior key");
// Baseline entry count before the rewrite.
let count_before = run_pager_until_done(|| cursor.write().count(), pager.as_ref()).unwrap();
// Phase 3: write the same key again through the MVCC store's pager writer.
run_pager_until_done(
|| pager.begin_write_tx(crate::storage::wal::WalAutoActions::all_enabled()),
pager.as_ref(),
)
.unwrap();
let index_info = Arc::new(IndexInfo::new_from_index(&index));
let record = ImmutableRecord::from_values(
&[Value::from_i64(interior_key), Value::from_i64(interior_key)],
2,
);
let row_key = SortableIndexKey::new_from_record(record, index_info);
let row = Row::new_index_row(
RowID::new(MVTableId::new(-42), RowKey::Record(row_key)),
index.columns.len(),
);
// NOTE(review): the meaning of the trailing `true` argument is not visible
// here — confirm against `write_row_to_pager`.
let mut write_row_sm = db
.mvcc_store
.write_row_to_pager(&row, cursor.clone(), true)
.unwrap();
// Step the returned state machine to completion, waiting on IO as needed.
loop {
match write_row_sm.step(&()).unwrap() {
IOResult::Done(()) => break,
IOResult::IO(io) => io.wait(pager.io.as_ref()).unwrap(),
}
}
run_pager_until_done(|| pager.commit_tx(&db.conn, true), pager.as_ref()).unwrap();
// Phase 4: the entry count must not have changed.
pager.begin_read_tx().unwrap();
let count_after = run_pager_until_done(|| cursor.write().count(), pager.as_ref()).unwrap();
assert_eq!(
count_after, count_before,
"checkpoint index writer should overwrite an existing interior key, not insert a duplicate"
);
}
/// Deletes and re-inserts every row of a table with a UNIQUE column between
/// two checkpoints, then has SQLite (via `rusqlite`) run `PRAGMA
/// integrity_check` on the resulting database file.
#[test]
fn test_sql_checkpoint_reinsert_existing_interior_index_key_keeps_sqlite_integrity() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let db_path = db.path.clone().unwrap();
    let conn = db.connect();

    let run = |sql: &str| {
        conn.execute(sql).unwrap();
    };

    run("PRAGMA mvcc_checkpoint_threshold = -1");
    run("CREATE TABLE t(payload BLOB, id INTEGER UNIQUE)");

    // First generation of rows, flushed by a checkpoint.
    for id in 1..=600 {
        let insert =
            format!("INSERT INTO t(rowid, payload, id) VALUES ({id}, x'70796c6f6164', {id})");
        conn.execute(insert).unwrap();
    }
    run("PRAGMA wal_checkpoint(TRUNCATE)");

    // Replace every row with a new payload under the same rowid and id.
    for id in 1..=600 {
        let delete = format!("DELETE FROM t WHERE id = {id}");
        conn.execute(delete).unwrap();
        let reinsert =
            format!("INSERT INTO t(rowid, payload, id) VALUES ({id}, x'7265696e73657274', {id})");
        conn.execute(reinsert).unwrap();
    }
    run("PRAGMA wal_checkpoint(TRUNCATE)");
    run("PRAGMA journal_mode = 'wal'");

    conn.close().unwrap();
    force_close_for_artifact_tamper(&mut db);

    // Verify the file with SQLite itself.
    let sqlite = rusqlite::Connection::open(db_path).unwrap();
    let integrity: String = sqlite
        .query_row("PRAGMA integrity_check", [], |row| row.get(0))
        .unwrap();
    assert_eq!(integrity, "ok");
}
/// Checkpoints an indexed table, restarts the database, drops the table, and runs
/// `PRAGMA integrity_check` without checkpointing again.
#[test]
fn test_integrity_check_after_drop_table_before_checkpoint() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let conn = db.connect();
        for ddl in [
            "CREATE TABLE t (id INTEGER PRIMARY KEY, data TEXT)",
            "CREATE INDEX idx_t_data ON t(data)",
        ] {
            conn.execute(ddl).unwrap();
        }
        (0..10).for_each(|i| {
            let data = format!("data_{i}");
            conn.execute(format!("INSERT INTO t VALUES ({i}, '{data}')"))
                .unwrap();
        });
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        // The connection is released here, before the restart.
    }
    db.restart();

    let conn = db.connect();
    conn.execute("DROP TABLE t").unwrap();
    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Checkpoints an indexed table, restarts the database, drops only the index, and runs
/// `PRAGMA integrity_check` without checkpointing again.
#[test]
fn test_integrity_check_after_drop_index_before_checkpoint() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let conn = db.connect();
        for ddl in [
            "CREATE TABLE t (id INTEGER PRIMARY KEY, data TEXT)",
            "CREATE INDEX idx_t_data ON t(data)",
        ] {
            conn.execute(ddl).unwrap();
        }
        (0..10).for_each(|i| {
            let data = format!("data_{i}");
            conn.execute(format!("INSERT INTO t VALUES ({i}, '{data}')"))
                .unwrap();
        });
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        // The connection is released here, before the restart.
    }
    db.restart();

    let conn = db.connect();
    conn.execute("DROP INDEX idx_t_data").unwrap();
    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Starts `DROP TABLE`, forces it to yield at an injected point, abandons the statement,
/// reopens the database over the same `MemoryIO`, and expects both the table and its index
/// to still be listed in `sqlite_schema`.
#[test]
fn test_interrupted_drop_table_rolls_back_schema_table_and_indexes() {
    const TARGET_SCHEMA_SQL: &str = "SELECT type, name FROM sqlite_schema \
        WHERE tbl_name = 'repro_target' ORDER BY rowid";
    // Expected `(type, name)` pairs, in rowid order.
    const EXPECTED_SCHEMA: [(&str, &str); 2] = [
        ("table", "repro_target"),
        ("index", "idx_repro_target_c0"),
    ];

    let io = Arc::new(MemoryIO::new());
    let path = ":memory:interrupted-drop-table-schema-rollback";
    let db = Database::open_file(io.clone(), path).unwrap();
    let conn = db.connect().unwrap();
    for sql in [
        "PRAGMA journal_mode = 'mvcc'",
        "CREATE TABLE repro_target(c0 INTEGER, c1 REAL)",
        "CREATE UNIQUE INDEX IF NOT EXISTS idx_repro_target_c0 \
         ON repro_target (c0) WHERE c1 IS NULL",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let schema_before = get_rows(&conn, TARGET_SCHEMA_SQL);
    assert_eq!(schema_before.len(), 2);
    for (row, (kind, name)) in schema_before.iter().zip(EXPECTED_SCHEMA) {
        assert_eq!(row[0].to_string(), kind);
        assert_eq!(row[1].to_string(), name);
    }

    // Make the first step of DROP TABLE yield with an IO result.
    conn.set_yield_injector(Some(FixedYieldInjector::new([
        CursorYieldPoint::NextStart.point()
    ])));
    let mut drop_stmt = conn.prepare("DROP TABLE repro_target").unwrap();
    match drop_stmt.step().unwrap() {
        crate::StepResult::IO => {}
        other => panic!("expected injected IO yield while dropping repro_target; got {other:?}"),
    }
    conn.set_yield_injector(None);

    // The connection must still serve queries while the DROP is left unfinished.
    let probe = get_rows(&conn, "SELECT 1");
    assert_eq!(probe.len(), 1);
    assert_eq!(probe[0][0].to_string(), "1");

    // Abandon the half-run statement and tear everything down before reopening.
    drop(drop_stmt);
    drop(conn);
    drop(db);

    let db = Database::open_file(io, path).unwrap();
    let conn = db.connect().unwrap();
    let schema_after = get_rows(&conn, TARGET_SCHEMA_SQL);
    assert_eq!(schema_after.len(), 2);
    for (row, (kind, name)) in schema_after.iter().zip(EXPECTED_SCHEMA) {
        assert_eq!(row[0].to_string(), kind);
        assert_eq!(row[1].to_string(), name);
    }
}
/// An insert rolled back inside `BEGIN CONCURRENT` must not be reachable through the UNIQUE
/// column lookup, and the integrity check must still pass.
#[test]
fn test_rollback_with_index() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(a INTEGER PRIMARY KEY, b INTEGER UNIQUE)",
        "BEGIN CONCURRENT",
        "INSERT INTO t values (1, 1)",
        "ROLLBACK",
    ] {
        conn.execute(sql).unwrap();
    }

    let by_unique_col = get_rows(&conn, "SELECT * FROM t where b = 1");
    assert_eq!(by_unique_col.len(), 0);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// An UPDATE touching two UNIQUE columns fails on the second one inside a transaction; after
/// COMMIT, a later UPDATE of the same row must succeed and leave the database consistent.
#[test]
fn test_update_multiple_unique_columns_partial_rollback() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute(
        "CREATE TABLE t(
id INTEGER PRIMARY KEY,
col_a TEXT UNIQUE,
col_b REAL UNIQUE
)",
    )
    .unwrap();
    for seed in [
        "INSERT INTO t VALUES (1, 'original_a', 1.0)",
        "INSERT INTO t VALUES (2, 'other_a', 2.0)",
    ] {
        conn.execute(seed).unwrap();
    }

    conn.execute("BEGIN CONCURRENT").unwrap();
    // col_b = 2.0 is already used by row 2, so this statement must fail.
    let conflicting_update =
        conn.execute("UPDATE t SET col_a = 'new_a', col_b = 2.0 WHERE id = 1");
    assert!(
        conflicting_update.is_err(),
        "Expected unique constraint violation on col_b"
    );
    conn.execute("COMMIT").unwrap();

    conn.execute("UPDATE t SET col_a = 'updated_a', col_b = 3.0 WHERE id = 1")
        .unwrap();
    let updated = get_rows(&conn, "SELECT * FROM t WHERE id = 1");
    assert_eq!(updated.len(), 1);
    assert_eq!(updated[0][1].cast_text().unwrap(), "updated_a");

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Builds a `RowVersion` for the GC tests with the given `begin`/`end` markers, a fixed
/// placeholder row, `id` 0, and `btree_resident` set to `false`.
fn make_rv(begin: Option<TxTimestampOrID>, end: Option<TxTimestampOrID>) -> RowVersion {
    let row = generate_simple_string_row((-2).into(), 1, "gc_test");
    RowVersion {
        id: 0,
        begin,
        end,
        row,
        btree_resident: false,
    }
}
/// Shorthand for `Some(TxTimestampOrID::Timestamp(v))`.
fn ts(v: u64) -> Option<TxTimestampOrID> {
    Some(v).map(TxTimestampOrID::Timestamp)
}
/// Shorthand for `Some(TxTimestampOrID::TxID(v))`.
fn txid(v: u64) -> Option<TxTimestampOrID> {
    Some(v).map(TxTimestampOrID::TxID)
}
/// A lone version with neither `begin` nor `end` is dropped.
#[test]
fn test_gc_rule1_aborted_garbage_removed() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(None, None)];
    let removed = Store::gc_version_chain(&mut chain, u64::MAX, 0);
    assert_eq!(removed, 1);
    assert_eq!(chain.len(), 0);
}
/// Only the `(None, None)` version is dropped when it sits between two other versions.
#[test]
fn test_gc_rule1_aborted_among_live_versions() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![
        make_rv(ts(5), None),  // begin=5, no end
        make_rv(None, None),   // neither begin nor end
        make_rv(ts(3), ts(5)), // begin=3, end=5
    ];
    let removed = Store::gc_version_chain(&mut chain, 2, 0);
    assert_eq!(removed, 1);
    assert_eq!(chain.len(), 2);
    // No survivor may have both markers unset.
    assert!(!chain
        .iter()
        .any(|rv| rv.begin.is_none() && rv.end.is_none()));
}
/// With lwm=10, the version ended at 5 is dropped and the open-ended version stays.
#[test]
fn test_gc_rule2_superseded_below_lwm_with_current() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![
        make_rv(ts(3), ts(5)), // begin=3, end=5
        make_rv(ts(5), None),  // begin=5, no end
    ];
    let removed = Store::gc_version_chain(&mut chain, 10, 0);
    assert_eq!(removed, 1);
    assert_eq!(chain.len(), 1);
    // The survivor is the open-ended version.
    assert!(chain[0].end.is_none());
}
/// With lwm=10, a version ended at 15 is kept alongside its successor.
#[test]
fn test_gc_rule2_superseded_above_lwm_retained() {
    type Store = MvStore<MvccClock>;
    let superseded = make_rv(ts(3), ts(15));
    let current = make_rv(ts(15), None);
    let mut chain = vec![superseded, current];
    let removed = Store::gc_version_chain(&mut chain, 10, 0);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 2);
}
/// A lone ended version (end=5) is kept when ckpt_max=2, even though lwm=10.
#[test]
fn test_gc_rule2_tombstone_guard_uncheckpointed() {
    type Store = MvStore<MvccClock>;
    // begin=3, end=5, and no other version in the chain.
    let mut chain = vec![make_rv(ts(3), ts(5))];
    let removed = Store::gc_version_chain(&mut chain, 10, 2);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 1);
}
/// A lone ended version (end=5) is dropped once ckpt_max reaches 5 (lwm=10).
#[test]
fn test_gc_rule2_tombstone_guard_checkpointed() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(ts(3), ts(5))];
    let removed = Store::gc_version_chain(&mut chain, 10, 5);
    assert_eq!(removed, 1);
    assert_eq!(chain.len(), 0);
}
/// A lone open-ended version with begin=5 is dropped when lwm=10 and ckpt_max=5.
#[test]
fn test_gc_rule3_checkpointed_sole_survivor_removed() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(ts(5), None)];
    let removed = Store::gc_version_chain(&mut chain, 10, 5);
    assert_eq!(removed, 1);
    assert_eq!(chain.len(), 0);
}
/// A lone open-ended version with begin=5 is kept when ckpt_max is only 3 (lwm=10).
#[test]
fn test_gc_rule3_not_checkpointed_retained() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(ts(5), None)];
    let removed = Store::gc_version_chain(&mut chain, 10, 3);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 1);
}
/// A lone open-ended version with begin=5 is kept when lwm=5, even though ckpt_max=10.
#[test]
fn test_gc_rule3_visible_to_active_tx_retained() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(ts(5), None)];
    let removed = Store::gc_version_chain(&mut chain, 5, 10);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 1);
}
/// With ckpt_max=0, a lone open-ended version with begin=1 is kept (lwm=10).
#[test]
fn test_gc_rule3_current_retained_before_first_checkpoint() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(ts(1), None)];
    let removed = Store::gc_version_chain(&mut chain, 10, 0);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 1);
}
/// With ckpt_max=5, a lone open-ended version with begin=1 is dropped (lwm=10).
#[test]
fn test_gc_rule3_current_collected_after_checkpoint() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(ts(1), None)];
    let removed = Store::gc_version_chain(&mut chain, 10, 5);
    assert_eq!(removed, 1);
    assert!(chain.is_empty());
}
/// With lwm=10 and ckpt_max=5, both the ended version and the open-ended one are dropped,
/// emptying the chain.
#[test]
fn test_gc_rule3_not_sole_survivor() {
    type Store = MvStore<MvccClock>;
    let superseded = make_rv(ts(3), ts(5));
    let current = make_rv(ts(5), None);
    let mut chain = vec![superseded, current];
    let removed = Store::gc_version_chain(&mut chain, 10, 5);
    assert_eq!(removed, 2);
    assert_eq!(chain.len(), 0);
}
/// A version whose `begin` is a `TxID` is kept even with lwm and ckpt_max at `u64::MAX`.
#[test]
fn test_gc_txid_refs_retained() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(txid(99), None)];
    let removed = Store::gc_version_chain(&mut chain, u64::MAX, u64::MAX);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 1);
}
/// A version whose `end` is a `TxID` is kept even with lwm and ckpt_max at `u64::MAX`.
#[test]
fn test_gc_txid_end_retained() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(ts(3), txid(50))];
    let removed = Store::gc_version_chain(&mut chain, u64::MAX, u64::MAX);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 1);
}
/// A `TxID`-begun version next to an ended version (end=5 > ckpt_max=2) does not let the
/// ended version be dropped: nothing is removed.
#[test]
fn test_gc_rule2_pending_insert_does_not_disable_tombstone_guard() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![
        make_rv(ts(3), ts(5)),    // begin=3, end=5
        make_rv(txid(99), None),  // begin is a TxID, no end
    ];
    let removed = Store::gc_version_chain(&mut chain, 10, 2);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 2);
}
/// When an open-ended, timestamp-begun version is present, the ended version (end=5) is
/// dropped even though ckpt_max is only 2.
#[test]
fn test_gc_rule2_committed_current_disables_tombstone_guard() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![
        make_rv(ts(3), ts(5)), // begin=3, end=5
        make_rv(ts(5), None),  // begin=5, no end
    ];
    let removed = Store::gc_version_chain(&mut chain, 10, 2);
    assert_eq!(removed, 1);
    assert_eq!(chain.len(), 1);
    let survivor = &chain[0];
    assert!(survivor.end.is_none());
}
/// A version with no `begin` and end=5 survives GC at ckpt_max=3 and is dropped at
/// ckpt_max=5.
#[test]
fn test_gc_rule2_btree_tombstone_lifecycle() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![make_rv(None, ts(5))];

    // First pass: ckpt_max has not reached the end timestamp yet.
    let removed_early = Store::gc_version_chain(&mut chain, u64::MAX, 3);
    assert_eq!(removed_early, 0, "tombstone retained: e=5 > ckpt_max=3");
    assert_eq!(chain.len(), 1);

    // Second pass: ckpt_max has caught up.
    let removed_late = Store::gc_version_chain(&mut chain, u64::MAX, 5);
    assert_eq!(removed_late, 1, "tombstone collected: e=5 <= ckpt_max=5");
    assert_eq!(chain.len(), 0);
}
/// With lwm=10 and ckpt_max=20, a version ended at 15 and its open-ended successor are
/// both kept.
#[test]
fn test_gc_rule3_not_firing_with_unremovable_superseded() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![
        make_rv(ts(3), ts(15)), // begin=3, end=15
        make_rv(ts(15), None),  // begin=15, no end
    ];
    let removed = Store::gc_version_chain(&mut chain, 10, 20);
    assert_eq!(removed, 0);
    assert_eq!(chain.len(), 2);
}
/// GC on an empty chain reports zero dropped versions.
#[test]
fn test_gc_noop_on_empty() {
    type Store = MvStore<MvccClock>;
    let mut chain = Vec::<RowVersion>::new();
    let removed = Store::gc_version_chain(&mut chain, 10, 5);
    assert_eq!(removed, 0);
}
/// A four-version chain mixing all shapes is emptied entirely when lwm=10 and ckpt_max=5.
#[test]
fn test_gc_combined_rules() {
    type Store = MvStore<MvccClock>;
    let mut chain = vec![
        make_rv(None, None),   // neither begin nor end
        make_rv(ts(1), ts(3)), // begin=1, end=3
        make_rv(ts(3), ts(5)), // begin=3, end=5
        make_rv(ts(5), None),  // begin=5, no end
    ];
    let removed = Store::gc_version_chain(&mut chain, 10, 5);
    assert_eq!(removed, 4);
    assert_eq!(chain.len(), 0);
}
/// After a committed insert, `drop_unused_row_versions` drops nothing and the row map
/// stays non-empty.
#[test]
fn test_gc_integration_insert_commit_gc() {
    let db = MvccTestDb::new();
    let store = &db.mvcc_store;

    let tx = store.begin_tx(db.conn.pager.load().clone()).unwrap();
    let inserted = generate_simple_string_row((-2).into(), 1, "gc_test");
    store.insert(tx, inserted).unwrap();
    commit_tx(store.clone(), &db.conn, tx).unwrap();
    assert!(!store.rows.is_empty());

    let removed = store.drop_unused_row_versions();
    assert_eq!(removed, 0);
    assert!(!store.rows.is_empty());
}
/// A rolled-back insert leaves a version with neither `begin` nor `end`; GC drops that
/// version while the row-map entry itself remains with an empty version list.
#[test]
fn test_gc_integration_rollback_creates_aborted_garbage() {
    let db = MvccTestDb::new();
    let tx = db
        .mvcc_store
        .begin_tx(db.conn.pager.load().clone())
        .unwrap();
    let doomed_row = generate_simple_string_row((-2).into(), 1, "will_rollback");
    db.mvcc_store.insert(tx, doomed_row).unwrap();
    db.mvcc_store.rollback_tx(
        tx,
        db.conn.pager.load().clone(),
        &db.conn,
        crate::MAIN_DB_ID,
    );

    let entry_before_gc = db
        .mvcc_store
        .rows
        .get(&RowID::new((-2).into(), RowKey::Int(1)));
    assert!(entry_before_gc.is_some());
    {
        // Scope the read guard so it is released before GC runs.
        let chain = entry_before_gc.as_ref().unwrap().value().read();
        assert_eq!(chain.len(), 1);
        let leftover = &chain[0];
        assert!(leftover.begin.is_none());
        assert!(leftover.end.is_none());
    }

    let removed = db.mvcc_store.drop_unused_row_versions();
    assert_eq!(removed, 1);

    let entry_after_gc = db
        .mvcc_store
        .rows
        .get(&RowID::new((-2).into(), RowKey::Int(1)));
    assert!(entry_after_gc.is_some(), "SkipMap entry stays (lazy removal)");
    assert!(
        entry_after_gc.unwrap().value().read().is_empty(),
        "but versions should be empty"
    );
}
/// While a reader transaction that began before an update is still open, `compute_lwm`
/// equals that reader's `begin_ts` and GC drops nothing; once the reader is removed, the
/// LWM becomes `u64::MAX` and the older version is dropped.
#[test]
fn test_gc_active_reader_pins_lwm() {
let db = MvccTestDb::new();
let table_id: MVTableId = (-2).into();
// tx1: insert and commit the first version of row 1.
let tx1 = db
.mvcc_store
.begin_tx(db.conn.pager.load().clone())
.unwrap();
let row_v1 = generate_simple_string_row(table_id, 1, "version_1");
db.mvcc_store.insert(tx1, row_v1.clone()).unwrap();
commit_tx(db.mvcc_store.clone(), &db.conn, tx1).unwrap();
// tx2: a transaction on a second connection that stays open across the update below.
let conn2 = db.db.connect().unwrap();
let tx2 = db.mvcc_store.begin_tx(conn2.pager.load().clone()).unwrap();
let tx2_begin_ts = db.mvcc_store.txs.get(&tx2).unwrap().value().begin_ts;
// tx3: update row 1 on a third connection and commit while tx2 is still open.
let conn3 = db.db.connect().unwrap();
let tx3 = db.mvcc_store.begin_tx(conn3.pager.load().clone()).unwrap();
let row_v2 = generate_simple_string_row(table_id, 1, "version_2");
db.mvcc_store.update(tx3, row_v2).unwrap();
commit_tx(db.mvcc_store.clone(), &conn3, tx3).unwrap();
let lwm = db.mvcc_store.compute_lwm();
assert_eq!(
lwm, tx2_begin_ts,
"LWM should equal the active reader's begin_ts"
);
let row_id = RowID::new(table_id, RowKey::Int(1));
// First GC pass: tx2 is still open, so nothing may be dropped.
let dropped = db.mvcc_store.drop_unused_row_versions();
assert_eq!(
dropped, 0,
"GC should not remove versions visible to active reader"
);
{
// Scoped so the entry and its read guard are released before the store is used again.
let entry = db.mvcc_store.rows.get(&row_id).unwrap();
let versions = entry.value().read();
assert_eq!(versions.len(), 2, "both versions should be retained");
}
let read_row = db.mvcc_store.read(tx2, &row_id).unwrap().unwrap();
assert_eq!(
read_row, row_v1,
"active reader should still see the old version"
);
// Remove tx2; the assertion below expects the LWM to become u64::MAX.
db.mvcc_store.remove_tx(tx2);
assert_eq!(db.mvcc_store.compute_lwm(), u64::MAX);
// Second GC pass: the older version is now expected to be dropped.
let dropped = db.mvcc_store.drop_unused_row_versions();
assert_eq!(
dropped, 1,
"superseded version should be reclaimed after reader closes"
);
{
let entry = db.mvcc_store.rows.get(&row_id).unwrap();
let versions = entry.value().read();
assert_eq!(versions.len(), 1, "only current version should remain");
}
}
/// End-to-end check through SQL: index lookups return the right rows after a checkpoint,
/// and after an indexed column is updated and checkpointed again.
#[test]
fn test_gc_e2e_index_rows_collected_after_checkpoint() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, val TEXT)",
        "CREATE INDEX idx_val ON t(val)",
        "INSERT INTO t VALUES (1, 'alpha')",
        "INSERT INTO t VALUES (2, 'beta')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let ordered = get_rows(&conn, "SELECT val FROM t ORDER BY val");
    assert_eq!(ordered.len(), 2);
    assert_eq!(ordered[0][0].to_string(), "alpha");
    assert_eq!(ordered[1][0].to_string(), "beta");
    let alpha = get_rows(&conn, "SELECT id FROM t WHERE val = 'alpha'");
    assert_eq!(alpha.len(), 1);
    assert_eq!(alpha[0][0].as_int().unwrap(), 1);

    // Move row 1 from 'alpha' to 'gamma' and checkpoint again.
    conn.execute("UPDATE t SET val = 'gamma' WHERE id = 1")
        .unwrap();
    conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();

    let stale = get_rows(&conn, "SELECT id FROM t WHERE val = 'alpha'");
    assert_eq!(stale.len(), 0);
    let gamma = get_rows(&conn, "SELECT id FROM t WHERE val = 'gamma'");
    assert_eq!(gamma.len(), 1);
    assert_eq!(gamma[0][0].as_int().unwrap(), 1);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Randomly generated input for the `gc_version_chain` property tests: a version chain plus
/// the two `u64` arguments passed alongside it.
#[derive(Debug, Clone)]
struct ArbitraryVersionChain {
// Chain handed to `gc_version_chain` as `&mut`.
versions: Vec<RowVersion>,
// Passed as the second argument of `gc_version_chain`.
lwm: u64,
// Passed as the third argument of `gc_version_chain`.
ckpt_max: u64,
}
/// Generates one `RowVersion` with a random `(begin, end)` shape and a random
/// `btree_resident` flag; `id` and `row` are fixed.
///
/// A selector in `0..25` picks the shape, so the range widths below act as weights.
fn arbitrary_row_version(g: &mut Gen) -> RowVersion {
let kind = u8::arbitrary(g) % 25;
let (begin, end) = match kind {
// 8/25: timestamp begin in 1..=20, no end.
0..=7 => {
let b = u64::arbitrary(g) % 20 + 1;
(Some(TxTimestampOrID::Timestamp(b)), None)
}
// 6/25: timestamp begin in 1..=15 and a timestamp end 1..=10 later.
8..=13 => {
let b = u64::arbitrary(g) % 15 + 1;
let e = b + u64::arbitrary(g) % 10 + 1;
(
Some(TxTimestampOrID::Timestamp(b)),
Some(TxTimestampOrID::Timestamp(e)),
)
}
// 2/25: neither begin nor end.
14..=15 => {
(None, None)
}
// 2/25: TxID begin in 1..=20, no end.
16..=17 => {
let t = u64::arbitrary(g) % 20 + 1;
(Some(TxTimestampOrID::TxID(t)), None)
}
// 2/25: timestamp begin in 1..=15 with a TxID end in 1..=20.
18..=19 => {
let b = u64::arbitrary(g) % 15 + 1;
let t = u64::arbitrary(g) % 20 + 1;
(
Some(TxTimestampOrID::Timestamp(b)),
Some(TxTimestampOrID::TxID(t)),
)
}
// 5/25: no begin, timestamp end in 1..=20.
20..=24 => {
let e = u64::arbitrary(g) % 20 + 1;
(None, Some(TxTimestampOrID::Timestamp(e)))
}
// `kind` is reduced modulo 25 above, so no other value can occur.
_ => unreachable!(),
};
RowVersion {
id: 0,
begin,
end,
row: generate_simple_string_row((-2).into(), 1, "qc"),
btree_resident: bool::arbitrary(g),
}
}
impl Arbitrary for ArbitraryVersionChain {
    /// Produces a chain of 1..=8 random versions plus `lwm` and `ckpt_max` bounds.
    ///
    /// Each bound is 0 or `u64::MAX` with probability 1/5 each, otherwise a value in `0..30`.
    fn arbitrary(g: &mut Gen) -> Self {
        // Draws one bound, biased toward the two extreme values.
        fn edge_biased_bound(g: &mut Gen) -> u64 {
            match u8::arbitrary(g) % 5 {
                0 => 0,
                1 => u64::MAX,
                _ => u64::arbitrary(g) % 30,
            }
        }

        // Draw order (length, versions, lwm, ckpt_max) decides which values are generated.
        let len = usize::arbitrary(g) % 8 + 1;
        let mut versions: Vec<RowVersion> = Vec::with_capacity(len);
        for _ in 0..len {
            versions.push(arbitrary_row_version(g));
        }
        let lwm = edge_biased_bound(g);
        let ckpt_max = edge_biased_bound(g);
        Self {
            versions,
            lwm,
            ckpt_max,
        }
    }
}
#[quickcheck]
fn prop_gc_never_increases_version_count(chain: ArbitraryVersionChain) -> bool {
let before = chain.versions.len();
let mut versions = chain.versions;
MvStore::<MvccClock>::gc_version_chain(&mut versions, chain.lwm, chain.ckpt_max);
versions.len() <= before
}
/// Property: a second GC pass with the same bounds leaves the `(begin, end)` sequence of
/// the chain unchanged.
#[quickcheck]
fn prop_gc_is_idempotent(chain: ArbitraryVersionChain) -> bool {
    let mut after_first = chain.versions.clone();
    MvStore::<MvccClock>::gc_version_chain(&mut after_first, chain.lwm, chain.ckpt_max);

    let mut after_second = after_first.clone();
    MvStore::<MvccClock>::gc_version_chain(&mut after_second, chain.lwm, chain.ckpt_max);

    // `Iterator::eq` also fails on a length mismatch, so no separate length check is needed.
    after_second
        .iter()
        .map(|rv| (&rv.begin, &rv.end))
        .eq(after_first.iter().map(|rv| (&rv.begin, &rv.end)))
}
/// Property: after GC, no version has both `begin` and `end` unset.
#[quickcheck]
fn prop_gc_removes_all_aborted_garbage(chain: ArbitraryVersionChain) -> bool {
    let (lwm, ckpt_max) = (chain.lwm, chain.ckpt_max);
    let mut survivors = chain.versions;
    MvStore::<MvccClock>::gc_version_chain(&mut survivors, lwm, ckpt_max);
    !survivors
        .iter()
        .any(|rv| rv.begin.is_none() && rv.end.is_none())
}
#[quickcheck]
fn prop_gc_retains_txid_begins(chain: ArbitraryVersionChain) -> bool {
let txid_begins_before: usize = chain
.versions
.iter()
.filter(|rv| matches!(&rv.begin, Some(TxTimestampOrID::TxID(_))) && rv.end.is_none())
.count();
let mut versions = chain.versions;
MvStore::<MvccClock>::gc_version_chain(&mut versions, chain.lwm, chain.ckpt_max);
let txid_begins_after: usize = versions
.iter()
.filter(|rv| matches!(&rv.begin, Some(TxTimestampOrID::TxID(_))) && rv.end.is_none())
.count();
txid_begins_after == txid_begins_before
}
/// Property: GC does not change the number of versions that have a `TxID` end and some begin.
#[quickcheck]
fn prop_gc_retains_txid_ends(chain: ArbitraryVersionChain) -> bool {
    // True for a version with a TxID end and any begin set.
    fn is_pending_delete(rv: &RowVersion) -> bool {
        matches!(&rv.end, Some(TxTimestampOrID::TxID(_))) && rv.begin.is_some()
    }

    let pending_before = chain
        .versions
        .iter()
        .filter(|rv| is_pending_delete(rv))
        .count();
    let mut survivors = chain.versions;
    MvStore::<MvccClock>::gc_version_chain(&mut survivors, chain.lwm, chain.ckpt_max);
    let pending_after = survivors.iter().filter(|rv| is_pending_delete(rv)).count();
    pending_before == pending_after
}
#[quickcheck]
fn prop_gc_current_versions_protected_before_checkpoint(chain: ArbitraryVersionChain) -> bool {
let current_before: usize = chain
.versions
.iter()
.filter(|rv| {
matches!(
(&rv.begin, &rv.end),
(Some(TxTimestampOrID::Timestamp(_)), None)
)
})
.count();
let mut versions = chain.versions;
MvStore::<MvccClock>::gc_version_chain(&mut versions, chain.lwm, 0);
let current_after: usize = versions
.iter()
.filter(|rv| {
matches!(
(&rv.begin, &rv.end),
(Some(TxTimestampOrID::Timestamp(_)), None)
)
})
.count();
current_after == current_before
}
/// Property: if the input chain has no open-ended timestamp-begun version, has at least one
/// timestamp end above `ckpt_max`, and has at least one version that is not `(None, None)`,
/// then GC must leave the chain non-empty.
#[quickcheck]
fn prop_gc_tombstone_guard_preserves_btree_safety(chain: ArbitraryVersionChain) -> bool {
    let mut survivors = chain.versions.clone();
    MvStore::<MvccClock>::gc_version_chain(&mut survivors, chain.lwm, chain.ckpt_max);

    // All three predicates look at the chain as it was before GC.
    let input = &chain.versions;
    let had_committed_current = input
        .iter()
        .any(|rv| rv.end.is_none() && matches!(&rv.begin, Some(TxTimestampOrID::Timestamp(_))));
    let had_uncheckpointed_tombstone = input
        .iter()
        .any(|rv| matches!(&rv.end, Some(TxTimestampOrID::Timestamp(e)) if *e > chain.ckpt_max));
    let had_non_garbage = input
        .iter()
        .any(|rv| !(rv.begin.is_none() && rv.end.is_none()));

    let guard_applies = !had_committed_current && had_uncheckpointed_tombstone && had_non_garbage;
    // When the guard does not apply the property holds trivially.
    !guard_applies || !survivors.is_empty()
}
#[quickcheck]
fn prop_gc_no_orphaned_superseded_versions(chain: ArbitraryVersionChain) -> bool {
let mut versions = chain.versions;
MvStore::<MvccClock>::gc_version_chain(&mut versions, chain.lwm, chain.ckpt_max);
let has_committed_current = versions
.iter()
.any(|rv| rv.end.is_none() && matches!(&rv.begin, Some(TxTimestampOrID::Timestamp(_))));
let has_superseded = versions.iter().any(|rv| {
matches!(
(&rv.begin, &rv.end),
(
Some(TxTimestampOrID::Timestamp(_)),
Some(TxTimestampOrID::Timestamp(_))
)
)
});
if has_superseded && !has_committed_current {
versions
.iter()
.filter(|rv| {
matches!(
(&rv.begin, &rv.end),
(
Some(TxTimestampOrID::Timestamp(_)),
Some(TxTimestampOrID::Timestamp(_))
)
)
})
.all(|rv| {
if let Some(TxTimestampOrID::Timestamp(e)) = &rv.end {
*e > chain.lwm || *e > chain.ckpt_max
} else {
false
}
})
} else {
true
}
}
/// A transaction keeps reading the value it saw at its start even after another connection
/// commits an update; it sees the new value only after its own COMMIT.
#[test]
fn test_mvcc_snapshot_isolation() {
    const READ_ROW_2: &str = "SELECT value FROM t WHERE id = 2";

    let db = MvccTestDbNoConn::new_with_random_db();
    let first = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, value INTEGER)",
        "INSERT INTO t VALUES (1, 100), (2, 200), (3, 300)",
        "BEGIN CONCURRENT",
    ] {
        first.execute(sql).unwrap();
    }
    let initial = get_rows(&first, READ_ROW_2);
    assert_eq!(initial[0][0].to_string(), "200");

    // A second connection commits an update while the first transaction is still open.
    let second = db.connect();
    for sql in [
        "BEGIN CONCURRENT",
        "UPDATE t SET value = 999 WHERE id = 2",
        "COMMIT",
    ] {
        second.execute(sql).unwrap();
    }

    let during_tx = get_rows(&first, READ_ROW_2);
    assert_eq!(
        during_tx[0][0].to_string(),
        "200",
        "Tx1 should not see tx2's committed changes"
    );

    first.execute("COMMIT").unwrap();
    let after_commit = get_rows(&first, READ_ROW_2);
    assert_eq!(after_commit[0][0].to_string(), "999");
}
/// An UPDATE touching three UNIQUE columns fails on the last one inside a transaction; after
/// COMMIT, a later UPDATE must succeed and each unique column must find exactly one row.
#[test]
fn test_update_three_unique_columns_partial_rollback() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute(
        "CREATE TABLE t(
id INTEGER PRIMARY KEY,
col_a TEXT UNIQUE,
col_b REAL UNIQUE,
col_c INTEGER UNIQUE
)",
    )
    .unwrap();
    for seed in [
        "INSERT INTO t VALUES (1, 'a1', 1.0, 100)",
        "INSERT INTO t VALUES (2, 'a2', 2.0, 200)",
    ] {
        conn.execute(seed).unwrap();
    }

    conn.execute("BEGIN CONCURRENT").unwrap();
    // col_c = 200 is already used by row 2, so this statement must fail.
    let conflicting_update =
        conn.execute("UPDATE t SET col_a = 'new_a', col_b = 3.0, col_c = 200 WHERE id = 1");
    assert!(
        conflicting_update.is_err(),
        "Expected unique constraint violation on col_c"
    );
    conn.execute("COMMIT").unwrap();

    conn.execute("UPDATE t SET col_a = 'updated_a', col_b = 5.0, col_c = 500 WHERE id = 1")
        .unwrap();
    for lookup in [
        "SELECT * FROM t WHERE col_a = 'updated_a'",
        "SELECT * FROM t WHERE col_b = 5.0",
        "SELECT * FROM t WHERE col_c = 500",
    ] {
        let hits = get_rows(&conn, lookup);
        assert_eq!(hits.len(), 1);
    }

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Runs a successful UPDATE, then a failing UPDATE inside a transaction, then another
/// successful UPDATE on the same row, and checks that the final values and indexes agree.
#[test]
fn test_sequential_updates_with_constraint_errors() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute(
        "CREATE TABLE t(
pk REAL PRIMARY KEY,
unique_col TEXT UNIQUE,
other_unique REAL UNIQUE
)",
    )
    .unwrap();
    for seed in [
        "INSERT INTO t VALUES (1.37, 'sweet_wind_280', 9.05)",
        "INSERT INTO t VALUES (2.13, 'other_value', 2.13)",
    ] {
        conn.execute(seed).unwrap();
    }

    // First update succeeds outside any explicit transaction.
    conn.execute("UPDATE t SET unique_col = 'cold_grass_813', other_unique = 3.90 WHERE pk = 1.37")
        .unwrap();
    let after_first = get_rows(&conn, "SELECT unique_col FROM t WHERE pk = 1.37");
    assert_eq!(after_first[0][0].cast_text().unwrap(), "cold_grass_813");

    // Second update collides with other_unique = 2.13 of the other row.
    conn.execute("BEGIN CONCURRENT").unwrap();
    let conflicting_update =
        conn.execute("UPDATE t SET unique_col = 'new_value', other_unique = 2.13 WHERE pk = 1.37");
    assert!(conflicting_update.is_err(), "Expected unique constraint violation");
    conn.execute("COMMIT").unwrap();

    // Third update must succeed again.
    conn.execute("UPDATE t SET unique_col = 'fresh_sun_348', other_unique = 5.0 WHERE pk = 1.37")
        .unwrap();
    let after_third = get_rows(
        &conn,
        "SELECT unique_col, other_unique FROM t WHERE pk = 1.37",
    );
    assert_eq!(after_third[0][0].cast_text().unwrap(), "fresh_sun_348");
    let by_unique_col = get_rows(&conn, "SELECT * FROM t WHERE unique_col = 'fresh_sun_348'");
    assert_eq!(by_unique_col.len(), 1);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Two inserts succeed and a third fails on the primary key within one transaction; after
/// COMMIT only the first two rows exist.
#[test]
fn test_savepoint_multiple_statements_last_fails() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (1)",
        "INSERT INTO t VALUES (2)",
    ] {
        conn.execute(sql).unwrap();
    }
    let duplicate_insert = conn.execute("INSERT INTO t VALUES (1)");
    assert!(duplicate_insert.is_err(), "Expected primary key violation");
    conn.execute("COMMIT").unwrap();

    let stored = get_rows(&conn, "SELECT * FROM t ORDER BY id");
    assert_eq!(stored.len(), 2);
    assert_eq!(stored[0][0].as_int().unwrap(), 1);
    assert_eq!(stored[1][0].as_int().unwrap(), 2);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Within one transaction a first UPDATE of a row succeeds and a second UPDATE of the same
/// row fails; after COMMIT the row holds the values written by the first UPDATE only.
#[test]
fn test_savepoint_same_row_multiple_statements() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, v INTEGER, other_unique INTEGER UNIQUE)",
        "INSERT INTO t VALUES (1, 100, 1)",
        "INSERT INTO t VALUES (2, 200, 2)",
        "BEGIN CONCURRENT",
        "UPDATE t SET v = 150 WHERE id = 1",
    ] {
        conn.execute(sql).unwrap();
    }
    // other_unique = 2 is already used by row 2, so this statement must fail.
    let conflicting_update = conn.execute("UPDATE t SET v = 175, other_unique = 2 WHERE id = 1");
    assert!(conflicting_update.is_err(), "Expected unique constraint violation");
    conn.execute("COMMIT").unwrap();

    let stored = get_rows(&conn, "SELECT v, other_unique FROM t WHERE id = 1");
    assert_eq!(stored.len(), 1);
    let row = &stored[0];
    assert_eq!(row[0].as_int().unwrap(), 150);
    assert_eq!(row[1].as_int().unwrap(), 1);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Within one transaction a rename to a free name succeeds and a rename to a taken name
/// fails; after COMMIT the UNIQUE index on `name` reflects only the successful rename.
#[test]
fn test_savepoint_index_multiple_statements() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute(
        "CREATE TABLE t(
id INTEGER PRIMARY KEY,
name TEXT UNIQUE,
value INTEGER UNIQUE
)",
    )
    .unwrap();
    for sql in [
        "INSERT INTO t VALUES (1, 'a', 10)",
        "INSERT INTO t VALUES (2, 'b', 20)",
        "BEGIN CONCURRENT",
    ] {
        conn.execute(sql).unwrap();
    }
    conn.execute("UPDATE t SET name = 'c' WHERE id = 1")
        .unwrap();
    // 'b' already belongs to row 2, so this statement must fail.
    let conflicting_rename = conn.execute("UPDATE t SET name = 'b' WHERE id = 1");
    assert!(
        conflicting_rename.is_err(),
        "Expected unique constraint violation on name"
    );
    conn.execute("COMMIT").unwrap();

    let name_of_row_1 = get_rows(&conn, "SELECT name FROM t WHERE id = 1");
    assert_eq!(name_of_row_1.len(), 1);
    assert_eq!(name_of_row_1[0][0].cast_text().unwrap(), "c");

    // Lookups by name: 'c' -> row 1, 'a' -> nothing, 'b' -> row 2.
    let by_c = get_rows(&conn, "SELECT id FROM t WHERE name = 'c'");
    assert_eq!(by_c.len(), 1);
    assert_eq!(by_c[0][0].as_int().unwrap(), 1);
    let by_a = get_rows(&conn, "SELECT id FROM t WHERE name = 'a'");
    assert_eq!(by_a.len(), 0);
    let by_b = get_rows(&conn, "SELECT id FROM t WHERE name = 'b'");
    assert_eq!(by_b.len(), 1);
    assert_eq!(by_b[0][0].as_int().unwrap(), 2);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Inserts and deletes a row inside a transaction, then hits a UNIQUE violation; after
/// COMMIT the deleted row is absent and the pre-existing row is intact.
#[test]
fn test_savepoint_insert_delete_then_fail() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, v INTEGER UNIQUE)",
        "INSERT INTO t VALUES (2, 200)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (1, 100)",
        "DELETE FROM t WHERE id = 1",
    ] {
        conn.execute(sql).unwrap();
    }
    // v = 200 is already used by row 2, so this statement must fail.
    let conflicting_insert = conn.execute("INSERT INTO t VALUES (3, 200)");
    assert!(conflicting_insert.is_err(), "Expected unique constraint violation");
    conn.execute("COMMIT").unwrap();

    let deleted = get_rows(&conn, "SELECT * FROM t WHERE id = 1");
    assert_eq!(deleted.len(), 0);
    let untouched = get_rows(&conn, "SELECT * FROM t WHERE id = 2");
    assert_eq!(untouched.len(), 1);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// After the only row is deleted, `ORDER BY val DESC` on a UNIQUE column returns nothing.
#[test]
fn test_delete_row_is_hidden_from_desc_unique_index_scan() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, val INTEGER UNIQUE)",
        "INSERT INTO t VALUES (42, 46)",
        "DELETE FROM t WHERE id = 42",
    ] {
        conn.execute(sql).unwrap();
    }

    let scanned = get_rows(&conn, "SELECT id, val FROM t ORDER BY val DESC");
    assert_eq!(scanned, Vec::<Vec<Value>>::new());

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// With an explicit index on `val`, `ORDER BY val DESC` skips a deleted row and returns
/// only the remaining one.
#[test]
fn test_delete_row_is_skipped_by_desc_explicit_index_scan() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, val INTEGER)",
        "CREATE INDEX idx_t_val ON t(val)",
        "INSERT INTO t VALUES (1, 10)",
        "INSERT INTO t VALUES (2, 20)",
        "DELETE FROM t WHERE id = 2",
    ] {
        conn.execute(sql).unwrap();
    }

    let scanned = get_rows(&conn, "SELECT id, val FROM t ORDER BY val DESC");
    assert_eq!(scanned.len(), 1);
    let remaining = &scanned[0];
    assert_eq!(remaining[0].as_int().unwrap(), 1);
    assert_eq!(remaining[1].as_int().unwrap(), 10);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Rows are checkpointed and the database restarted before one of them is deleted; a DESC
/// scan over the UNIQUE column must then return only the remaining row.
#[test]
fn test_delete_btree_resident_row_is_skipped_by_desc_unique_index_scan() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, val INTEGER UNIQUE)",
            "INSERT INTO t VALUES (1, 10)",
            "INSERT INTO t VALUES (2, 20)",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
        // `setup` is released here, before the restart.
    }
    db.restart();

    let conn = db.connect();
    conn.execute("DELETE FROM t WHERE id = 2").unwrap();
    let scanned = get_rows(&conn, "SELECT id, val FROM t ORDER BY val DESC");
    assert_eq!(scanned.len(), 1);
    let remaining = &scanned[0];
    assert_eq!(remaining[0].as_int().unwrap(), 1);
    assert_eq!(remaining[1].as_int().unwrap(), 10);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// A reader transaction repeats a DESC index scan after another connection commits a new
/// row; the scan and `MAX(val)` must still return the results from before that insert.
#[test]
fn test_desc_index_scan_respects_mvcc_snapshot_for_concurrent_insert() {
    const DESC_SCAN: &str = "SELECT id, val FROM t WHERE val > 10 ORDER BY val DESC";

    let db = MvccTestDbNoConn::new_with_random_db();
    let setup = db.connect();
    for sql in [
        "CREATE TABLE t(id INT, val INT)",
        "CREATE INDEX idx_val ON t(val)",
        "INSERT INTO t VALUES (1, 10)",
        "INSERT INTO t VALUES (2, 20)",
        "INSERT INTO t VALUES (3, 30)",
    ] {
        setup.execute(sql).unwrap();
    }

    let reader = db.connect();
    reader.execute("BEGIN CONCURRENT").unwrap();
    let first_scan = get_rows(&reader, DESC_SCAN);
    assert_eq!(first_scan.len(), 2);

    // Another connection commits a larger value while the reader is still open.
    let writer = db.connect();
    for sql in [
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (4, 100)",
        "COMMIT",
    ] {
        writer.execute(sql).unwrap();
    }

    let second_scan = get_rows(&reader, DESC_SCAN);
    assert_eq!(
        second_scan.len(),
        2,
        "DESC scan must still see 2 rows from snapshot"
    );
    assert_eq!(second_scan[0][1].as_int().unwrap(), 30);
    assert_eq!(second_scan[1][1].as_int().unwrap(), 20);

    let max_val = get_rows(&reader, "SELECT MAX(val) FROM t");
    assert_eq!(
        max_val[0][0].as_int().unwrap(),
        30,
        "MAX must still be 30 from snapshot"
    );
}
/// Checkpoints two rows, restarts, deletes both, re-inserts the same ids with
/// new values, and checks that only the new values are returned.
#[test]
fn test_mvcc_dual_cursor_delete_all_btree_reinsert() {
    let _ = tracing_subscriber::fmt::try_init();
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let seed = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, val TEXT)",
            "INSERT INTO t VALUES (1, 'old1')",
            "INSERT INTO t VALUES (2, 'old2')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            seed.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    for sql in [
        "DELETE FROM t WHERE id IN (1, 2)",
        "INSERT INTO t VALUES (1, 'new1')",
        "INSERT INTO t VALUES (2, 'new2')",
    ] {
        conn.execute(sql).unwrap();
    }

    let reinserted = get_rows(&conn, "SELECT id, val FROM t ORDER BY id");
    assert_eq!(reinserted.len(), 2);
    assert_eq!(reinserted[0][1].to_string(), "new1");
    assert_eq!(reinserted[1][1].to_string(), "new2");
}
/// Creates 30 indexed tables plus `test_table`, asserts that `test_table` and
/// its autoindex report negative root pages before a checkpoint, then checks
/// that a row inserted into `test_table` is still found after the checkpoint.
#[test]
fn test_checkpoint_root_page_mismatch_with_index() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();

    for table_num in 1..=30 {
        let create_table =
            format!("CREATE TABLE tbl{table_num} (id INTEGER PRIMARY KEY, data TEXT)");
        conn.execute(create_table).unwrap();

        let create_index = format!("CREATE INDEX idx{table_num} ON tbl{table_num}(data)");
        conn.execute(create_index).unwrap();

        for i in 0..10 {
            let insert =
                format!("INSERT INTO tbl{table_num} VALUES ({i}, 'data_{table_num}_{i}')");
            conn.execute(insert).unwrap();
        }
    }
    println!("Created 30 tables with indexes and data");

    conn.execute("CREATE TABLE test_table (key TEXT PRIMARY KEY, value TEXT)")
        .unwrap();

    // Read back the schema entries that belong to test_table.
    let schema_rows = get_rows(
        &conn,
        "SELECT name, rootpage FROM sqlite_schema WHERE tbl_name = 'test_table' ORDER BY name",
    );

    let table_entry = schema_rows
        .iter()
        .find(|entry| entry[0].to_string() == "test_table")
        .unwrap();
    let table_root: i64 = table_entry[1].to_string().parse().unwrap();

    let index_entry = schema_rows
        .iter()
        .find(|entry| entry[0].to_string().contains("autoindex"))
        .unwrap();
    let index_root: i64 = index_entry[1].to_string().parse().unwrap();

    assert!(
        table_root < 0,
        "test_table should have negative root before checkpoint"
    );
    assert!(
        index_root < 0,
        "test_table index should have negative root before checkpoint"
    );

    let lookup = "SELECT value FROM test_table WHERE key = 'test_key'";

    conn.execute("INSERT INTO test_table (key, value) VALUES ('test_key', 'test_value')")
        .unwrap();
    let before = get_rows(&conn, lookup);
    assert_eq!(before.len(), 1, "Row should exist before checkpoint");
    assert_eq!(before[0][0].to_string(), "test_value");
    println!("Inserted row into test_table, verified it exists");

    conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
    println!("Checkpoint complete");

    let after = get_rows(&conn, lookup);
    assert_eq!(after.len(), 1);
    assert_eq!(after[0][0].to_string(), "test_value", "Value should match");
    println!("Test passed - row found correctly after checkpoint");
}
/// Creates and populates an indexed table, drops it, checkpoints, restarts,
/// and expects `PRAGMA integrity_check` to report `ok`.
#[test]
fn test_checkpoint_drop_table() {
    let mut db = MvccTestDbNoConn::new_with_random_db();

    let writer = db.connect();
    writer
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, data TEXT)")
        .unwrap();
    writer.execute("CREATE INDEX idx_t_data ON t(data)").unwrap();
    for i in 0..10 {
        writer
            .execute(format!("INSERT INTO t VALUES ({i}, 'data_{i}')"))
            .unwrap();
    }
    writer.execute("DROP TABLE t").unwrap();
    writer.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
    drop(writer);

    db.restart();

    let conn = db.connect();
    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Checkpoints two tables, drops one and creates an index on the other,
/// checkpoints again, restarts, and verifies the surviving table's row and
/// the integrity check.
#[test]
fn test_checkpoint_drop_table_then_create_index_page_reuse() {
    let mut db = MvccTestDbNoConn::new_with_random_db();

    let writer = db.connect();
    for sql in [
        "CREATE TABLE a(id INTEGER PRIMARY KEY, v TEXT)",
        "CREATE TABLE b(id INTEGER PRIMARY KEY, v TEXT)",
        "INSERT INTO a VALUES(1,'x')",
        "INSERT INTO b VALUES(1,'y')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
        "DROP TABLE a",
        "CREATE INDEX new_b_v ON b(v)",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        writer.execute(sql).unwrap();
    }
    drop(writer);

    db.restart();

    let conn = db.connect();
    let b_rows = get_rows(&conn, "SELECT * FROM b");
    assert_eq!(b_rows.len(), 1);
    let only = &b_rows[0];
    assert_eq!(only[0].to_string(), "1");
    assert_eq!(only[1].to_string(), "y");

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// After one connection commits a row with key 666, a second connection that
/// begins afterwards must get an error when inserting the same key.
#[test]
fn test_mvcc_same_primary_key() {
    let db = MvccTestDbNoConn::new_with_random_db();

    let first = db.connect();
    first
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
        .unwrap();
    let second = db.connect();

    for sql in [
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (666)",
        "COMMIT",
    ] {
        first.execute(sql).unwrap();
    }

    second.execute("BEGIN CONCURRENT").unwrap();
    let duplicate = second.execute("INSERT INTO t VALUES (666)");
    duplicate.expect_err("duplicate key - visible committed row");
}
/// Two overlapping `BEGIN CONCURRENT` transactions insert the same primary
/// key; the first COMMIT succeeds and the second COMMIT must fail.
#[test]
fn test_mvcc_same_primary_key_concurrent() {
    let db = MvccTestDbNoConn::new_with_random_db();

    let first = db.connect();
    first
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY)")
        .unwrap();
    let second = db.connect();

    // Both transactions insert before either one commits.
    for tx in [&first, &second] {
        tx.execute("BEGIN CONCURRENT").unwrap();
        tx.execute("INSERT INTO t VALUES (666)").unwrap();
    }

    first.execute("COMMIT").unwrap();
    let loser = second.execute("COMMIT");
    loser.expect_err("duplicate key - first committer wins");
}
/// Inserts two rows, checkpoints, and verifies both rows are still returned
/// and that the integrity check reports `ok`.
#[test]
fn test_gc_e2e_checkpointed_row_readable_after_gc() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, val TEXT)",
        "INSERT INTO t VALUES (1, 'hello')",
        "INSERT INTO t VALUES (2, 'world')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let found = get_rows(&conn, "SELECT id, val FROM t ORDER BY id");
    assert_eq!(found.len(), 2);
    for (row, (id, val)) in found.iter().zip([(1, "hello"), (2, "world")]) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].to_string(), val);
    }

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
// Checkpoints two rows, restarts, deletes one, checkpoints again, and checks
// that only the kept row is returned. `encrypted` is not declared in this
// function; presumably the `turso_macros::test(encryption)` attribute
// supplies it.
#[turso_macros::test(encryption)]
fn test_gc_e2e_deleted_row_stays_hidden_after_gc() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        let seed = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, val TEXT)",
            "INSERT INTO t VALUES (1, 'keep')",
            "INSERT INTO t VALUES (2, 'delete_me')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            seed.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    conn.execute("DELETE FROM t WHERE id = 2").unwrap();
    conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();

    let visible = get_rows(&conn, "SELECT id, val FROM t ORDER BY id");
    assert_eq!(visible.len(), 1);
    let kept = &visible[0];
    assert_eq!(kept[0].as_int().unwrap(), 1);
    assert_eq!(kept[1].to_string(), "keep");

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
// Checkpoints one row, restarts, updates it, checkpoints again, and checks
// that the updated value is returned. `encrypted` is not declared in this
// function; presumably the `turso_macros::test(encryption)` attribute
// supplies it.
#[turso_macros::test(encryption)]
fn test_gc_e2e_updated_row_correct_after_gc() {
    let mut db = MvccTestDbNoConn::new_maybe_encrypted(encrypted);
    {
        let seed = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, val TEXT)",
            "INSERT INTO t VALUES (1, 'original')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            seed.execute(sql).unwrap();
        }
    }
    db.restart();

    let conn = db.connect();
    for sql in [
        "UPDATE t SET val = 'updated' WHERE id = 1",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let current = get_rows(&conn, "SELECT val FROM t WHERE id = 1");
    assert_eq!(current.len(), 1);
    assert_eq!(current[0][0].to_string(), "updated");

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Runs three rounds of modifications, each followed by a checkpoint, and
/// verifies the final table contents and the integrity check.
#[test]
fn test_gc_e2e_multiple_checkpoint_gc_cycles() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, val INTEGER)")
        .unwrap();

    // Round 1: rows 1..=5 with val == id.
    for i in 1..=5 {
        let insert = format!("INSERT INTO t VALUES ({i}, {i})");
        conn.execute(insert).unwrap();
    }
    conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();

    // Rounds 2 and 3: deletes, an update and an insert, each round ending in
    // a checkpoint.
    for sql in [
        "DELETE FROM t WHERE id IN (2, 4)",
        "UPDATE t SET val = 30 WHERE id = 3",
        "PRAGMA wal_checkpoint(TRUNCATE)",
        "INSERT INTO t VALUES (6, 6)",
        "DELETE FROM t WHERE id = 1",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let survivors = get_rows(&conn, "SELECT id, val FROM t ORDER BY id");
    assert_eq!(survivors.len(), 3);
    for (row, (id, val)) in survivors.iter().zip([(3, 30), (5, 5), (6, 6)]) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].as_int().unwrap(), val);
    }

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Two overlapping `BEGIN CONCURRENT` transactions insert the same value into
/// a UNIQUE column; the first COMMIT succeeds and the second must fail.
#[test]
fn test_mvcc_unique_constraint() {
    let db = MvccTestDbNoConn::new_with_random_db();

    let first = db.connect();
    first.execute("CREATE TABLE t (id UNIQUE)").unwrap();
    let second = db.connect();

    // Both transactions insert before either one commits.
    for tx in [&first, &second] {
        tx.execute("BEGIN CONCURRENT").unwrap();
        tx.execute("INSERT INTO t VALUES (666)").unwrap();
    }

    first.execute("COMMIT").unwrap();
    let loser = second.execute("COMMIT");
    loser.expect_err("duplicate unique - first committer wins");
}
/// Holds `pager_commit_lock` while stepping a `COMMIT` statement and expects
/// the statement to return `StepResult::IO`; after the lock is released the
/// statement is stepped to `Done` and the inserted row must be counted.
#[test]
fn test_concurrent_commit_yield_spin() {
    let db = MvccTestDbNoConn::new();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (1, 'a')",
    ] {
        conn.execute(sql).unwrap();
    }

    let mv_store = db.get_mvcc_store();
    let commit_lock = &mv_store.commit_coordinator.pager_commit_lock;
    assert!(commit_lock.write(), "should acquire lock");

    let mut commit = conn.prepare("COMMIT").unwrap();

    // Step at most 100 times; stop at the first IO (expected) or Done.
    let mut attempts = 0;
    let returned_io = loop {
        if attempts == 100 {
            break false;
        }
        attempts += 1;
        match commit.step().unwrap() {
            crate::StepResult::IO => break true,
            crate::StepResult::Done => break false,
            _ => {}
        }
    };
    assert!(
        returned_io,
        "step() should return IO when pager_commit_lock is contended"
    );

    commit_lock.unlock();

    // With the lock released, keep stepping until the commit reports Done.
    while !matches!(commit.step().unwrap(), crate::StepResult::Done) {}

    let count = get_rows(&conn, "SELECT COUNT(*) FROM t");
    assert_eq!(count[0][0].as_int().unwrap(), 1);
}
/// Starts a `COMMIT` on `conn` while holding the store's `pager_commit_lock`,
/// asserts that the first step returns `StepResult::IO`, then drops the
/// statement, releases the lock, and closes the connection.
fn abandon_commit_after_first_io(conn: &Arc<Connection>, mv_store: &Arc<MvStore<MvccClock>>) {
    let commit_lock = &mv_store.commit_coordinator.pager_commit_lock;
    assert!(commit_lock.write(), "should acquire commit lock");

    let mut commit = conn.prepare("COMMIT").unwrap();
    assert!(
        matches!(commit.step().unwrap(), crate::StepResult::IO),
        "COMMIT should yield while the commit lock is held",
    );

    // Abandon the half-finished commit before releasing the lock.
    drop(commit);
    commit_lock.unlock();
    conn.close().unwrap();
}
/// Injects a yield at `CommitYieldPoint::LogRecordPrepared`, abandons the
/// `COMMIT` after its first IO result, and verifies that a new connection
/// does not see the inserted row.
#[test]
fn test_abandoned_commit_rolls_back_insert_with_injected_yield() {
    let db = MvccTestDbNoConn::new_with_random_db_with_opts(DatabaseOpts::new());

    let writer = db.connect();
    for sql in [
        "CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (1, 'new')",
    ] {
        writer.execute(sql).unwrap();
    }

    let injector = FixedYieldInjector::new([CommitYieldPoint::LogRecordPrepared.point()]);
    writer.set_yield_injector(Some(injector));

    let mut commit = writer.prepare("COMMIT").unwrap();
    assert!(
        matches!(commit.step().unwrap(), crate::StepResult::IO),
        "MVCC commit should yield before completion",
    );
    drop(commit);
    writer.close().unwrap();

    let observer = db.connect();
    let rows = get_rows(&observer, "SELECT id FROM t WHERE id = 1");
    assert!(
        rows.is_empty(),
        "row from abandoned INSERT commit remained visible: {rows:?}",
    );
    observer.close().unwrap();
}
/// Commits a write set of `3 * MVCC_COMMIT_BATCH_SIZE` rows and counts the IO
/// results that `COMMIT` returns between the injected `BuildLogRecordStart`
/// and `LogRecordPrepared` yields. The test expects exactly 4 such results.
#[test]
fn test_build_log_record_yields_for_large_write_set() {
    use super::MVCC_COMMIT_BATCH_SIZE;

    /// Yields once at `start` and once at `end`, recording each firing in a
    /// shared flag so the test body can tell which side of the bracket the
    /// commit has reached.
    #[derive(Debug)]
    struct BracketingYieldInjector {
        start: YieldPoint,
        end: YieldPoint,
        started: Arc<AtomicBool>,
        finished: Arc<AtomicBool>,
    }

    impl YieldInjector for BracketingYieldInjector {
        fn should_yield(&self, _instance_id: u64, _selection_key: u64, point: YieldPoint) -> bool {
            // `swap` tests and sets the flag in a single atomic step, so each
            // point fires at most once even if this were called concurrently
            // (a separate load followed by a store could fire twice).
            // Short-circuiting keeps a flag untouched unless its point matches.
            (point == self.start && !self.started.swap(true, Ordering::SeqCst))
                || (point == self.end && !self.finished.swap(true, Ordering::SeqCst))
        }
    }

    let db = MvccTestDbNoConn::new_with_random_db_with_opts(DatabaseOpts::new());
    let conn = db.connect();
    conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    let n_rows = 3 * MVCC_COMMIT_BATCH_SIZE;
    conn.execute("BEGIN CONCURRENT").unwrap();
    for i in 1..=n_rows {
        conn.execute(format!("INSERT INTO t VALUES ({i}, 'val')"))
            .unwrap();
    }

    let started = Arc::new(AtomicBool::new(false));
    let finished = Arc::new(AtomicBool::new(false));
    conn.set_yield_injector(Some(Arc::new(BracketingYieldInjector {
        start: CommitYieldPoint::BuildLogRecordStart.point(),
        end: CommitYieldPoint::LogRecordPrepared.point(),
        started: started.clone(),
        finished: finished.clone(),
    })));

    let mut stmt = conn.prepare("COMMIT").unwrap();
    let mut chunked_io_yields = 0;
    let mut saw_start = false;
    loop {
        match stmt.step().unwrap() {
            crate::StepResult::IO => {
                if !saw_start {
                    // IO results up to and including the one on which the
                    // start flag is first observed are not counted.
                    if started.load(Ordering::SeqCst) {
                        saw_start = true;
                    }
                    continue;
                }
                // Once the end flag is set, stop without counting this IO.
                if finished.load(Ordering::SeqCst) {
                    break;
                }
                chunked_io_yields += 1;
            }
            crate::StepResult::Done => break,
            other => panic!("unexpected step result: {other:?}"),
        }
    }

    assert!(
        saw_start,
        "BuildLogRecordStart yield never fired — BuildLogRecord state never reached"
    );
    assert!(
        finished.load(Ordering::SeqCst),
        "LogRecordPrepared yield never fired — BuildLogRecord did not complete"
    );
    assert_eq!(
        chunked_io_yields, 4,
        "with {n_rows} rows, expected exactly 4 chunked IO yields between \
BuildLogRecordStart and LogRecordPrepared, got {chunked_io_yields}"
    );

    // The commit is intentionally left unfinished; drop it and close.
    drop(stmt);
    conn.close().unwrap();
}
/// Injects a failure at `CommitYieldPoint::AfterRemoveTx` and verifies that,
/// after the failed `COMMIT`, the transaction is gone from `mv_store.txs` and
/// the connection no longer reports an MVCC transaction id.
#[test]
fn test_commit_failure_after_remove_tx_does_not_strand_conn_cache() {
    let db = MvccTestDbNoConn::new();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (1, 'new')",
    ] {
        conn.execute(sql).unwrap();
    }

    let tx_id = conn.get_mv_tx_id().expect("tx should be open after BEGIN");
    let mv_store = db.get_mvcc_store();
    assert!(
        mv_store.txs.get(&tx_id).is_some(),
        "precondition: tx must be live in txs before COMMIT"
    );

    let synthetic = LimboError::TxError("synthetic post-remove_tx failure".to_string());
    let injector =
        FixedFailureInjector::new([(CommitYieldPoint::AfterRemoveTx.point(), synthetic)]);
    conn.set_failure_injector(Some(injector));

    let commit_err = conn
        .execute("COMMIT")
        .expect_err("commit must fail at the injected boundary");
    tracing::info!("injected commit failure: {commit_err}");

    assert!(
        mv_store.txs.get(&tx_id).is_none(),
        "fix: tx must be gone from txs (finish_committed_tx ran before the \
injection point)"
    );
    assert_eq!(
        conn.get_mv_tx_id(),
        None,
        "fix: connection mv_tx cache must be cleared in lock-step with the \
txs removal — pre-fix this stranded the cache"
    );
    conn.close().unwrap();
}
/// Abandons a `COMMIT` of an INSERT via `abandon_commit_after_first_io` and
/// verifies that a new connection does not see the inserted row.
#[test]
fn test_abandoned_commit_rolls_back_insert() {
    let db = MvccTestDbNoConn::new();

    let writer = db.connect();
    for sql in [
        "CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (1, 'new')",
    ] {
        writer.execute(sql).unwrap();
    }

    let store = db.get_mvcc_store();
    abandon_commit_after_first_io(&writer, &store);

    let observer = db.connect();
    let rows = get_rows(&observer, "SELECT id FROM t WHERE id = 1");
    assert!(
        rows.is_empty(),
        "row from abandoned INSERT commit remained visible: {rows:?}",
    );
    observer.close().unwrap();
}
/// Abandons a `COMMIT` of a DELETE via `abandon_commit_after_first_io` and
/// verifies that a new connection still sees the original row.
#[test]
fn test_abandoned_commit_rolls_back_delete() {
    let db = MvccTestDbNoConn::new();

    let writer = db.connect();
    for sql in [
        "CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)",
        "INSERT INTO t VALUES (1, 'seed')",
        "BEGIN CONCURRENT",
        "DELETE FROM t WHERE id = 1",
    ] {
        writer.execute(sql).unwrap();
    }

    let store = db.get_mvcc_store();
    abandon_commit_after_first_io(&writer, &store);

    let observer = db.connect();
    let rows = get_rows(&observer, "SELECT id, v FROM t WHERE id = 1");
    let seed_row = vec![
        Value::Numeric(Numeric::Integer(1)),
        Value::Text(Text::new("seed".to_string())),
    ];
    assert_eq!(
        rows,
        vec![seed_row],
        "row disappeared after abandoned DELETE commit: {rows:?}",
    );
    observer.close().unwrap();
}
/// Renames a table that has a secondary index across restarts and checks
/// that the renamed table can still be queried after a final restart.
#[test]
fn test_alter_table_rename_with_index_panics_on_restart() {
    let mut db = MvccTestDbNoConn::new_with_random_db();

    // Session 1: create the table, its index, and one row.
    {
        let session = db.connect();
        for sql in [
            "PRAGMA mvcc_checkpoint_threshold = 1",
            "CREATE TABLE old_name(id INTEGER PRIMARY KEY, val TEXT)",
            "CREATE INDEX idx_val ON old_name(val)",
            "INSERT INTO old_name VALUES (1, 'a')",
        ] {
            session.execute(sql).unwrap();
        }
        session.close().unwrap();
    }
    db.restart();

    // Session 2: rename the table.
    {
        let session = db.connect();
        for sql in [
            "PRAGMA journal_mode = 'mvcc'",
            "ALTER TABLE old_name RENAME TO new_name",
        ] {
            session.execute(sql).unwrap();
        }
        session.close().unwrap();
    }
    db.restart();

    // Session 3: the row must be readable under the new name.
    {
        let session = db.connect();
        session.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        let renamed_rows = get_rows(&session, "SELECT * FROM new_name");
        assert_eq!(renamed_rows.len(), 1);
    }
}
/// Renames a table that has a UNIQUE column across restarts and checks that
/// the renamed table can still be queried after a final restart.
#[test]
fn test_alter_table_rename_with_unique_constraint_panics_on_restart() {
    let _ = tracing_subscriber::fmt::try_init();
    let mut db = MvccTestDbNoConn::new_with_random_db();

    // Session 1: create the table with a UNIQUE column and one row.
    {
        let session = db.connect();
        for sql in [
            "PRAGMA mvcc_checkpoint_threshold = 1",
            "CREATE TABLE old_name(id INTEGER PRIMARY KEY, val TEXT UNIQUE)",
            "INSERT INTO old_name VALUES (1, 'a')",
        ] {
            session.execute(sql).unwrap();
        }
        session.close().unwrap();
    }
    db.restart();

    // Session 2: rename the table.
    {
        let session = db.connect();
        for sql in [
            "PRAGMA journal_mode = 'mvcc'",
            "ALTER TABLE old_name RENAME TO new_name",
        ] {
            session.execute(sql).unwrap();
        }
        session.close().unwrap();
    }
    db.restart();

    // Session 3: the row must be readable under the new name.
    {
        let session = db.connect();
        session.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        let renamed_rows = get_rows(&session, "SELECT * FROM new_name");
        assert_eq!(renamed_rows.len(), 1);
    }
}
/// Drops a checkpointed table on a connection that is then closed, restarts,
/// and verifies that a table with the same name can be created again and is
/// empty.
#[test]
fn test_close_persists_drop_table() {
    let mut db = MvccTestDbNoConn::new_with_random_db();

    let creator = db.connect();
    for sql in [
        "CREATE TABLE todrop(id INTEGER PRIMARY KEY, val TEXT)",
        "INSERT INTO todrop VALUES (1, 'data')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        creator.execute(sql).unwrap();
    }
    creator.close().unwrap();

    let dropper = db.connect();
    dropper.execute("DROP TABLE todrop").unwrap();
    dropper.close().unwrap();

    db.restart();

    let conn = db.connect();
    if let Err(err) = conn.execute("CREATE TABLE todrop(id INTEGER PRIMARY KEY, newval TEXT)") {
        panic!("CREATE TABLE should succeed after DROP, but got: {err:?}");
    }

    let contents = get_rows(&conn, "SELECT * FROM todrop");
    assert!(
        contents.is_empty(),
        "New table should be empty, got {contents:?}"
    );

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Interrupts a `DROP TABLE` with an injected yield at
/// `CursorYieldPoint::NextStart`, resets the statement, commits the enclosing
/// transaction, reopens the database, and expects both schema entries for `t`
/// to still be present.
#[test]
fn test_abandoned_drop() {
    let _ = tracing_subscriber::fmt::try_init();
    let io = Arc::new(MemoryIO::new());
    let path = ":memory:";
    let db = Database::open_file(io.clone(), path).unwrap();
    let conn = db.connect().unwrap();

    for sql in [
        "PRAGMA journal_mode = 'mvcc'",
        "CREATE TABLE t(id INTEGER PRIMARY KEY, row_number INTEGER, ts INTEGER)",
        "CREATE UNIQUE INDEX IF NOT EXISTS t_index \
ON t (row_number) WHERE ts IS NULL",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }
    assert!(conn.get_auto_commit());

    let injector = FixedYieldInjector::new([CursorYieldPoint::NextStart.point()]);
    conn.set_yield_injector(Some(injector));
    conn.execute("BEGIN").unwrap();

    // The first step of DROP TABLE must come back as the injected IO yield.
    let mut drop_stmt = conn.prepare("DROP TABLE t").unwrap();
    let first_step = drop_stmt.step().unwrap();
    assert!(
        matches!(first_step, crate::StepResult::IO),
        "expected injected IO yield mid-DROP TABLE; got {first_step:?}"
    );

    // Abandon the statement mid-flight, then commit the surrounding tx.
    conn.set_yield_injector(None);
    drop_stmt.reset().unwrap();
    drop(drop_stmt);
    conn.execute("COMMIT").unwrap();
    drop(conn);
    drop(db);

    let reopened = Database::open_file(io, path).expect(
        "reopen should not fail; abandoned DROP must not have committed its partial Delete",
    );
    let conn = reopened.connect().unwrap();
    let after = get_rows(
        &conn,
        "SELECT type, name FROM sqlite_schema \
WHERE tbl_name = 't' ORDER BY rowid",
    );
    assert!(
        after.len() == 2,
        "schema must not be half-dropped; got rows: {after:?}",
    );
}
/// Drops a checkpointed index on a connection that is then closed, restarts,
/// and verifies that an index with the same name can be created again.
#[test]
fn test_close_persists_drop_index() {
    let mut db = MvccTestDbNoConn::new_with_random_db();

    let creator = db.connect();
    for sql in [
        "CREATE TABLE tdropidx(id INTEGER PRIMARY KEY, val TEXT)",
        "CREATE INDEX idx_tdropidx_val ON tdropidx(val)",
        "INSERT INTO tdropidx VALUES (1, 'data')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        creator.execute(sql).unwrap();
    }
    creator.close().unwrap();

    let dropper = db.connect();
    dropper.execute("DROP INDEX idx_tdropidx_val").unwrap();
    dropper.close().unwrap();

    db.restart();

    let conn = db.connect();
    if let Err(err) = conn.execute("CREATE INDEX idx_tdropidx_val ON tdropidx(val)") {
        panic!("CREATE INDEX should succeed after DROP INDEX, but got: {err:?}");
    }

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
// Stress test for commit atomicity: one writer thread commits batches of
// ROWS_PER_BATCH rows while several reader threads repeatedly read the table
// and check that every batch they can see has all ROWS_PER_BATCH rows.
// The whole scenario is repeated 10 times, each time on a fresh database.
#[test]
fn test_partial_commit_visibility_bug() {
use crate::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use crate::sync::Arc;
use std::collections::HashMap;
use std::thread;
use std::time::Duration;
for _ in 0..10 {
let db = Arc::new(MvccTestDbNoConn::new_with_random_db());
{
let conn = db.connect();
conn.execute("CREATE TABLE consistency_test (batch_id INTEGER, row_num INTEGER)")
.unwrap();
}
const ROWS_PER_BATCH: i64 = 50; const NUM_BATCHES: u64 = 100;
const NUM_READER_THREADS: usize = 4;
// Shared flags: `writer_done` tells readers they may stop,
// `violation_detected` records any partially visible batch, and
// `current_batch` holds the id of the batch the writer most recently started
// committing.
let writer_done = Arc::new(AtomicBool::new(false));
let violation_detected = Arc::new(AtomicBool::new(false));
let current_batch = Arc::new(AtomicU64::new(0));
// Writer: one BEGIN CONCURRENT .. COMMIT transaction per batch.
let writer_handle = {
let db = db.clone();
let writer_done = writer_done.clone();
let current_batch = current_batch.clone();
thread::spawn(move || {
let conn = db.connect();
for batch_id in 0..NUM_BATCHES {
conn.execute("BEGIN CONCURRENT").unwrap();
for row_num in 0..ROWS_PER_BATCH {
conn.execute(format!(
"INSERT INTO consistency_test VALUES ({batch_id}, {row_num})",
))
.unwrap();
}
// Note: the batch id is published before COMMIT is executed.
current_batch.store(batch_id, Ordering::Release);
conn.execute("COMMIT").unwrap();
thread::sleep(Duration::from_micros(100));
}
writer_done.store(true, Ordering::Release);
})
};
// Readers: each iteration reads the whole table inside its own transaction
// and groups the rows by batch id.
let mut reader_handles = Vec::new();
for reader_id in 0..NUM_READER_THREADS {
let db = db.clone();
let writer_done = writer_done.clone();
let violation_detected = violation_detected.clone();
let current_batch = current_batch.clone();
let handle = thread::spawn(move || {
let conn = db.connect();
let mut iteration = 0u64;
loop {
iteration += 1;
conn.execute("BEGIN CONCURRENT").unwrap();
let rows = get_rows(
&conn,
"SELECT batch_id, row_num FROM consistency_test ORDER BY batch_id, row_num",
);
let mut batches: HashMap<i64, Vec<i64>> = HashMap::new();
for row in rows {
let batch_id = row[0].as_int().unwrap();
let row_num = row[1].as_int().unwrap();
batches.entry(batch_id).or_default().push(row_num);
}
// A visible batch must contain exactly ROWS_PER_BATCH rows; any other
// non-zero count is reported as a violation.
for (batch_id, row_nums) in &batches {
let count = row_nums.len() as i64;
if count != 0 && count != ROWS_PER_BATCH {
eprintln!(
"[Reader {reader_id}] VIOLATION DETECTED at iteration {iteration}!",
);
eprintln!(
" Batch {batch_id} has {count} rows (expected {ROWS_PER_BATCH} or 0)",
);
eprintln!(" Visible row_nums: {row_nums:?}");
eprintln!();
eprintln!(" EXPLANATION:");
eprintln!(
" - This reader started a snapshot during batch {batch_id}'s commit",
);
eprintln!(
" - The commit loop (mod.rs:912-984) was updating timestamps"
);
eprintln!(" - Transaction state was still Preparing(ts)");
eprintln!(" - Rows with updated Timestamps became visible");
eprintln!(" - Rows with TxID timestamps remained invisible");
eprintln!(" - Result: Partial batch visibility (atomicity violation)");
eprintln!();
eprintln!(" RACE TIMELINE:");
eprintln!(" 1. Writer: state = Preparing(end_ts)");
eprintln!(" 2. Writer: Update row 0's timestamp");
eprintln!(" 3. Writer: Update row 1's timestamp");
eprintln!(" ...");
eprintln!(" N. Reader: BEGIN (snapshot)");
eprintln!(
" N+1. Reader: Read rows 0-{} (visible via Timestamp)",
count - 1
);
eprintln!(
" N+2. Reader: Read rows {}-{} (invisible, still TxID)",
count,
ROWS_PER_BATCH - 1
);
eprintln!(" N+3. Writer: Continue updating remaining timestamps...");
// The reader records the violation and keeps running; the assertion
// after the joins is what fails the test.
violation_detected.store(true, Ordering::Release);
}
}
conn.execute("COMMIT").unwrap();
// Exit only after the writer has finished and this reader has run more
// than `final_batch + 10` iterations.
if writer_done.load(Ordering::Acquire) {
let final_batch = current_batch.load(Ordering::Acquire);
if iteration > final_batch + 10 {
break;
}
}
thread::sleep(Duration::from_micros(50));
}
eprintln!("[Reader {reader_id}] Completed {iteration} iterations");
});
reader_handles.push(handle);
}
// Wait for the writer and all readers before checking the violation flag.
writer_handle.join().unwrap();
for handle in reader_handles {
handle.join().unwrap();
}
assert!(
!violation_detected.load(Ordering::Acquire),
"Partial commit visibility detected! Transaction atomicity violated.\n\
\n\
ROOT CAUSE: Commit loop (mod.rs:912-984) updates row timestamps non-atomically\n\
while transaction state remains Preparing. Concurrent readers see inconsistent\n\
snapshots with partial transaction visibility.\n\
\n\
FIX REQUIRED: Make timestamp updates atomic, or change visibility logic to\n\
always dereference transaction state instead of reading row timestamps directly."
);
}
}
/// Two overlapping transactions delete the same checkpointed row; the first
/// COMMIT succeeds, the second must fail, and after another checkpoint the
/// integrity check must report `ok`.
#[test]
fn test_double_delete_btree_resident_row_with_unique_index() {
    let db = MvccTestDbNoConn::new_with_random_db();

    let seed = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, val INTEGER, uniq TEXT UNIQUE)",
        "INSERT INTO t VALUES(1, 10, 'a')",
        "INSERT INTO t VALUES(2, 20, 'b')",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        seed.execute(sql).unwrap();
    }
    drop(seed);

    let t1 = db.connect();
    let t2 = db.connect();
    // Both transactions begin before either one deletes.
    t1.execute("BEGIN CONCURRENT").unwrap();
    t2.execute("BEGIN CONCURRENT").unwrap();
    t1.execute("DELETE FROM t WHERE id = 1").unwrap();
    t2.execute("DELETE FROM t WHERE id = 1").unwrap();

    t1.execute("COMMIT").unwrap();
    let second_commit = t2.execute("COMMIT");
    assert!(
        second_commit.is_err(),
        "T2's COMMIT should fail with WriteWriteConflict when T1 already \
committed a tombstone for the same row"
    );
    drop(t1);
    drop(t2);

    let conn = db.connect();
    conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(
        &integrity[0][0].to_string(),
        "ok",
        "Index corruption after concurrent double-delete of B-tree-resident row"
    );
}
/// `CREATE TABLE ... AUTOINCREMENT` must be rejected with the expected error
/// text, while the same table without AUTOINCREMENT can be created and used.
#[test]
fn test_autoincrement_blocked_in_mvcc() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();

    let err = match conn.execute("CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT, b TEXT)") {
        Ok(_) => panic!("CREATE TABLE with AUTOINCREMENT should fail in MVCC mode"),
        Err(e) => e.to_string(),
    };
    assert!(
        err.contains("AUTOINCREMENT is not supported in MVCC mode"),
        "unexpected error: {err}"
    );

    for sql in [
        "CREATE TABLE t(a INTEGER PRIMARY KEY, b TEXT)",
        "INSERT INTO t VALUES (1, 'hello')",
    ] {
        conn.execute(sql).unwrap();
    }
    let stored = get_rows(&conn, "SELECT * FROM t");
    assert_eq!(stored.len(), 1);
}
/// Creates an AUTOINCREMENT table with default open options, reopens the same
/// file, sets `journal_mode` to `'experimental_mvcc'`, and expects an INSERT
/// into that table to be rejected with the AUTOINCREMENT error text.
#[test]
fn test_autoincrement_insert_blocked_for_preexisting_table() {
    let temp_dir = tempfile::TempDir::new().unwrap();
    let file_name = format!("test_{}", rand::random::<u64>());
    let path = temp_dir.path().join(file_name);
    let path_str = path.to_str().unwrap();
    let io = Arc::new(PlatformIO::new().unwrap());

    // Phase 1: create and populate the table before switching journal mode.
    {
        let db = crate::Database::open_file_with_flags(
            io.clone(),
            path_str,
            OpenFlags::default(),
            DatabaseOpts::new(),
            None,
        )
        .unwrap();
        let conn = db.connect().unwrap();
        for sql in [
            "CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT, b TEXT)",
            "INSERT INTO t(b) VALUES ('before_mvcc')",
        ] {
            conn.execute(sql).unwrap();
        }
        conn.close().unwrap();
    }

    // Clear the global database manager before the file is opened again.
    crate::DATABASE_MANAGER.lock().clear();

    // Phase 2: reopen, switch the journal mode, and attempt the insert.
    {
        let db = crate::Database::open_file_with_flags(
            io,
            path_str,
            OpenFlags::default(),
            DatabaseOpts::new(),
            None,
        )
        .unwrap();
        let conn = db.connect().unwrap();
        conn.execute("PRAGMA journal_mode = 'experimental_mvcc'")
            .unwrap();

        let err = match conn.execute("INSERT INTO t(b) VALUES ('in_mvcc')") {
            Ok(_) => panic!("INSERT into AUTOINCREMENT table should fail in MVCC mode"),
            Err(e) => e.to_string(),
        };
        assert!(
            err.contains("AUTOINCREMENT is not supported in MVCC mode"),
            "unexpected error: {err}"
        );
    }
}
/// Two overlapping transactions each insert into an AUTOINCREMENT table; both
/// commits are expected to succeed with strictly increasing rowids.
/// Currently ignored.
#[test]
#[ignore = "AUTOINCREMENT not yet supported in MVCC mode"]
fn test_concurrent_autoincrement_inserts() {
    let db = MvccTestDbNoConn::new_with_random_db();

    let tx1 = db.connect();
    for sql in [
        "CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT, b TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO t(b) VALUES ('from_tx1')",
    ] {
        tx1.execute(sql).unwrap();
    }

    // The second connection is opened only after tx1 has inserted.
    let tx2 = db.connect();
    for sql in ["BEGIN CONCURRENT", "INSERT INTO t(b) VALUES ('from_tx2')"] {
        tx2.execute(sql).unwrap();
    }

    tx1.execute("COMMIT").unwrap();
    tx2.execute("COMMIT").unwrap();

    let rows = get_rows(&tx1, "SELECT a, b FROM t ORDER BY a");
    assert_eq!(rows.len(), 2, "both inserts should be visible");
    let (first, second) = (&rows[0], &rows[1]);
    let rowid1 = first[0].as_int().unwrap();
    let rowid2 = second[0].as_int().unwrap();
    assert!(rowid1 < rowid2, "rowids must be strictly increasing");
    assert_eq!(first[1].to_string(), "from_tx1");
    assert_eq!(second[1].to_string(), "from_tx2");
}
/// After two committed inserts and a checkpoint, `sqlite_sequence` is
/// expected to hold 2 for table `t`, and the next insert to get rowid 3.
/// Currently ignored.
#[test]
#[ignore = "AUTOINCREMENT not yet supported in MVCC mode"]
fn test_autoincrement_sqlite_sequence_after_checkpoint() {
    let db = MvccTestDbNoConn::new_with_random_db();

    let first = db.connect();
    for sql in [
        "CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT, b TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO t(b) VALUES ('row1')",
        "COMMIT",
    ] {
        first.execute(sql).unwrap();
    }

    let second = db.connect();
    for sql in [
        "BEGIN CONCURRENT",
        "INSERT INTO t(b) VALUES ('row2')",
        "COMMIT",
    ] {
        second.execute(sql).unwrap();
    }

    first.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();

    let sequence = get_rows(&first, "SELECT seq FROM sqlite_sequence WHERE name = 't'");
    assert_eq!(sequence.len(), 1, "sqlite_sequence should have entry for 't'");
    let seq = sequence[0][0].as_int().unwrap();
    assert_eq!(seq, 2, "sqlite_sequence should reflect the max rowid");

    first.execute("INSERT INTO t(b) VALUES ('row3')").unwrap();
    let max_row = get_rows(&first, "SELECT MAX(a) FROM t");
    assert_eq!(max_row[0][0].as_int().unwrap(), 3);
}
/// Three overlapping transactions each insert into an AUTOINCREMENT table;
/// all three rows are expected to be visible with strictly increasing rowids.
/// Currently ignored.
#[test]
#[ignore = "AUTOINCREMENT not yet supported in MVCC mode"]
fn test_three_concurrent_autoincrement_inserts() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    conn.execute("CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT, b TEXT)")
        .unwrap();

    let conn1 = db.connect();
    let conn2 = db.connect();
    let conn3 = db.connect();

    // Each transaction begins and inserts before any of them commits.
    let inserts = [
        (&conn1, "INSERT INTO t(b) VALUES ('tx1')"),
        (&conn2, "INSERT INTO t(b) VALUES ('tx2')"),
        (&conn3, "INSERT INTO t(b) VALUES ('tx3')"),
    ];
    for (tx, insert) in inserts {
        tx.execute("BEGIN CONCURRENT").unwrap();
        tx.execute(insert).unwrap();
    }
    for tx in [&conn1, &conn2, &conn3] {
        tx.execute("COMMIT").unwrap();
    }

    let rows = get_rows(&conn, "SELECT a FROM t ORDER BY a");
    assert_eq!(rows.len(), 3, "all three inserts should be visible");
    let ids: Vec<i64> = rows.iter().map(|row| row[0].as_int().unwrap()).collect();
    assert!(
        ids.windows(2).all(|pair| pair[0] < pair[1]),
        "rowids must be strictly increasing: {ids:?}"
    );
}
/// After two concurrent AUTOINCREMENT inserts, deleting every row, checkpointing and
/// restarting, a fresh insert must receive a rowid greater than the previous maximum.
#[test]
#[ignore = "AUTOINCREMENT not yet supported in MVCC mode"]
fn test_autoincrement_no_reuse_after_delete_and_restart() {
    let _ = tracing_subscriber::fmt().try_init();
    let mut db = MvccTestDbNoConn::new_with_random_db();

    let writer_a = db.connect();
    writer_a
        .execute("CREATE TABLE t(a INTEGER PRIMARY KEY AUTOINCREMENT, b TEXT)")
        .unwrap();
    writer_a.execute("BEGIN CONCURRENT").unwrap();
    writer_a
        .execute("INSERT INTO t(b) VALUES ('from_tx1')")
        .unwrap();

    let writer_b = db.connect();
    writer_b.execute("BEGIN CONCURRENT").unwrap();
    writer_b
        .execute("INSERT INTO t(b) VALUES ('from_tx2')")
        .unwrap();

    // Commit in the order the transactions were opened.
    for committer in [&writer_a, &writer_b] {
        committer.execute("COMMIT").unwrap();
    }

    let committed = get_rows(&writer_a, "SELECT a FROM t ORDER BY a");
    assert_eq!(committed.len(), 2);
    let max_data_rowid = committed[1][0].as_int().unwrap();
    assert_eq!(max_data_rowid, 2);

    // Only the number of sqlite_sequence rows for 't' is needed for diagnostics below.
    let seq_count = get_rows(
        &writer_a,
        "SELECT rowid, seq FROM sqlite_sequence WHERE name = 't' ORDER BY rowid",
    )
    .len();

    writer_a.execute("DELETE FROM t").unwrap();
    writer_a.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
    drop(writer_a);
    drop(writer_b);
    db.restart();

    let conn = db.connect();
    let remaining = get_rows(&conn, "SELECT COUNT(*) FROM t");
    assert_eq!(remaining[0][0].as_int().unwrap(), 0);

    conn.execute("INSERT INTO t(b) VALUES ('after_restart')")
        .unwrap();
    let new_rowid = get_rows(&conn, "SELECT a FROM t")[0][0].as_int().unwrap();

    if seq_count > 1 {
        eprintln!(
            "sqlite_sequence had {seq_count} rows for 't'. \
             After restart, new rowid = {new_rowid} (previous max was {max_data_rowid})"
        );
    }
    assert!(
        new_rowid > max_data_rowid,
        "AUTOINCREMENT rowid reuse! Previous max was {max_data_rowid}, \
         but new rowid after delete+restart is {new_rowid}. \
         sqlite_sequence had {seq_count} duplicate rows; \
         init_autoincrement picked the stale one (seq=1 instead of seq=2)."
    );
}
/// SQL-level scenario: a transaction that began before another connection committed
/// an upsert of the same key, and that upserts the key itself while a third
/// transaction has an uncommitted upsert on it, must fail its COMMIT with
/// `WriteWriteConflict`.
#[test]
fn test_speculative_delete_hides_committed_version_sql() {
    /// Builds the upsert statement that sets key `k1` to `val`.
    fn upsert(val: &str) -> String {
        format!(
            "INSERT INTO t VALUES ('k1', '{val}') \
             ON CONFLICT(key) DO UPDATE SET val = excluded.val"
        )
    }

    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t (key TEXT PRIMARY KEY, val TEXT)",
            "PRAGMA mvcc_checkpoint_threshold = 1",
            "INSERT INTO t VALUES ('k1', 'a')",
        ] {
            setup.execute(sql).unwrap();
        }
        setup.close().unwrap();
    }
    db.restart();
    {
        let reopen = db.connect();
        reopen.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        reopen.close().unwrap();
    }

    // The transaction under test starts before any competing write happens.
    let early_tx = db.connect();
    early_tx.execute("BEGIN CONCURRENT").unwrap();

    // An autocommit upsert from another connection lands after `early_tx` began.
    let committer = db.connect();
    committer.execute(upsert("b")).unwrap();
    committer.close().unwrap();

    // A third transaction upserts the same key and is left uncommitted.
    let pending_tx = db.connect();
    pending_tx.execute("BEGIN CONCURRENT").unwrap();
    pending_tx.execute(upsert("d")).unwrap();

    early_tx.execute(upsert("c")).unwrap();
    let result = early_tx.execute("COMMIT");
    assert!(
        matches!(&result, Err(LimboError::WriteWriteConflict)),
        "Expected WriteWriteConflict, got: {result:?}."
    );
}
/// Lost-update scenario on an append-style list column: the transaction that began
/// first must get `WriteWriteConflict` on COMMIT after another connection committed
/// an append to the same key and a third transaction holds an uncommitted append.
#[test]
fn test_elle_lost_update_exclusive_concurrent() {
    /// Builds the statement that appends `v` to the list stored under key `k8`.
    fn append_to_k8(v: &str) -> String {
        format!(
            "INSERT INTO elle_lists (key, vals) VALUES ('k8', '{v}') \
             ON CONFLICT(key) DO UPDATE SET vals = CASE WHEN vals = '' THEN '{v}' ELSE vals || ',' || '{v}' END"
        )
    }

    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let setup = db.connect();
        for sql in [
            "CREATE TABLE elle_lists (key TEXT PRIMARY KEY, vals TEXT DEFAULT '')",
            "PRAGMA mvcc_checkpoint_threshold = 1",
            "INSERT INTO elle_lists (key, vals) VALUES ('k8', '100')",
        ] {
            setup.execute(sql).unwrap();
        }
        setup.close().unwrap();
    }
    db.restart();
    {
        let reopen = db.connect();
        reopen.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        reopen.close().unwrap();
    }

    // The transaction under test opens its snapshot first.
    let early_tx = db.connect();
    early_tx.execute("BEGIN CONCURRENT").unwrap();

    // Autocommit append from a separate connection.
    let committer = db.connect();
    committer.execute(append_to_k8("200")).unwrap();
    committer.close().unwrap();

    // Uncommitted append from a third transaction.
    let pending_tx = db.connect();
    pending_tx.execute("BEGIN CONCURRENT").unwrap();
    pending_tx.execute(append_to_k8("400")).unwrap();

    early_tx.execute(append_to_k8("300")).unwrap();
    let commit_result = early_tx.execute("COMMIT");
    assert!(
        matches!(&commit_result, Err(LimboError::WriteWriteConflict)),
        "Expected WriteWriteConflict, got: {commit_result:?}. \
         T1's committed version was hidden by T3's speculative delete (end=TxID), \
         causing check_version_conflicts to skip it."
    );
}
/// Store-level scenario: T1 commits row 1, T2 and T3 then begin, T3 updates row 1
/// without committing, and T2 inserts row 1 via the btree-resident path. T2's commit
/// must fail with `WriteWriteConflict`.
#[test]
fn test_speculative_delete_hides_committed_version() {
    let db = MvccTestDb::new();
    let table_id: MVTableId = (-2).into();
    let store = &db.mvcc_store;
    // Starts a transaction on the pager currently loaded by `conn`.
    let begin_on = |conn: &Arc<Connection>| store.begin_tx(conn.pager.load().clone()).unwrap();

    // T1: insert and commit the initial version.
    let tx1 = begin_on(&db.conn);
    store
        .insert(tx1, generate_simple_string_row(table_id, 1, "v1"))
        .unwrap();
    commit_tx(store.clone(), &db.conn, tx1).unwrap();

    // T2 and T3 both start after T1 committed.
    let conn2 = db.db.connect().unwrap();
    let tx2 = begin_on(&conn2);
    let conn3 = db.db.connect().unwrap();
    let tx3 = begin_on(&conn3);

    // T3 updates the row and stays uncommitted.
    let updated = store
        .update(tx3, generate_simple_string_row(table_id, 1, "v3"))
        .unwrap();
    assert!(updated);

    store
        .insert_btree_resident_to_table_or_index(
            tx2,
            generate_simple_string_row(table_id, 1, "v2"),
            None,
        )
        .unwrap();

    let result = commit_tx(db.mvcc_store, &conn2, tx2);
    assert!(
        matches!(&result, Err(LimboError::WriteWriteConflict)),
        "Expected WriteWriteConflict, got: {result:?}. \
         T3's speculative delete (end=TxID) on T1's version must not hide it from conflict checks."
    );
}
/// Store-level scenario: T1 commits row 1, T2 begins, Td deletes row 1 and commits,
/// then T2 inserts row 1 via the btree-resident path. T2's commit must fail with
/// `WriteWriteConflict`.
#[test]
fn test_committed_delete_tombstone_conflict() {
    let db = MvccTestDb::new();
    let table_id: MVTableId = (-2).into();
    let store = &db.mvcc_store;
    // Starts a transaction on the pager currently loaded by `conn`.
    let begin_on = |conn: &Arc<Connection>| store.begin_tx(conn.pager.load().clone()).unwrap();

    // T1: insert and commit the initial version.
    let tx1 = begin_on(&db.conn);
    store
        .insert(tx1, generate_simple_string_row(table_id, 1, "v1"))
        .unwrap();
    commit_tx(store.clone(), &db.conn, tx1).unwrap();

    // T2 starts before the delete is issued.
    let conn2 = db.db.connect().unwrap();
    let tx2 = begin_on(&conn2);

    // Td deletes the row and commits.
    let conn_d = db.db.connect().unwrap();
    let tx_d = begin_on(&conn_d);
    let deleted = store
        .delete(tx_d, RowID::new(table_id, RowKey::Int(1)))
        .unwrap();
    assert!(deleted);
    commit_tx(store.clone(), &conn_d, tx_d).unwrap();

    store
        .insert_btree_resident_to_table_or_index(
            tx2,
            generate_simple_string_row(table_id, 1, "v2"),
            None,
        )
        .unwrap();

    let result = commit_tx(db.mvcc_store, &conn2, tx2);
    assert!(
        matches!(&result, Err(LimboError::WriteWriteConflict)),
        "Expected WriteWriteConflict, got: {result:?}. \
         Td's committed delete (tombstone) must be detected as a conflict."
    );
}
/// Store-level scenario: T1 commits row 1, T2 begins, Td updates row 1 and commits,
/// then T2 inserts row 1 via the btree-resident path. T2's commit must fail with
/// `WriteWriteConflict`.
#[test]
fn test_committed_update_version_conflict() {
    let db = MvccTestDb::new();
    let table_id: MVTableId = (-2).into();
    let store = &db.mvcc_store;
    // Starts a transaction on the pager currently loaded by `conn`.
    let begin_on = |conn: &Arc<Connection>| store.begin_tx(conn.pager.load().clone()).unwrap();

    // T1: insert and commit the initial version.
    let tx1 = begin_on(&db.conn);
    store
        .insert(tx1, generate_simple_string_row(table_id, 1, "v1"))
        .unwrap();
    commit_tx(store.clone(), &db.conn, tx1).unwrap();

    // T2 starts before the update is issued.
    let conn2 = db.db.connect().unwrap();
    let tx2 = begin_on(&conn2);

    // Td replaces the row and commits.
    let conn_d = db.db.connect().unwrap();
    let tx_d = begin_on(&conn_d);
    let updated = store
        .update(tx_d, generate_simple_string_row(table_id, 1, "vd"))
        .unwrap();
    assert!(updated);
    commit_tx(store.clone(), &conn_d, tx_d).unwrap();

    store
        .insert_btree_resident_to_table_or_index(
            tx2,
            generate_simple_string_row(table_id, 1, "v2"),
            None,
        )
        .unwrap();

    let result = commit_tx(db.mvcc_store, &conn2, tx2);
    assert!(
        matches!(&result, Err(LimboError::WriteWriteConflict)),
        "Expected WriteWriteConflict, got: {result:?}. \
         Td's committed update must be detected via Td's new version."
    );
}
/// End-to-end check of the encrypted MVCC log:
/// 1. a synthetic row is written and the on-disk log must not contain its plaintext,
/// 2. after a restart with the same key the row is readable again,
/// 3. reopening with a different key must fail.
#[test]
fn test_mvcc_encrypted_log_recovery_and_wrong_key() {
    let hex_key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    // Single source of truth for the value that is written, searched for and read back.
    let secret = "encrypted_value";
    let mut db = MvccTestDbNoConn::new_encrypted(hex_key);
    write_synthetic_row(&db, secret);
    {
        let log_path = std::path::PathBuf::from(db.path.as_ref().unwrap()).with_extension("db-log");
        let log_bytes = std::fs::read(&log_path).expect("MVCC log file should exist");
        // Use the shared header-size constant rather than a hard-coded byte count so
        // this check stays in step with the log format and with the sibling tests.
        assert!(
            log_bytes.len() > LOG_HDR_SIZE,
            "MVCC log should contain data beyond the header"
        );
        let plaintext = secret.as_bytes();
        assert!(
            !log_bytes.windows(plaintext.len()).any(|w| w == plaintext),
            "MVCC log must not contain plaintext data when encryption is enabled"
        );
    }
    db.restart();
    {
        let conn = db.connect();
        let mvcc_store = db.get_mvcc_store();
        let max_root_page = get_rows(
            &conn,
            "SELECT COALESCE(MAX(rootpage), 0) FROM sqlite_schema WHERE rootpage > 0",
        )[0][0]
            .as_int()
            .unwrap();
        // NOTE(review): presumably mirrors the table id chosen by `write_synthetic_row`
        // (defined elsewhere in this file) — confirm if that helper changes.
        let synthetic_table_id = MVTableId::new(-(max_root_page + 100));
        let tx_id = mvcc_store.begin_tx(conn.pager.load().clone()).unwrap();
        let row = mvcc_store
            .read(tx_id, &RowID::new(synthetic_table_id, RowKey::Int(1)))
            .unwrap()
            .unwrap();
        let record = get_record_value(&row);
        match record.get_value(0).unwrap() {
            ValueRef::Text(text) => assert_eq!(text.as_str(), secret),
            other => panic!("Expected Text, got {other:?}"),
        }
        conn.close().unwrap();
    }
    // Same key except for the first byte.
    let wrong_key = "ff0102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    db.enc_opts = Some(crate::EncryptionOpts {
        cipher: "aes256gcm".to_string(),
        hexkey: wrong_key.to_string(),
    });
    assert!(
        db.restart_result().is_err(),
        "Expected error when reopening encrypted MVCC DB with wrong key"
    );
}
/// Opens a file-backed database with the encryption option enabled but no key,
/// switches it to MVCC journal mode, and checks that the internal metadata table
/// exists and holds exactly the `persistent_tx_ts_max = 0` row.
#[test]
fn test_mvcc_late_encryption_setup_keeps_metadata_bootstrapped() {
    // Keep the directory handle alive for the whole test so the file is not removed early.
    let temp_dir = tempfile::TempDir::new().unwrap();
    let db_path = temp_dir.path().join("test.db");
    let io = Arc::new(PlatformIO::new().unwrap());
    let opts = DatabaseOpts::new().with_encryption(true);

    let db = Database::open_file_with_flags(
        io,
        db_path.to_str().unwrap(),
        OpenFlags::default(),
        opts,
        None,
    )
    .unwrap();
    let conn = db.connect().unwrap();
    conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();

    assert!(
        metadata_root_page(&conn) > 0,
        "metadata table must be present after enabling MVCC on a file-backed db",
    );

    let meta_rows = get_rows(
        &conn,
        "SELECT k, v FROM __turso_internal_mvcc_meta ORDER BY rowid",
    );
    assert_eq!(meta_rows.len(), 1);
    let only_row = &meta_rows[0];
    assert_eq!(only_row[0].to_string(), "persistent_tx_ts_max");
    assert_eq!(only_row[1].as_int().unwrap(), 0);
}
/// Writes to an encrypted MVCC database, verifies the log holds more than its header,
/// then clears the encryption options and expects reopening to fail with `NotADB`.
#[test]
fn test_mvcc_encrypted_restart_without_key_fails_before_recovery() {
    let hex_key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    let mut db = MvccTestDbNoConn::new_encrypted(hex_key);
    let log_path = std::path::PathBuf::from(db.path.as_ref().unwrap()).with_extension("db-log");

    {
        let writer = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'secret')",
        ] {
            writer.execute(sql).unwrap();
        }
        writer.close().unwrap();
    }

    let log_len = std::fs::read(&log_path)
        .expect("db-log should exist after MVCC writes")
        .len();
    assert!(
        log_len > LOG_HDR_SIZE,
        "db-log should contain at least one frame before restart"
    );

    // Reopen with no key configured at all.
    db.enc_opts = None;
    let reopened = db.restart_result();
    assert!(
        matches!(reopened, Err(LimboError::NotADB)),
        "reopening an encrypted MVCC database without a key must fail during db open, before recovery",
    );
}
/// Inserts a text value three encrypted-chunk sizes long, confirms every logged chunk
/// decrypts, restarts, and checks the recovered value's length and both ends.
#[test]
fn test_encrypted_recovery_large_payload_multi_chunk() {
    use crate::storage::encryption::CipherMode;

    let hex_key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    let large_value = "x".repeat(ENCRYPTED_PAYLOAD_CHUNK_SIZE * 3);
    let mut db = MvccTestDbNoConn::new_encrypted(hex_key);
    {
        let writer = db.connect();
        writer
            .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        writer
            .execute(format!("INSERT INTO t VALUES (1, '{large_value}')"))
            .unwrap();
    }

    let log_path = std::path::PathBuf::from(db.path.as_ref().unwrap()).with_extension("db-log");
    assert!(log_path.exists(), "db-log should exist before restart");
    assert_log_payloads_decrypt(&log_path, hex_key, CipherMode::Aes256Gcm);

    db.restart();
    let reader = db.connect();
    let rows = get_rows(
        &reader,
        "SELECT id, length(v), substr(v, 1, 16), substr(v, length(v) - 15, 16) FROM t",
    );
    assert_eq!(rows.len(), 1);

    let recovered = &rows[0];
    // First and last 16 characters of the recovered value.
    let edge = "xxxxxxxxxxxxxxxx";
    assert_eq!(recovered[0].as_int().unwrap(), 1);
    assert_eq!(recovered[1].as_int().unwrap(), large_value.len() as i64);
    assert_eq!(recovered[2].to_string(), edge);
    assert_eq!(recovered[3].to_string(), edge);
}
/// Checkpoints one row, logs a second multi-chunk row, flips a byte just past the first
/// on-disk chunk of that frame, and expects recovery to keep only the checkpointed row.
#[test]
fn test_encrypted_recovery_corrupted_later_chunk_keeps_checkpointed_prefix() {
    use crate::storage::encryption::{CipherMode, EncryptionContext};

    let hex_key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    let large_value = "z".repeat(ENCRYPTED_PAYLOAD_CHUNK_SIZE * 3);
    let mut db = MvccTestDbNoConn::new_encrypted(hex_key);
    let log_path = std::path::PathBuf::from(db.path.as_ref().unwrap()).with_extension("db-log");
    {
        let writer = db.connect();
        writer
            .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        writer
            .execute("INSERT INTO t VALUES (1, 'survives')")
            .unwrap();
        writer.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        writer
            .execute(format!("INSERT INTO t VALUES (2, '{large_value}')"))
            .unwrap();
    }

    let mut log_bytes = std::fs::read(&log_path).expect("db-log should exist");

    // The payload size is the little-endian u64 at byte 4 of the first frame.
    let payload_size = {
        let field = &log_bytes[LOG_HDR_SIZE + 4..LOG_HDR_SIZE + 12];
        u64::from_le_bytes(field.try_into().unwrap()) as usize
    };
    let chunk_count = payload_size.div_ceil(ENCRYPTED_PAYLOAD_CHUNK_SIZE);
    assert!(
        chunk_count >= 3,
        "expected multi-chunk encrypted recovery tail"
    );

    let key = EncryptionKey::from_hex_string(hex_key).unwrap();
    let enc_ctx = EncryptionContext::new(CipherMode::Aes256Gcm, &key, 4096).unwrap();
    let first_chunk_on_disk_size =
        ENCRYPTED_PAYLOAD_CHUNK_SIZE + enc_ctx.tag_size() + enc_ctx.nonce_size();

    // Land one byte into whatever follows the first full chunk of the frame.
    let corrupt_offset = LOG_HDR_SIZE + TX_HEADER_SIZE + first_chunk_on_disk_size + 1;
    log_bytes[corrupt_offset] ^= 0xFF;
    std::fs::write(&log_path, &log_bytes).unwrap();

    db.restart();
    let reader = db.connect();
    let rows = get_rows(&reader, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(rows.len(), 1);
    let survivor = &rows[0];
    assert_eq!(survivor[0].as_int().unwrap(), 1);
    assert_eq!(survivor[1].to_string(), "survives");
}
/// Walks the transaction frames stored in the log file at `log_path` and panics unless
/// every encrypted payload chunk decrypts with `hex_key` under `cipher`.
///
/// Scanning stops at the first frame whose magic differs from `FRAME_MAGIC`, or at a
/// frame whose chunks (plus trailer) would run past the end of the file. At least one
/// complete frame must have been processed.
fn assert_log_payloads_decrypt(
    log_path: &std::path::Path,
    hex_key: &str,
    cipher: crate::storage::encryption::CipherMode,
) {
    use crate::storage::encryption::EncryptionContext;

    let log_bytes = std::fs::read(log_path).expect("db-log file should exist");
    let total_len = log_bytes.len();
    assert!(
        total_len > LOG_HDR_SIZE,
        "db-log should contain data beyond the header"
    );

    // Little-endian field readers over the raw log bytes.
    let le_u32 = |at: usize| u32::from_le_bytes(log_bytes[at..at + 4].try_into().unwrap());
    let le_u64 = |at: usize| u64::from_le_bytes(log_bytes[at..at + 8].try_into().unwrap());

    let key = EncryptionKey::from_hex_string(hex_key).unwrap();
    let enc_ctx = EncryptionContext::new(cipher, &key, 4096).unwrap();
    let nonce_size = enc_ctx.nonce_size();
    let tag_size = enc_ctx.tag_size();
    // The salt is read from bytes 8..16 of the file.
    let salt = le_u64(8);

    let mut offset = LOG_HDR_SIZE;
    let mut frame_count = 0;
    'frames: while offset + TX_HEADER_SIZE + TX_TRAILER_SIZE <= total_len {
        if le_u32(offset) != FRAME_MAGIC {
            break;
        }
        // Frame header fields: payload size (u64), op count (u32), commit timestamp (u64).
        let payload_size = le_u64(offset + 4) as usize;
        let op_count = le_u32(offset + 12);
        let commit_ts = le_u64(offset + 16);

        // `div_ceil` yields zero chunks for an empty payload.
        let chunk_count = payload_size.div_ceil(ENCRYPTED_PAYLOAD_CHUNK_SIZE);
        let mut payload_offset = offset + TX_HEADER_SIZE;

        for chunk_index in 0..chunk_count {
            let already_consumed = chunk_index * ENCRYPTED_PAYLOAD_CHUNK_SIZE;
            let chunk_plaintext_len =
                (payload_size - already_consumed).min(ENCRYPTED_PAYLOAD_CHUNK_SIZE);
            let sealed_len = chunk_plaintext_len + tag_size;
            let chunk_on_disk_size = sealed_len + nonce_size;

            // Truncated frame: stop scanning without counting it.
            if payload_offset + chunk_on_disk_size + TX_TRAILER_SIZE > total_len {
                break 'frames;
            }

            // On disk each chunk is ciphertext+tag followed by its nonce.
            let (ciphertext, nonce) =
                log_bytes[payload_offset..payload_offset + chunk_on_disk_size].split_at(sealed_len);

            // Associated data: salt | payload size (last chunk only) | op count |
            // commit timestamp | chunk index.
            let is_last_chunk = chunk_index + 1 == chunk_count;
            let mut aad = [0u8; 32];
            aad[..8].copy_from_slice(&salt.to_le_bytes());
            if is_last_chunk {
                aad[8..16].copy_from_slice(&(payload_size as u64).to_le_bytes());
            }
            aad[16..20].copy_from_slice(&op_count.to_le_bytes());
            aad[20..28].copy_from_slice(&commit_ts.to_le_bytes());
            aad[28..32].copy_from_slice(&(chunk_index as u32).to_le_bytes());

            if let Err(e) = enc_ctx.decrypt_chunk(ciphertext, nonce, &aad) {
                panic!(
                    "failed to decrypt frame {frame_count} chunk {chunk_index} at offset {offset}: {e}"
                );
            }
            payload_offset += chunk_on_disk_size;
        }

        frame_count += 1;
        offset = payload_offset + TX_TRAILER_SIZE;
    }

    assert!(
        frame_count > 0,
        "db-log should contain at least one TX frame"
    );
}
/// One row is checkpointed and two more are written afterwards; after verifying the log
/// decrypts and restarting, all three rows must be present in id order.
#[test]
fn test_encrypted_recovery_checkpoint_then_more_writes() {
    use crate::storage::encryption::CipherMode;

    let hex_key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    let mut db = MvccTestDbNoConn::new_encrypted(hex_key);
    {
        let writer = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'a')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
            "INSERT INTO t VALUES (2, 'b')",
            "INSERT INTO t VALUES (3, 'c')",
        ] {
            writer.execute(sql).unwrap();
        }
    }

    let log_path = std::path::PathBuf::from(db.path.as_ref().unwrap()).with_extension("db-log");
    assert!(log_path.exists(), "db-log file should exist before restart");
    assert_log_payloads_decrypt(&log_path, hex_key, CipherMode::Aes256Gcm);

    db.restart();
    let reader = db.connect();
    let rows = get_rows(&reader, "SELECT id, v FROM t ORDER BY id");

    let expected = [(1i64, "a"), (2, "b"), (3, "c")];
    assert_eq!(rows.len(), 3);
    for (row, (id, v)) in rows.iter().zip(expected) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].to_string(), v);
    }
}
/// Writes across two restart cycles of an encrypted MVCC database and checks that a
/// third open sees the rows from both cycles.
#[test]
fn test_encrypted_recovery_multiple_restart_cycles() {
    use crate::storage::encryption::CipherMode;

    let hex_key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    let mut db = MvccTestDbNoConn::new_encrypted(hex_key);
    let log_path = std::path::PathBuf::from(db.path.as_ref().unwrap()).with_extension("db-log");

    // Cycle 1: create the table and write the first row.
    {
        let writer = db.connect();
        writer
            .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        writer.execute("INSERT INTO t VALUES (1, 'first')").unwrap();
    }
    assert!(log_path.exists(), "db-log file should exist after cycle 1");
    assert_log_payloads_decrypt(&log_path, hex_key, CipherMode::Aes256Gcm);
    db.restart();

    // Cycle 2: two more rows on top of the recovered state.
    {
        let writer = db.connect();
        for sql in [
            "INSERT INTO t VALUES (2, 'second')",
            "INSERT INTO t VALUES (3, 'third')",
        ] {
            writer.execute(sql).unwrap();
        }
    }
    db.restart();

    let reader = db.connect();
    let rows = get_rows(&reader, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(rows.len(), 3);
    for (row, v) in rows.iter().zip(["first", "second", "third"]) {
        assert_eq!(row[1].to_string(), v);
    }
}
/// Checkpoints one row, logs a second one, flips a byte inside the first frame's
/// encrypted payload, and expects recovery to keep only the checkpointed row.
#[test]
fn test_encrypted_recovery_corrupted_ciphertext() {
    let hex_key = "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f";
    let mut db = MvccTestDbNoConn::new_encrypted(hex_key);
    let log_path = std::path::PathBuf::from(db.path.as_ref().unwrap()).with_extension("db-log");
    {
        let conn = db.connect();
        conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        conn.execute("INSERT INTO t VALUES (1, 'survives')")
            .unwrap();
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        conn.execute("INSERT INTO t VALUES (2, 'corrupted')")
            .unwrap();
    }
    assert!(
        log_path.exists(),
        "db-log file should exist before corruption"
    );
    // The log must be valid before it is tampered with.
    assert_log_payloads_decrypt(
        &log_path,
        hex_key,
        crate::storage::encryption::CipherMode::Aes256Gcm,
    );
    {
        let mut log_bytes = std::fs::read(&log_path).expect("log file should exist");
        // Second byte after the log header and the first frame's header. Derived from
        // the shared layout constants (as the sibling corrupted-chunk test does) instead
        // of hard-coded byte counts, so it follows any change to the header sizes.
        let corrupt_offset = LOG_HDR_SIZE + TX_HEADER_SIZE + 1;
        assert!(
            log_bytes.len() > corrupt_offset,
            "log should have data beyond header + tx header"
        );
        log_bytes[corrupt_offset] ^= 0xFF;
        std::fs::write(&log_path, &log_bytes).unwrap();
    }
    db.restart();
    let conn = db.connect();
    let rows = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(rows.len(), 1);
    assert_eq!(rows[0][0].as_int().unwrap(), 1);
    assert_eq!(rows[0][1].to_string(), "survives");
}
/// Creates 50 tables and checkpoints, restarts, creates 30 more tables plus extra rows
/// with a checkpoint in between, restarts again, and verifies every table's row count.
#[test]
fn test_recovery_many_tables_checkpoint_restart_checkpoint_restart() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    let num_initial_tables = 50;
    let num_extra_tables = 30;

    // Phase 1: initial tables, one row each, then checkpoint.
    {
        let conn = db.connect();
        for i in 0..num_initial_tables {
            for sql in [
                format!("CREATE TABLE t{i}(id INTEGER PRIMARY KEY, v TEXT)"),
                format!("INSERT INTO t{i} VALUES (1, 'init')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        conn.close().unwrap();
    }
    db.restart();

    // Phase 2: extra tables and rows before the checkpoint, more rows after it.
    {
        let conn = db.connect();
        for i in 0..num_extra_tables {
            for sql in [
                format!("CREATE TABLE extra{i}(id INTEGER PRIMARY KEY, v TEXT)"),
                format!("INSERT INTO extra{i} VALUES (1, 'extra')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        for i in 0..num_initial_tables {
            conn.execute(format!("INSERT INTO t{i} VALUES (2, 'after_restart')"))
                .unwrap();
        }
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        for i in 0..num_initial_tables {
            conn.execute(format!("INSERT INTO t{i} VALUES (3, 'post_ckpt2')"))
                .unwrap();
        }
        for i in 0..num_extra_tables {
            conn.execute(format!(
                "INSERT INTO extra{i} VALUES (2, 'extra_post_ckpt')"
            ))
            .unwrap();
        }
        conn.close().unwrap();
    }
    db.restart();

    // Phase 3: verify the recovered row counts.
    {
        let conn = db.connect();
        for i in 0..num_initial_tables {
            let found = get_rows(&conn, &format!("SELECT id, v FROM t{i} ORDER BY id")).len();
            assert_eq!(found, 3, "table t{i} should have 3 rows, got {}", found);
        }
        for i in 0..num_extra_tables {
            let found = get_rows(&conn, &format!("SELECT id, v FROM extra{i} ORDER BY id")).len();
            assert_eq!(found, 2, "table extra{i} should have 2 rows, got {}", found);
        }
    }
}
/// Runs three write phases separated by restarts. Each phase creates a new family of
/// 20 tables (`a*`, `b*`, `c*`) and adds rows to the existing ones, with a checkpoint
/// inside the phase. After a final restart every table must hold the expected row count.
#[test]
fn test_recovery_three_restarts_with_table_creation() {
    let mut db = MvccTestDbNoConn::new_with_random_db();

    // Phase 1: create a0..a19 and checkpoint.
    {
        let conn = db.connect();
        for i in 0..20 {
            for sql in [
                format!("CREATE TABLE a{i}(id INTEGER PRIMARY KEY, v TEXT)"),
                format!("INSERT INTO a{i} VALUES (1, 'a')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        conn.close().unwrap();
    }
    db.restart();

    // Phase 2: create b0..b19, grow a*, checkpoint, then add un-checkpointed rows.
    {
        let conn = db.connect();
        for i in 0..20 {
            for sql in [
                format!("CREATE TABLE b{i}(id INTEGER PRIMARY KEY, v TEXT)"),
                format!("INSERT INTO b{i} VALUES (1, 'b')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        for i in 0..20 {
            conn.execute(format!("INSERT INTO a{i} VALUES (2, 'a2')"))
                .unwrap();
        }
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        for i in 0..20 {
            for sql in [
                format!("INSERT INTO a{i} VALUES (3, 'a3')"),
                format!("INSERT INTO b{i} VALUES (2, 'b2')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        conn.close().unwrap();
    }
    db.restart();

    // Phase 3: create c0..c19, grow a* and b*, checkpoint, then add more rows.
    {
        let conn = db.connect();
        for i in 0..20 {
            for sql in [
                format!("CREATE TABLE c{i}(id INTEGER PRIMARY KEY, v TEXT)"),
                format!("INSERT INTO c{i} VALUES (1, 'c')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        for i in 0..20 {
            for sql in [
                format!("INSERT INTO a{i} VALUES (4, 'a4')"),
                format!("INSERT INTO b{i} VALUES (3, 'b3')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
        for i in 0..20 {
            for sql in [
                format!("INSERT INTO a{i} VALUES (5, 'a5')"),
                format!("INSERT INTO b{i} VALUES (4, 'b4')"),
                format!("INSERT INTO c{i} VALUES (2, 'c2')"),
            ] {
                conn.execute(sql).unwrap();
            }
        }
        conn.close().unwrap();
    }
    db.restart();

    // Verification: expected row count per table family.
    {
        let conn = db.connect();
        for i in 0..20 {
            for (prefix, expected) in [("a", 5usize), ("b", 4), ("c", 2)] {
                let found =
                    get_rows(&conn, &format!("SELECT id FROM {prefix}{i} ORDER BY id")).len();
                assert_eq!(
                    found, expected,
                    "table {prefix}{i} should have {expected} rows"
                );
            }
        }
    }
}
/// Creates the "wide table" fixture schema on `conn` if it is not already present:
/// the `core` table with four indexes, the `metadata`, `audit_log` and `trigger_gate`
/// tables, and the `metadata` row for sheet 1. Panics if any statement fails.
fn create_wide_table_like_schema(conn: &Arc<Connection>) {
    // Statements run strictly in this order; the indexes depend on `core`.
    let statements = [
        "CREATE TABLE IF NOT EXISTS core(
id INTEGER PRIMARY KEY,
row_number INTEGER NOT NULL,
sheet_id INTEGER NOT NULL,
created_by TEXT,
updated_by TEXT,
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now')),
col_1 TEXT,
col_2 TEXT,
col_3 TEXT,
col_4 TEXT,
col_5 TEXT,
col_6 TEXT,
col_7 TEXT,
col_8 TEXT
)",
        "CREATE INDEX IF NOT EXISTS idx_core_sheet_row ON core(sheet_id, row_number)",
        "CREATE INDEX IF NOT EXISTS idx_core_created ON core(created_at)",
        "CREATE INDEX IF NOT EXISTS idx_core_updated ON core(updated_at, sheet_id)",
        "CREATE INDEX IF NOT EXISTS idx_core_created_by ON core(created_by, sheet_id)",
        "CREATE TABLE IF NOT EXISTS metadata(
sheet_id INTEGER PRIMARY KEY,
next_row_number INTEGER NOT NULL DEFAULT 1,
row_count INTEGER NOT NULL DEFAULT 0,
updated_at TEXT DEFAULT (datetime('now'))
)",
        "CREATE TABLE IF NOT EXISTS audit_log(
id INTEGER PRIMARY KEY,
sheet_id INTEGER NOT NULL,
action TEXT NOT NULL,
row_id INTEGER,
row_number INTEGER,
created_at TEXT DEFAULT (datetime('now')),
details TEXT
)",
        "CREATE TABLE IF NOT EXISTS trigger_gate(
id INTEGER PRIMARY KEY,
sheet_id INTEGER NOT NULL,
trigger_type TEXT NOT NULL,
payload TEXT,
created_at TEXT DEFAULT (datetime('now'))
)",
        "INSERT OR IGNORE INTO metadata(sheet_id, next_row_number, row_count, updated_at)
VALUES (1, 1, 0, datetime('now'))",
    ];
    for sql in statements {
        conn.execute(sql).unwrap();
    }
}
/// Drops every table and index of the "wide table" fixture schema from `conn`,
/// tolerating objects that do not exist. Panics if any statement fails.
fn drop_wide_table_like_schema(conn: &Arc<Connection>) {
    // Auxiliary tables first, then the indexes on `core`, then `core` itself.
    for sql in [
        "DROP TABLE IF EXISTS trigger_gate",
        "DROP TABLE IF EXISTS audit_log",
        "DROP TABLE IF EXISTS metadata",
        "DROP INDEX IF EXISTS idx_core_sheet_row",
        "DROP INDEX IF EXISTS idx_core_created",
        "DROP INDEX IF EXISTS idx_core_updated",
        "DROP INDEX IF EXISTS idx_core_created_by",
        "DROP TABLE IF EXISTS core",
    ] {
        conn.execute(sql).unwrap();
    }
}
/// Inside a single `BEGIN`/`COMMIT` on `conn`, inserts `rows` rows into `core` (with a
/// matching `audit_log` entry each) numbered from `start_row_number`, bumps the
/// `metadata` counters for sheet 1 by `rows`, and adds three `trigger_gate` rows.
/// Panics if any statement fails.
fn insert_wide_table_like_batch(conn: &Arc<Connection>, start_row_number: i64, rows: usize) {
    conn.execute("BEGIN").unwrap();

    for row_number in (0..rows).map(|offset| start_row_number + offset as i64) {
        let core_insert = format!(
            "INSERT INTO core(
row_number, sheet_id, created_by, updated_by,
created_at, updated_at,
col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8
) VALUES (
{row_number}, 1, 'seed', 'seed',
datetime('now'), datetime('now'),
hex(randomblob(8)), hex(randomblob(8)), hex(randomblob(8)), hex(randomblob(8)),
hex(randomblob(8)), hex(randomblob(8)), hex(randomblob(8)), hex(randomblob(8))
)",
        );
        conn.execute(core_insert).unwrap();

        let audit_insert = format!(
            "INSERT INTO audit_log(sheet_id, action, row_number, details, created_at)
VALUES (1, 'INSERT', {row_number}, 'wide table repro', datetime('now'))",
        );
        conn.execute(audit_insert).unwrap();
    }

    let bump_counters = format!(
        "UPDATE metadata
SET next_row_number = next_row_number + {rows},
row_count = row_count + {rows},
updated_at = datetime('now')
WHERE sheet_id = 1",
    );
    conn.execute(bump_counters).unwrap();

    // Three fixed trigger_gate rows are added per batch, regardless of `rows`.
    for sql in [
        "INSERT INTO trigger_gate(sheet_id, trigger_type, payload, created_at)
VALUES (1, 'ROW_INSERT', '{\"count\": 1}', datetime('now'))",
        "INSERT INTO trigger_gate(sheet_id, trigger_type, payload, created_at)
VALUES (1, 'RECALC', '{\"sheet_id\": 1}', datetime('now'))",
        "INSERT INTO trigger_gate(sheet_id, trigger_type, payload, created_at)
VALUES (1, 'WEBHOOK', '{\"event\": \"rows_added\"}', datetime('now'))",
    ] {
        conn.execute(sql).unwrap();
    }

    conn.execute("COMMIT").unwrap();
}
/// Populates the wide-table schema, force-closes and restarts the database, drops and
/// recreates the schema, inserts again and checkpoints. The single `core` row and a
/// clean integrity check are asserted before a further restart, and the row again after.
#[test]
fn test_checkpoint_recovers_after_crash_restart_drop_recreate_table() {
    /// Asserts that `core` holds exactly the one seeded row.
    fn assert_single_seed_row(conn: &Arc<Connection>) {
        let rows = get_rows(
            conn,
            "SELECT row_number, sheet_id, created_by FROM core ORDER BY id",
        );
        assert_eq!(rows.len(), 1);
        let seeded = &rows[0];
        assert_eq!(seeded[0].as_int().unwrap(), 1);
        assert_eq!(seeded[1].as_int().unwrap(), 1);
        assert_eq!(seeded[2].to_string(), "seed");
    }

    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let seeding = db.connect();
        seeding
            .execute("PRAGMA mvcc_checkpoint_threshold = 1000000")
            .unwrap();
        create_wide_table_like_schema(&seeding);
        insert_wide_table_like_batch(&seeding, 1, 1);
    }
    force_close_for_artifact_tamper(&mut db);
    db.restart();

    let recreating = db.connect();
    recreating
        .execute("PRAGMA mvcc_checkpoint_threshold = 1000000")
        .unwrap();
    drop_wide_table_like_schema(&recreating);
    create_wide_table_like_schema(&recreating);
    insert_wide_table_like_batch(&recreating, 1, 1);
    recreating
        .execute("PRAGMA wal_checkpoint(TRUNCATE)")
        .unwrap();

    assert_single_seed_row(&recreating);
    let integrity = get_rows(&recreating, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(integrity[0][0].to_string(), "ok");
    recreating.close().unwrap();

    db.restart();
    let verifying = db.connect();
    assert_single_seed_row(&verifying);
}
/// Creates an index after a checkpoint, force-closes and restarts, drops and recreates
/// that index, inserts another row and checkpoints. Both rows and a clean integrity
/// check are asserted before a further restart, and both rows again after.
#[test]
fn test_checkpoint_recovers_after_crash_restart_drop_recreate_index() {
    /// Asserts that `t` holds exactly the seeded row and the post-restart row.
    fn assert_both_rows(conn: &Arc<Connection>) {
        let rows = get_rows(conn, "SELECT id, v FROM t ORDER BY id");
        assert_eq!(rows.len(), 2);
        for (row, (id, v)) in rows.iter().zip([(1i64, "seed_1"), (2, "post_2")]) {
            assert_eq!(row[0].as_int().unwrap(), id);
            assert_eq!(row[1].to_string(), v);
        }
    }

    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let seeding = db.connect();
        for sql in [
            "PRAGMA mvcc_checkpoint_threshold = 1000000",
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT, payload TEXT)",
            "INSERT INTO t VALUES (1, 'seed_1', hex(randomblob(16)))",
            "PRAGMA wal_checkpoint(TRUNCATE)",
            // The index is created after the checkpoint.
            "CREATE INDEX idx_t_v ON t(v)",
        ] {
            seeding.execute(sql).unwrap();
        }
    }
    force_close_for_artifact_tamper(&mut db);
    db.restart();

    let recreating = db.connect();
    for sql in [
        "PRAGMA mvcc_checkpoint_threshold = 1000000",
        "DROP INDEX IF EXISTS idx_t_v",
        "CREATE INDEX idx_t_v ON t(v)",
        "INSERT INTO t VALUES (2, 'post_2', hex(randomblob(16)))",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        recreating.execute(sql).unwrap();
    }

    assert_both_rows(&recreating);
    let integrity = get_rows(&recreating, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(integrity[0][0].to_string(), "ok");
    recreating.close().unwrap();

    db.restart();
    let verifying = db.connect();
    assert_both_rows(&verifying);
}
/// Drops an already-checkpointed index, force-closes and restarts, then checkpoints.
/// The table row must survive, the index must be absent from `sqlite_schema`, and the
/// integrity check must report `ok`.
#[test]
fn test_checkpoint_recovers_after_restart_drop_checkpointed_index() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let seeding = db.connect();
        for sql in [
            "PRAGMA mvcc_checkpoint_threshold = 1000000",
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "CREATE INDEX idx_t_v ON t(v)",
            "INSERT INTO t VALUES (1, 'seed_1')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
            // The drop happens after the checkpoint.
            "DROP INDEX idx_t_v",
        ] {
            seeding.execute(sql).unwrap();
        }
    }
    force_close_for_artifact_tamper(&mut db);
    db.restart();

    let conn = db.connect();
    for sql in [
        "PRAGMA mvcc_checkpoint_threshold = 1000000",
        "PRAGMA wal_checkpoint(TRUNCATE)",
    ] {
        conn.execute(sql).unwrap();
    }

    let data = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    assert_eq!(data.len(), 1);
    assert_eq!(data[0][0].as_int().unwrap(), 1);
    assert_eq!(data[0][1].to_string(), "seed_1");

    let index_entries = get_rows(
        &conn,
        "SELECT name FROM sqlite_schema WHERE type = 'index' AND name = 'idx_t_v'",
    );
    assert_eq!(index_entries.len(), 0);

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(integrity[0][0].to_string(), "ok");
}
/// Two rounds of: (re)create a table with three indexes, insert 1000 rows with the
/// checkpoint threshold set to 4096, restart, and verify the row count. The second
/// round drops the table first.
#[test]
fn test_drop_recreate_indexed_table_many_inserts_restart() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    for round in 0..2 {
        {
            let writer = db.connect();
            db.get_mvcc_store().set_checkpoint_threshold(4096);
            if round > 0 {
                writer.execute("DROP TABLE IF EXISTS t").unwrap();
            }
            for sql in [
                "CREATE TABLE t(id INTEGER PRIMARY KEY, a TEXT, b TEXT, c INTEGER)",
                "CREATE INDEX idx_a ON t(a)",
                "CREATE INDEX idx_b ON t(b)",
                "CREATE INDEX idx_c ON t(c)",
            ] {
                writer.execute(sql).unwrap();
            }
            for i in 0..1000 {
                let insert = format!("INSERT INTO t VALUES({i}, 'a_{i}', 'b_{i}', {i})");
                writer.execute(insert).unwrap();
            }
            writer.close().unwrap();
        }
        db.restart();
        {
            let reader = db.connect();
            let total = get_rows(&reader, "SELECT count(*) FROM t")[0][0]
                .as_int()
                .unwrap();
            assert_eq!(total, 1000, "round {round}: expected 1000 rows");
            reader.close().unwrap();
        }
    }
}
/// A `CREATE TYPE` issued on one connection must show up in
/// `sqlite_turso_types` when queried from a connection opened afterwards.
#[test]
fn test_create_type_visible_to_second_connection_under_mvcc() {
    let opts = DatabaseOpts::new().with_custom_types(true);
    let db = MvccTestDbNoConn::new_with_random_db_with_opts(opts);

    let creator = db.connect();
    creator
        .execute("CREATE TYPE my_uint(value any) BASE text ENCODE my_uint_enc(value) DECODE my_uint_dec(value)")
        .unwrap();
    creator.close().unwrap();

    let observer = db.connect();
    let type_rows = get_rows(
        &observer,
        "SELECT name FROM sqlite_turso_types WHERE name LIKE 'my_uint%'",
    );
    assert_eq!(type_rows.len(), 1, "CREATE TYPE should be visible to conn2");
    let type_name = type_rows[0][0].to_string();
    assert_eq!(type_name, "my_uint(value any)");
    observer.close().unwrap();
}
/// After a checkpoint and restart, the root page of a live table is manually
/// added to the schema's `dropped_root_pages` set. `PRAGMA integrity_check`
/// must still report `ok`.
#[test]
fn test_integrity_check_ignores_dropped_root_that_is_live_after_recovery() {
    let mut db = MvccTestDbNoConn::new_with_random_db();
    {
        let setup = db.connect();
        for sql in [
            "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
            "INSERT INTO t VALUES (1, 'x')",
            "PRAGMA wal_checkpoint(TRUNCATE)",
        ] {
            setup.execute(sql).unwrap();
        }
        setup.close().unwrap();
    }
    db.restart();

    let conn = db.connect();
    let schema_rows = get_rows(
        &conn,
        "SELECT rootpage FROM sqlite_schema WHERE type = 'table' AND name = 't'",
    );
    let live_root = schema_rows[0][0].as_int().unwrap();
    assert!(live_root > 0);

    // Mark the still-live root page as dropped.
    conn.with_schema_mut(|schema| {
        schema.dropped_root_pages.insert(live_root);
    });

    let integrity = get_rows(&conn, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    assert_eq!(&integrity[0][0].to_string(), "ok");
}
/// Stress test for snapshot stability inside a single `BEGIN CONCURRENT`.
///
/// A reader thread repeatedly opens a transaction and samples
/// `SELECT count(*) FROM t` several times; every sample within one
/// transaction must equal the first. Optional background threads (savepoint
/// mutator, writer, checkpointer, DDL) run alongside it. The test panics if
/// any in-transaction sample differs, or if the reader collected no samples.
///
/// Environment knobs read below: `REPRO_DURATION_SECS` (default 5),
/// `REPRO_READER_OPS` (default 8), `REPRO_SP`, `REPRO_WRITER`, `REPRO_CKPT`
/// (enabled unless set to "0") and `REPRO_DDL` (disabled when the variable is
/// unset; when set, enabled unless the value is "0").
#[test]
fn test_snapshot_stability_full() {
use crate::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering};
use std::time::{Duration, Instant};
// `try_init` result is ignored, so an already-installed subscriber is not an error.
let _ = tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.with_writer(std::io::stderr)
.try_init();
let db = MvccTestDbNoConn::new_with_random_db();
{
// Seed 500 rows (ids 0..500) and checkpoint them before the threads start.
let conn = db.connect();
conn.execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT, b BLOB)")
.unwrap();
conn.execute("CREATE INDEX idx_v ON t(v)").unwrap();
for i in 0..500 {
conn.execute(format!("INSERT INTO t VALUES ({i}, 'v_{i}', NULL)"))
.unwrap();
}
conn.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
conn.close().unwrap();
}
// Shared control flags and progress counters.
let stop = Arc::new(AtomicBool::new(false));
let mismatch = Arc::new(AtomicBool::new(false));
let reader_iters = Arc::new(AtomicU64::new(0));
let reader_samples = Arc::new(AtomicU64::new(0));
let sp_iters = Arc::new(AtomicU64::new(0));
let writer_iters = Arc::new(AtomicU64::new(0));
let ckpt_iters = Arc::new(AtomicU64::new(0));
let ddl_iters = Arc::new(AtomicU64::new(0));
// Ids for new rows start far above the seeded range 0..500.
let next_id = Arc::new(AtomicU64::new(10_000_000));
// Details of the first detected mismatch, read by the main thread after joining.
let mismatch_first = Arc::new(AtomicI64::new(0));
let mismatch_second = Arc::new(AtomicI64::new(0));
let mismatch_idx_a = Arc::new(AtomicU64::new(0));
let mismatch_idx_b = Arc::new(AtomicU64::new(0));
let duration = Duration::from_secs(
std::env::var("REPRO_DURATION_SECS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(5),
);
let reader_ops: usize = std::env::var("REPRO_READER_OPS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(8);
let enable_sp = std::env::var("REPRO_SP").map(|s| s != "0").unwrap_or(true);
let enable_writer = std::env::var("REPRO_WRITER")
.map(|s| s != "0")
.unwrap_or(true);
let enable_ckpt = std::env::var("REPRO_CKPT")
.map(|s| s != "0")
.unwrap_or(true);
let enable_ddl = std::env::var("REPRO_DDL")
.map(|s| s != "0")
.unwrap_or(false);
// Reader: takes `reader_ops` count(*) samples per transaction and compares
// each against the first sample of that transaction.
let reader = {
let db_arc = db.get_db();
let stop = stop.clone();
let mismatch = mismatch.clone();
let reader_iters = reader_iters.clone();
let reader_samples = reader_samples.clone();
let mismatch_first = mismatch_first.clone();
let mismatch_second = mismatch_second.clone();
let mismatch_idx_a = mismatch_idx_a.clone();
let mismatch_idx_b = mismatch_idx_b.clone();
std::thread::spawn(move || {
let conn = db_arc.connect().unwrap();
while !stop.load(Ordering::Relaxed) && !mismatch.load(Ordering::Relaxed) {
if conn.execute("BEGIN CONCURRENT").is_err() {
std::thread::yield_now();
continue;
}
let mut samples: Vec<i64> = Vec::with_capacity(reader_ops);
for _ in 0..reader_ops {
let mut stmt = conn.prepare("SELECT count(*) FROM t").unwrap();
let rows = stmt.run_collect_rows().unwrap();
let c = rows[0][0].as_int().unwrap();
samples.push(c);
reader_samples.fetch_add(1, Ordering::Relaxed);
}
// Record the first sample that differs from samples[0], flag the
// mismatch, and stop the reader.
if let Some((i, &c)) = samples.iter().enumerate().find(|(_, &c)| c != samples[0]) {
mismatch_first.store(samples[0], Ordering::Relaxed);
mismatch_second.store(c, Ordering::Relaxed);
mismatch_idx_a.store(0, Ordering::Relaxed);
mismatch_idx_b.store(i as u64, Ordering::Relaxed);
mismatch.store(true, Ordering::Relaxed);
let _ = conn.execute("ROLLBACK");
return;
}
let _ = conn.execute("COMMIT");
reader_iters.fetch_add(1, Ordering::Relaxed);
}
})
};
// Savepoint mutator: opens 2..=4 nested savepoints, performs 1..=4 random
// deletes/inserts under each, then rolls back to and releases one randomly
// chosen savepoint before committing.
let sp_thread = enable_sp.then(|| {
let db_arc = db.get_db();
let stop = stop.clone();
let mismatch = mismatch.clone();
let sp_iters = sp_iters.clone();
let next_id = next_id.clone();
std::thread::spawn(move || {
let conn = db_arc.connect().unwrap();
// Fixed seed keeps this thread's random choices reproducible.
let mut rng = ChaCha8Rng::seed_from_u64(0xCAFEF00D);
while !stop.load(Ordering::Relaxed) && !mismatch.load(Ordering::Relaxed) {
if conn.execute("BEGIN CONCURRENT").is_err() {
std::thread::yield_now();
continue;
}
let depth = 2 + (rng.random::<u8>() % 3) as usize;
let mut sps = Vec::with_capacity(depth);
let mut aborted = false;
'sp: for i in 0..depth {
let name = format!("sp_{i}_{}", rng.random::<u32>() % 100_000);
if conn.execute(format!("SAVEPOINT {name}")).is_err() {
aborted = true;
break 'sp;
}
sps.push(name);
let muts = 1 + (rng.random::<u8>() % 4) as u64;
for _ in 0..muts {
// op == 0 deletes a seeded id (0..500); any other value inserts a fresh id.
let op = rng.random::<u8>() % 3;
let sql = if op == 0 {
let target = (rng.random::<u32>() % 500) as i64;
format!("DELETE FROM t WHERE id = {target}")
} else {
let id = next_id.fetch_add(1, Ordering::Relaxed) as i64;
format!("INSERT INTO t VALUES ({id}, 'sp_{id}', NULL)")
};
// Constraint errors are tolerated; conflict/busy/terminated errors
// abort the whole transaction; anything else fails the test.
match conn.execute(&sql) {
Ok(_) => {}
Err(LimboError::Constraint(_)) => {}
Err(LimboError::WriteWriteConflict)
| Err(LimboError::Busy)
| Err(LimboError::TxTerminated) => {
aborted = true;
break 'sp;
}
Err(e) => panic!("sp mutation failed: {e:?}"),
}
}
}
if aborted {
let _ = conn.execute("ROLLBACK");
continue;
}
let rb = (rng.random::<u8>() as usize) % depth;
let target = sps[rb].clone();
// Errors from the savepoint unwinding and commit are ignored (best effort).
let _ = conn.execute(format!("ROLLBACK TO {target}"));
let _ = conn.execute(format!("RELEASE {target}"));
let _ = conn.execute("COMMIT");
sp_iters.fetch_add(1, Ordering::Relaxed);
}
})
});
// Writer: one statement per transaction — a delete of a seeded id when the
// low two random bits are zero, otherwise an insert of a fresh id.
let writer_thread = enable_writer.then(|| {
let db_arc = db.get_db();
let stop = stop.clone();
let mismatch = mismatch.clone();
let writer_iters = writer_iters.clone();
let next_id = next_id.clone();
std::thread::spawn(move || {
let conn = db_arc.connect().unwrap();
let mut rng = ChaCha8Rng::seed_from_u64(0xDEADBEEF);
while !stop.load(Ordering::Relaxed) && !mismatch.load(Ordering::Relaxed) {
if conn.execute("BEGIN CONCURRENT").is_err() {
std::thread::yield_now();
continue;
}
// The id is reserved up front even when this iteration ends up deleting.
let id = next_id.fetch_add(1, Ordering::Relaxed) as i64;
let sql = if rng.random::<u8>() & 3 == 0 {
let target = (rng.random::<u32>() % 500) as i64;
format!("DELETE FROM t WHERE id = {target}")
} else {
format!("INSERT INTO t VALUES ({id}, 'w_{id}', NULL)")
};
if conn.execute(&sql).is_err() {
let _ = conn.execute("ROLLBACK");
continue;
}
// Only successful commits count as writer progress.
if conn.execute("COMMIT").is_ok() {
writer_iters.fetch_add(1, Ordering::Relaxed);
}
}
})
});
// Checkpointer: cycles through the four checkpoint modes, ignoring errors.
let ckpt_thread = enable_ckpt.then(|| {
let db_arc = db.get_db();
let stop = stop.clone();
let mismatch = mismatch.clone();
let ckpt_iters = ckpt_iters.clone();
std::thread::spawn(move || {
let conn = db_arc.connect().unwrap();
let modes = ["PASSIVE", "FULL", "RESTART", "TRUNCATE"];
let mut idx = 0usize;
while !stop.load(Ordering::Relaxed) && !mismatch.load(Ordering::Relaxed) {
let _ = conn.execute(format!(
"PRAGMA wal_checkpoint({})",
modes[idx % modes.len()]
));
idx = idx.wrapping_add(1);
ckpt_iters.fetch_add(1, Ordering::Relaxed);
}
})
});
// DDL: creates and drops one of four index names in a loop, ignoring errors.
let ddl_thread = enable_ddl.then(|| {
let db_arc = db.get_db();
let stop = stop.clone();
let mismatch = mismatch.clone();
let ddl_iters = ddl_iters.clone();
std::thread::spawn(move || {
let conn = db_arc.connect().unwrap();
let mut i = 0u32;
while !stop.load(Ordering::Relaxed) && !mismatch.load(Ordering::Relaxed) {
let name = format!("idx_dyn_{}", i % 4);
let _ = conn.execute(format!("CREATE INDEX {name} ON t(v)"));
let _ = conn.execute(format!("DROP INDEX {name}"));
i = i.wrapping_add(1);
ddl_iters.fetch_add(1, Ordering::Relaxed);
}
})
});
// Let the threads run until the time budget is spent or a mismatch is flagged.
let started = Instant::now();
while started.elapsed() < duration && !mismatch.load(Ordering::Relaxed) {
std::thread::sleep(Duration::from_millis(50));
}
stop.store(true, Ordering::Relaxed);
// Joining with `unwrap` propagates any panic raised inside a worker thread.
reader.join().unwrap();
if let Some(h) = sp_thread {
h.join().unwrap();
}
if let Some(h) = writer_thread {
h.join().unwrap();
}
if let Some(h) = ckpt_thread {
h.join().unwrap();
}
if let Some(h) = ddl_thread {
h.join().unwrap();
}
let r = reader_iters.load(Ordering::Relaxed);
let rs = reader_samples.load(Ordering::Relaxed);
let s = sp_iters.load(Ordering::Relaxed);
let w = writer_iters.load(Ordering::Relaxed);
let c = ckpt_iters.load(Ordering::Relaxed);
let d = ddl_iters.load(Ordering::Relaxed);
eprintln!(
"reader_iters={r} reader_samples={rs} sp_iters={s} writer_iters={w} ckpt_iters={c} ddl_iters={d} elapsed={:?}",
started.elapsed()
);
if mismatch.load(Ordering::Relaxed) {
let a = mismatch_first.load(Ordering::Relaxed);
let b = mismatch_second.load(Ordering::Relaxed);
let ia = mismatch_idx_a.load(Ordering::Relaxed);
let ib = mismatch_idx_b.load(Ordering::Relaxed);
panic!(
"snapshot count drifted within a single BEGIN CONCURRENT: \
samples[{ia}]={a} samples[{ib}]={b} \
(reader_iters={r}, sp_iters={s}, writer_iters={w}, ckpt_iters={c}, ddl_iters={d})"
);
}
// Guard against a vacuous pass where the reader never sampled anything.
assert!(rs > 0, "reader made no progress");
}
/// `BEGIN CONCURRENT` issued while a `BEGIN DEFERRED` transaction is already
/// open must fail, and the connection must still be able to read afterwards.
#[test]
fn test_read_lock_leak_deferred_then_concurrent() {
    let db = MvccTestDbNoConn::new_with_random_db();

    let setup = db.connect();
    setup
        .execute("CREATE TABLE t1(id INTEGER PRIMARY KEY, val TEXT)")
        .unwrap();
    setup.execute("INSERT INTO t1 VALUES(1, 'v1')").unwrap();
    setup.close().unwrap();

    let conn = db.connect();
    conn.execute("BEGIN DEFERRED").unwrap();
    // A nested BEGIN must be rejected.
    let nested_begin = conn.execute("BEGIN CONCURRENT");
    assert!(nested_begin.is_err());

    let visible = get_rows(&conn, "SELECT * FROM t1");
    assert_eq!(visible.len(), 1);
}
/// An auto-rowid INSERT is forced to yield at `CursorYieldPoint::SeekStart`
/// and its statement is then dropped. A second connection's auto-rowid INSERT
/// must still run to completion within five seconds.
#[test]
fn rowid_allocator_lock_released_when_statement_dropped_at_seek_yield() {
    use std::time::{Duration, Instant};

    let db = MvccTestDbNoConn::new_with_random_db();
    let setup = db.connect();
    setup
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();
    setup.close().unwrap();

    let leaker = db.connect();
    let victim = db.connect();

    // Make the leaker's next seek yield exactly once.
    let seek_yield = FixedYieldInjector::new([CursorYieldPoint::SeekStart.point()]);
    leaker.set_yield_injector(Some(seek_yield));
    let mut leak_stmt = leaker
        .prepare("INSERT INTO t VALUES (NULL, 'leaker')")
        .unwrap();
    let first_step = leak_stmt.step().unwrap();
    if !matches!(first_step, crate::StepResult::IO) {
        panic!("expected IO yield from injected seek_start; got {first_step:?}");
    }
    // Abandon the statement mid-flight.
    drop(leak_stmt);
    leaker.set_yield_injector(None);

    let mut victim_stmt = victim
        .prepare("INSERT INTO t VALUES (NULL, 'victim')")
        .unwrap();
    let deadline = Instant::now() + Duration::from_secs(5);
    loop {
        assert!(
            Instant::now() < deadline,
            "victim INSERT did not complete within 5s — rowid allocator lock leaked"
        );
        match victim_stmt.step().unwrap() {
            crate::StepResult::IO => {}
            crate::StepResult::Done => break,
            other => panic!("unexpected step result on victim INSERT: {other:?}"),
        }
    }
}
/// A `BEGIN IMMEDIATE` transaction on `conn_a` has a `TxError` injected at
/// `CommitYieldPoint::AfterRemoveTx`, so its COMMIT must return an error.
/// A later `BEGIN CONCURRENT` transaction on `conn_b` must still be able to
/// insert and commit.
///
/// The COMMIT on `conn_b` is driven with a five-second deadline so that a
/// regression which keeps yielding `IO` fails the test instead of hanging it.
#[test]
fn exclusive_commit_failure_at_after_remove_tx_strands_exclusive_atom() {
    use std::time::{Duration, Instant};

    let db = MvccTestDbNoConn::new();
    let conn_a = db.connect();
    let conn_b = db.connect();
    conn_a
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    // Fail conn_a's exclusive commit at the injected point.
    conn_a.execute("BEGIN IMMEDIATE").unwrap();
    conn_a.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    conn_a.set_failure_injector(Some(FixedFailureInjector::new([(
        CommitYieldPoint::AfterRemoveTx.point(),
        LimboError::TxError("synthetic AfterRemoveTx failure".to_string()),
    )])));
    conn_a
        .execute("COMMIT")
        .expect_err("COMMIT must surface the injected TxError");

    // A subsequent writer must not be blocked by the failed commit.
    conn_b.execute("BEGIN CONCURRENT").unwrap();
    conn_b.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
    let mut commit_b = conn_b.prepare("COMMIT").unwrap();
    let deadline = Instant::now() + Duration::from_secs(5);
    let step_result = loop {
        match commit_b.step() {
            Ok(StepResult::IO) => assert!(
                Instant::now() < deadline,
                "conn_b COMMIT did not complete within 5s after conn_a's failed commit"
            ),
            other => break other,
        }
    };
    match step_result {
        Ok(StepResult::Done) => {}
        Ok(other) => panic!("conn_b COMMIT: unexpected step result: {other:?}"),
        Err(err) => panic!("COMMIT after failed commit must not return error, got {err}"),
    }
}
/// A `BEGIN CONCURRENT` COMMIT is made to yield at
/// `CommitYieldPoint::LogRecordPrepared` and the statement is dropped.
/// Afterwards the transaction must be gone from the store, the connection must
/// have no tx id and be in `TransactionState::None`, and a fresh INSERT must
/// succeed with only that new row visible.
#[test]
fn dropped_concurrent_commit_does_not_strand_connection() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let setup = db.connect();
    setup
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();
    setup.close().unwrap();

    let conn = db.connect();
    conn.execute("BEGIN CONCURRENT").unwrap();
    conn.execute("INSERT INTO t VALUES (1, 'a')").unwrap();

    let mv_store = db.get_mvcc_store();
    let tx_id = conn
        .get_mv_tx_id()
        .expect("tx must be open after INSERT inside BEGIN CONCURRENT");

    let commit_yield = FixedYieldInjector::new([CommitYieldPoint::LogRecordPrepared.point()]);
    conn.set_yield_injector(Some(commit_yield));
    {
        // The statement is dropped at the end of this scope, mid-commit.
        let mut abandoned_commit = conn.prepare("COMMIT").unwrap();
        let outcome = abandoned_commit.step().unwrap();
        if !matches!(outcome, crate::StepResult::IO) {
            panic!("expected IO yield at LogRecordPrepared; got {outcome:?}");
        }
    }
    conn.set_yield_injector(None);

    assert!(
        !mv_store.txs.contains_key(&tx_id),
        "orphan tx must be rolled back by abort-side cleanup"
    );
    assert!(
        conn.get_mv_tx_id().is_none(),
        "connection's mv_tx slot must be cleared"
    );
    assert_eq!(
        conn.get_tx_state(),
        crate::connection::TransactionState::None,
        "transaction_state must be reset after abort-side rollback"
    );

    // The connection is usable again, and the abandoned row 1 never landed.
    conn.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
    let visible = get_rows(&conn, "SELECT id FROM t ORDER BY id");
    assert_eq!(visible.len(), 1);
    assert_eq!(visible[0][0].as_int().unwrap(), 2);
}
/// A `BEGIN IMMEDIATE` COMMIT is made to yield at
/// `CommitYieldPoint::LogRecordPrepared` and the statement is dropped.
/// The store must no longer report the transaction as exclusive, and a second
/// connection must be able to run its own `BEGIN IMMEDIATE` write, after which
/// only its row is visible.
#[test]
fn dropped_exclusive_commit_releases_locks() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let setup = db.connect();
    setup
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();
    setup.close().unwrap();

    let conn_a = db.connect();
    let conn_b = db.connect();
    conn_a.execute("BEGIN IMMEDIATE").unwrap();
    conn_a.execute("INSERT INTO t VALUES (1, 'a')").unwrap();

    let mv_store = db.get_mvcc_store();
    let tx_a = conn_a
        .get_mv_tx_id()
        .expect("EXCLUSIVE tx_a must be open after INSERT");
    assert!(
        mv_store.is_exclusive_tx(&tx_a),
        "EXCLUSIVE tx_a must own the exclusive_tx atomic"
    );

    let commit_yield = FixedYieldInjector::new([CommitYieldPoint::LogRecordPrepared.point()]);
    conn_a.set_yield_injector(Some(commit_yield));
    {
        // Dropped at scope end while the commit is still in flight.
        let mut abandoned_commit = conn_a.prepare("COMMIT").unwrap();
        let outcome = abandoned_commit.step().unwrap();
        if !matches!(outcome, crate::StepResult::IO) {
            panic!("expected IO yield at LogRecordPrepared; got {outcome:?}");
        }
    }
    conn_a.set_yield_injector(None);

    assert!(
        !mv_store.is_exclusive_tx(&tx_a),
        "exclusive_tx must be released by abort-side cleanup"
    );

    // Another exclusive writer can proceed end-to-end.
    for sql in [
        "BEGIN IMMEDIATE",
        "INSERT INTO t VALUES (2, 'b')",
        "COMMIT",
    ] {
        conn_b.execute(sql).unwrap();
    }
    let visible = get_rows(&conn_b, "SELECT id FROM t ORDER BY id");
    assert_eq!(visible.len(), 1);
    assert_eq!(visible[0][0].as_int().unwrap(), 2);
}
/// One `BEGIN CONCURRENT` transaction writes to both the main database and an
/// attached one. Its COMMIT yields at `CommitYieldPoint::LogRecordPrepared`
/// and is dropped. The attached database's tx slot, store entry and pager
/// read lock must all be cleared, and the attached INSERT must not be visible.
#[test]
fn dropped_main_commit_rolls_back_attached_mvcc_txs() {
    let opts = DatabaseOpts::new().with_attach(true);
    let db = MvccTestDbNoConn::new_with_random_db_with_opts(opts);
    let aux_dir = tempfile::TempDir::new().unwrap();
    let aux_path = aux_dir.path().join("aux.db");

    let conn = db.connect();
    conn.attach_database(aux_path.to_str().unwrap(), "aux")
        .unwrap();
    for sql in [
        "CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)",
        "CREATE TABLE aux.u (id INTEGER PRIMARY KEY, v TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO t VALUES (1, 'main')",
        "INSERT INTO aux.u VALUES (1, 'aux')",
    ] {
        conn.execute(sql).unwrap();
    }

    // Capture handles to the attached database's state before abandoning.
    let aux_db_id = conn.get_database_id_by_name("aux").unwrap();
    let aux_mv_store = conn
        .mv_store_for_db(aux_db_id)
        .expect("attached aux database must be MVCC");
    let aux_pager = conn.get_pager_from_database_index(&aux_db_id).unwrap();
    let aux_tx_id = conn
        .get_mv_tx_id_for_db(aux_db_id)
        .expect("attached MVCC tx must be open after INSERT");

    let commit_yield = FixedYieldInjector::new([CommitYieldPoint::LogRecordPrepared.point()]);
    conn.set_yield_injector(Some(commit_yield));
    {
        let mut abandoned_commit = conn.prepare("COMMIT").unwrap();
        let outcome = abandoned_commit.step().unwrap();
        if !matches!(outcome, crate::StepResult::IO) {
            panic!("expected IO yield at LogRecordPrepared; got {outcome:?}");
        }
    }
    conn.set_yield_injector(None);

    assert!(
        conn.get_mv_tx_id_for_db(aux_db_id).is_none(),
        "attached MVCC tx slot must be cleared when abandoned COMMIT is rolled back"
    );
    assert!(
        !aux_mv_store.txs.contains_key(&aux_tx_id),
        "attached MVCC tx must be removed from txs"
    );
    assert!(
        !aux_pager.holds_read_lock(),
        "attached pager read lock must be released"
    );
    let aux_rows = get_rows(&conn, "SELECT id FROM aux.u ORDER BY id");
    assert!(
        aux_rows.is_empty(),
        "abandoned attached INSERT must not become visible"
    );
}
/// A `BEGIN CONCURRENT` transaction writes only to an attached database. Its
/// COMMIT yields at `CommitYieldPoint::LogRecordPrepared` and is dropped.
/// The attached tx slot, store entry and pager read lock must all be cleared.
#[test]
fn dropped_attached_commit_releases_attached_read_lock() {
    let opts = DatabaseOpts::new().with_attach(true);
    let db = MvccTestDbNoConn::new_with_random_db_with_opts(opts);
    let aux_dir = tempfile::TempDir::new().unwrap();
    let aux_path = aux_dir.path().join("aux.db");

    let conn = db.connect();
    conn.attach_database(aux_path.to_str().unwrap(), "aux")
        .unwrap();
    for sql in [
        "CREATE TABLE aux.u (id INTEGER PRIMARY KEY, v TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO aux.u VALUES (1, 'aux')",
    ] {
        conn.execute(sql).unwrap();
    }

    let aux_db_id = conn.get_database_id_by_name("aux").unwrap();
    let aux_mv_store = conn
        .mv_store_for_db(aux_db_id)
        .expect("attached aux database must be MVCC");
    let aux_pager = conn.get_pager_from_database_index(&aux_db_id).unwrap();
    let aux_tx_id = conn
        .get_mv_tx_id_for_db(aux_db_id)
        .expect("attached MVCC tx must be open after INSERT");

    let commit_yield = FixedYieldInjector::new([CommitYieldPoint::LogRecordPrepared.point()]);
    conn.set_yield_injector(Some(commit_yield));
    {
        // Dropping the statement here abandons the in-flight commit.
        let mut abandoned_commit = conn.prepare("COMMIT").unwrap();
        let outcome = abandoned_commit.step().unwrap();
        if !matches!(outcome, crate::StepResult::IO) {
            panic!("expected IO yield at attached LogRecordPrepared; got {outcome:?}");
        }
    }
    conn.set_yield_injector(None);

    assert!(
        conn.get_mv_tx_id_for_db(aux_db_id).is_none(),
        "attached MVCC tx slot must be cleared"
    );
    assert!(
        !aux_mv_store.txs.contains_key(&aux_tx_id),
        "attached MVCC tx must be removed from txs"
    );
    assert!(
        !aux_pager.holds_read_lock(),
        "attached pager read lock must be released"
    );
}
/// A `BEGIN CONCURRENT` transaction writes to two attached databases. Its
/// COMMIT yields at `CommitYieldPoint::LogRecordPrepared` and is dropped.
/// For both attached databases the tx slot, store entry and pager read lock
/// must be cleared.
#[test]
fn dropped_attached_commit_rolls_back_remaining_attached_mvcc_txs() {
    let opts = DatabaseOpts::new().with_attach(true);
    let db = MvccTestDbNoConn::new_with_random_db_with_opts(opts);
    let aux_dir = tempfile::TempDir::new().unwrap();
    let aux1_path = aux_dir.path().join("aux1.db");
    let aux2_path = aux_dir.path().join("aux2.db");

    let conn = db.connect();
    conn.attach_database(aux1_path.to_str().unwrap(), "aux1")
        .unwrap();
    conn.attach_database(aux2_path.to_str().unwrap(), "aux2")
        .unwrap();
    for sql in [
        "CREATE TABLE aux1.u (id INTEGER PRIMARY KEY, v TEXT)",
        "CREATE TABLE aux2.v (id INTEGER PRIMARY KEY, v TEXT)",
        "BEGIN CONCURRENT",
        "INSERT INTO aux1.u VALUES (1, 'aux1')",
        "INSERT INTO aux2.v VALUES (1, 'aux2')",
    ] {
        conn.execute(sql).unwrap();
    }

    // Capture per-database handles before the commit is abandoned.
    let aux1_db_id = conn.get_database_id_by_name("aux1").unwrap();
    let aux2_db_id = conn.get_database_id_by_name("aux2").unwrap();
    let aux1_mv_store = conn
        .mv_store_for_db(aux1_db_id)
        .expect("attached aux1 database must be MVCC");
    let aux2_mv_store = conn
        .mv_store_for_db(aux2_db_id)
        .expect("attached aux2 database must be MVCC");
    let aux1_pager = conn.get_pager_from_database_index(&aux1_db_id).unwrap();
    let aux2_pager = conn.get_pager_from_database_index(&aux2_db_id).unwrap();
    let aux1_tx_id = conn
        .get_mv_tx_id_for_db(aux1_db_id)
        .expect("attached aux1 MVCC tx must be open after INSERT");
    let aux2_tx_id = conn
        .get_mv_tx_id_for_db(aux2_db_id)
        .expect("attached aux2 MVCC tx must be open after INSERT");

    let commit_yield = FixedYieldInjector::new([CommitYieldPoint::LogRecordPrepared.point()]);
    conn.set_yield_injector(Some(commit_yield));
    {
        let mut abandoned_commit = conn.prepare("COMMIT").unwrap();
        let outcome = abandoned_commit.step().unwrap();
        if !matches!(outcome, crate::StepResult::IO) {
            panic!("expected IO yield at attached LogRecordPrepared; got {outcome:?}");
        }
    }
    conn.set_yield_injector(None);

    // Connection-level tx slots.
    assert!(
        conn.get_mv_tx_id_for_db(aux1_db_id).is_none(),
        "attached aux1 MVCC tx slot must be cleared"
    );
    assert!(
        conn.get_mv_tx_id_for_db(aux2_db_id).is_none(),
        "attached aux2 MVCC tx slot must be cleared"
    );
    // Store-level tx entries.
    assert!(
        !aux1_mv_store.txs.contains_key(&aux1_tx_id),
        "attached aux1 MVCC tx must be removed from txs"
    );
    assert!(
        !aux2_mv_store.txs.contains_key(&aux2_tx_id),
        "attached aux2 MVCC tx must be removed from txs"
    );
    // Pager read locks.
    assert!(
        !aux1_pager.holds_read_lock(),
        "attached aux1 pager read lock must be released"
    );
    assert!(
        !aux2_pager.holds_read_lock(),
        "attached aux2 pager read lock must be released"
    );
}
/// A durable-storage wrapper makes one `log_tx` call return
/// `LimboError::Busy`, so a `BEGIN CONCURRENT` COMMIT on `conn_a` fails.
/// A following `BEGIN CONCURRENT` COMMIT on `conn_b` must then reach `Done`
/// within a bounded number of steps.
#[test]
fn busy_from_log_tx_strands_pager_commit_lock_then_blocks_subsequent_commit() {
    use crate::io::FileSyncType;
    use crate::mvcc;
    use crate::mvcc::database::LogRecord;
    use crate::mvcc::persistent_storage::logical_log::{LogHeader, OnSerializationComplete};
    use crate::mvcc::persistent_storage::DurableStorage;
    use crate::storage::encryption::EncryptionContext;
    use crate::{CheckpointResult, File, Result, IO};
    use std::time::Duration;

    /// `DurableStorage` wrapper that forwards every call to `inner`, except
    /// that an armed instance fails the next `log_tx` with `LimboError::Busy`.
    #[derive(Debug)]
    struct BusyOnLogTxStorage {
        inner: Arc<dyn DurableStorage>,
        /// When true, the next `log_tx` call fails and clears the flag.
        arm_log_tx_busy: AtomicBool,
    }

    impl BusyOnLogTxStorage {
        /// Wraps `inner` in an unarmed decorator.
        fn new(inner: Arc<dyn DurableStorage>) -> Arc<Self> {
            Arc::new(Self {
                inner,
                arm_log_tx_busy: AtomicBool::new(false),
            })
        }

        /// Arms a single `Busy` failure for the next `log_tx` call.
        fn arm(&self) {
            self.arm_log_tx_busy.store(true, Ordering::Release);
        }
    }

    impl DurableStorage for BusyOnLogTxStorage {
        fn log_tx(
            &self,
            m: &LogRecord,
            c: OnSerializationComplete<'_>,
        ) -> Result<(Completion, u64)> {
            // `swap` consumes the armed flag, so only one call fails.
            if self.arm_log_tx_busy.swap(false, Ordering::AcqRel) {
                return Err(LimboError::Busy);
            }
            self.inner.log_tx(m, c)
        }
        // Everything below is pure delegation to the wrapped storage.
        fn sync(&self, t: FileSyncType) -> Result<Completion> {
            self.inner.sync(t)
        }
        fn update_header(&self) -> Result<Completion> {
            self.inner.update_header()
        }
        fn truncate(&self) -> Result<Completion> {
            self.inner.truncate()
        }
        fn get_logical_log_file(&self) -> Arc<dyn File> {
            self.inner.get_logical_log_file()
        }
        fn should_checkpoint(&self) -> bool {
            self.inner.should_checkpoint()
        }
        fn set_checkpoint_threshold(&self, t: i64) {
            self.inner.set_checkpoint_threshold(t)
        }
        fn checkpoint_threshold(&self) -> i64 {
            self.inner.checkpoint_threshold()
        }
        fn advance_logical_log_offset_after_success(&self, b: u64) {
            self.inner.advance_logical_log_offset_after_success(b)
        }
        fn restore_logical_log_state_after_recovery(&self, o: u64, c: u32) {
            self.inner.restore_logical_log_state_after_recovery(o, c)
        }
        fn set_header(&self, h: LogHeader) {
            self.inner.set_header(h)
        }
        fn on_checkpoint_start(&self, m: u64) -> Result<()> {
            self.inner.on_checkpoint_start(m)
        }
        fn on_checkpoint_end(&self, m: u64, r: Result<&CheckpointResult>) -> Result<()> {
            self.inner.on_checkpoint_end(m, r)
        }
        fn encryption_ctx(&self) -> Option<EncryptionContext> {
            self.inner.encryption_ctx()
        }
    }

    /// Steps `stmt` until it reports `Done`, sleeping 10ms after each `IO`
    /// result. Panics on any other result, on an error, or once `budget`
    /// steps have been used without reaching `Done`.
    fn drive_to_done_or_timeout(stmt: &mut Statement, budget: usize) {
        for _ in 0..budget {
            match stmt.step() {
                Ok(StepResult::Done) => return,
                Ok(StepResult::IO) => std::thread::sleep(Duration::from_millis(10)),
                Ok(other) => panic!("unexpected step: {other:?}"),
                Err(error) => panic!("received error: {error}"),
            }
        }
        panic!("budget exhausted: statement not done after {budget} iterations");
    }

    let temp_dir = tempfile::TempDir::new().unwrap();
    let path = temp_dir
        .path()
        .join(format!("test_{}.db", rand::random::<u64>()));
    let path_str = path.to_str().unwrap().to_string();
    {
        // First open: switch the file to MVCC journal mode, then clear the
        // database manager before the file is reopened below.
        let io: Arc<dyn IO> = Arc::new(PlatformIO::new().unwrap());
        let db = Database::open_file_with_flags(
            io,
            &path_str,
            OpenFlags::default(),
            DatabaseOpts::new(),
            None,
        )
        .unwrap();
        let conn = db.connect().unwrap();
        conn.execute("PRAGMA journal_mode = 'mvcc'").unwrap();
        conn.close().unwrap();
        DATABASE_MANAGER.lock().clear();
    }

    // Second open: same file, with the fault-injecting storage wrapped around
    // a real logical-log storage.
    let log_path = path.with_extension("db-log");
    let io: Arc<dyn IO> = Arc::new(PlatformIO::new().unwrap());
    let log_file = io
        .open_file(log_path.to_str().unwrap(), OpenFlags::default(), false)
        .unwrap();
    let inner_storage: Arc<dyn DurableStorage> = Arc::new(mvcc::persistent_storage::Storage::new(
        log_file,
        io.clone(),
        None,
    ));
    let busy_storage = BusyOnLogTxStorage::new(inner_storage);
    let db = Database::open_file_with_flags_and_durable_storage(
        io,
        &path_str,
        OpenFlags::default(),
        DatabaseOpts::new(),
        None,
        Some(busy_storage.clone() as Arc<dyn DurableStorage>),
    )
    .unwrap();

    let conn_a = db.connect().unwrap();
    let conn_b = db.connect().unwrap();
    conn_a
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();
    let mv_store: Arc<MvStore<MvccClock>> = db.get_mv_store().clone().unwrap();

    conn_a.execute("BEGIN CONCURRENT").unwrap();
    conn_a.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    let tx_a = conn_a
        .get_mv_tx_id()
        .expect("tx_a must be open after INSERT");
    assert!(
        !mv_store.is_exclusive_tx(&tx_a),
        "tx_a must be CONCURRENT (non-exclusive) so it goes through BeginCommitLogicalLog"
    );

    // Fail conn_a's commit with the injected Busy.
    busy_storage.arm();
    conn_a
        .execute("COMMIT")
        .expect_err("COMMIT must surface the injected Busy from log_tx");

    // conn_b must still be able to commit within the step budget.
    conn_b.execute("BEGIN CONCURRENT").unwrap();
    conn_b.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
    let mut commit_b = conn_b.prepare("COMMIT").unwrap();
    drive_to_done_or_timeout(&mut commit_b, 30);
}
/// A `BEGIN CONCURRENT` COMMIT is stepped once with a yield injected at
/// `LogRecordPrepared`, and the statement is then dropped. A subsequent INSERT
/// on the same connection must succeed.
#[test]
fn test_dropped_commit_corrupts_subsequent_insert() {
    let db = MvccTestDbNoConn::new_with_random_db();
    {
        let setup = db.connect();
        setup
            .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
            .unwrap();
        setup.close().unwrap();
    }

    let conn = db.connect();
    conn.execute("BEGIN CONCURRENT").unwrap();
    conn.execute("INSERT INTO t VALUES (1, 'first')").unwrap();

    let commit_yield = FixedYieldInjector::new([LogRecordPrepared.point()]);
    conn.set_yield_injector(Some(commit_yield));
    {
        // Either a yield or an immediate completion is accepted here; the
        // statement is dropped at the end of this scope in both cases.
        let mut commit = conn.prepare("COMMIT").unwrap();
        let outcome = commit.step().unwrap();
        if !matches!(outcome, StepResult::IO | StepResult::Done) {
            panic!("unexpected step result: {outcome:?}");
        }
    }

    conn.execute("INSERT INTO t VALUES (2, 'second')").unwrap();
}
/// A `BEGIN IMMEDIATE` COMMIT yields at `CommitYieldPoint::LogRecordPrepared`
/// and is abandoned. A second COMMIT on the same connection must fail with
/// "cannot commit - no transaction is active", and a `BEGIN CONCURRENT` writer
/// on another connection must not get an error from its first COMMIT step.
#[test]
fn abandoned_exclusive_commit_should_not_block_subsequent_concurrent_writer() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn_a = db.connect();
    let conn_b = db.connect();
    conn_a
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    conn_a.execute("BEGIN IMMEDIATE").unwrap();
    conn_a.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    let commit_yield = FixedYieldInjector::new([CommitYieldPoint::LogRecordPrepared.point()]);
    conn_a.set_yield_injector(Some(commit_yield));

    // The temporary statement is dropped as soon as this `match` finishes.
    match conn_a.prepare("COMMIT").unwrap().step().unwrap() {
        StepResult::IO => {}
        other => panic!("tx should yield, got: {other:?}"),
    }

    // With the transaction gone, committing again must be rejected.
    let second_commit_is_rejected = matches!(
        conn_a.prepare("COMMIT").unwrap().step().err().unwrap(),
        LimboError::TxError(msg) if msg == "cannot commit - no transaction is active"
    );
    assert!(second_commit_is_rejected);

    conn_b.execute("BEGIN CONCURRENT").unwrap();
    conn_b.execute("INSERT INTO t VALUES (2, 'b')").unwrap();
    // Only an error fails the test; any successful step result is accepted.
    if let Err(err) = conn_b.prepare("COMMIT").unwrap().step() {
        panic!("conn_b COMMIT must not error; got: {err:?}");
    }
}
/// A `BEGIN IMMEDIATE` COMMIT yields at
/// `CommitYieldPoint::BeforeFinishCommittedTx` and is abandoned. After one
/// more COMMIT attempt (result ignored), a TRUNCATE checkpoint from another
/// connection must succeed.
#[test]
fn abandoned_commit_in_committed_state_should_not_block_subsequent_checkpoint() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn_a = db.connect();
    let conn_b = db.connect();
    conn_a
        .execute("CREATE TABLE t (id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    conn_a.execute("BEGIN IMMEDIATE").unwrap();
    conn_a.execute("INSERT INTO t VALUES (1, 'a')").unwrap();
    let late_yield = FixedYieldInjector::new([CommitYieldPoint::BeforeFinishCommittedTx.point()]);
    conn_a.set_yield_injector(Some(late_yield));

    // The temporary statement is dropped right after its single step.
    let first_step = conn_a.prepare("COMMIT").unwrap().step().unwrap();
    if !matches!(first_step, StepResult::IO) {
        panic!("tx should yield, got: {first_step:?}");
    }

    // Outcome of the follow-up COMMIT is intentionally not inspected.
    let _ = conn_a.prepare("COMMIT").unwrap().step();

    conn_b.execute("PRAGMA wal_checkpoint(TRUNCATE)").unwrap();
}
/// One concurrent transaction inserts explicit rowid 1000; a second one,
/// started afterwards, inserts with an auto rowid and commits first. The auto
/// row must end up with rowid 1001.
#[test]
fn test_concurrent_explicit_rowid_high_watermark_not_clobbered() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let observer = db.connect();
    let explicit_writer = db.connect();
    let auto_writer = db.connect();
    observer
        .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    explicit_writer.execute("BEGIN CONCURRENT").unwrap();
    explicit_writer
        .execute("INSERT INTO t(id, v) VALUES (1000, 'A-explicit')")
        .unwrap();

    auto_writer.execute("BEGIN CONCURRENT").unwrap();
    auto_writer
        .execute("INSERT INTO t(v) VALUES ('B-auto')")
        .unwrap();

    // The auto-rowid transaction commits before the explicit one.
    auto_writer.execute("COMMIT").unwrap();
    explicit_writer.execute("COMMIT").unwrap();

    let rows = get_rows(&observer, "SELECT rowid, v FROM t ORDER BY rowid");
    let expected = [(1000, "A-explicit"), (1001, "B-auto")];
    assert_eq!(rows.len(), expected.len());
    for (row, (rowid, label)) in rows.iter().zip(expected) {
        assert_eq!(row[0].as_int().unwrap(), rowid);
        assert_eq!(row[1].to_string(), label);
    }
}
/// One concurrent transaction inserts explicit rowid 5 while another inserts
/// five auto-rowid rows and commits first. Both commits must succeed, and the
/// auto rows must take rowids 6 through 10.
#[test]
fn test_concurrent_explicit_rowid_auto_rowid_does_not_walk_back_into_collision() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let observer = db.connect();
    let explicit_writer = db.connect();
    let auto_writer = db.connect();
    observer
        .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)")
        .unwrap();

    explicit_writer.execute("BEGIN CONCURRENT").unwrap();
    explicit_writer
        .execute("INSERT INTO t(id, v) VALUES (5, 'A')")
        .unwrap();

    auto_writer.execute("BEGIN CONCURRENT").unwrap();
    for n in 0..5 {
        let insert = format!("INSERT INTO t(v) VALUES ('B{n}')");
        auto_writer.execute(insert).unwrap();
    }
    auto_writer.execute("COMMIT").unwrap();

    explicit_writer
        .execute("COMMIT")
        .expect("explicit rowid transaction should not conflict with auto rowids");

    let rows = get_rows(&observer, "SELECT rowid, v FROM t ORDER BY rowid");
    assert_eq!(rows.len(), 6);
    assert_eq!(rows[0][0].as_int().unwrap(), 5);
    assert_eq!(rows[0][1].to_string(), "A");
    // The remaining five rows are the auto-rowid inserts, in insertion order.
    for (i, row) in rows.iter().skip(1).enumerate() {
        assert_eq!(row[0].as_int().unwrap(), 6 + i as i64);
        assert_eq!(row[1].to_string(), format!("B{i}"));
    }
}
/// With a secondary index on `k`, an explicit-rowid insert and an auto-rowid
/// insert run in overlapping concurrent transactions. After four more
/// auto-rowid inserts, `PRAGMA integrity_check` must report `ok` and an
/// indexed lookup for `k = 999` must return exactly the explicit row.
#[test]
fn test_concurrent_explicit_rowid_preserves_auto_rowid_watermark() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn1 = db.connect();
    let conn2 = db.connect();
    conn1
        .execute("CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT, k INTEGER)")
        .unwrap();
    conn1.execute("CREATE INDEX t_k ON t(k)").unwrap();

    // Explicit rowid on conn1, auto rowid on conn2; conn2 commits first.
    conn1.execute("BEGIN CONCURRENT").unwrap();
    conn1
        .execute("INSERT INTO t(id, v, k) VALUES (5, 'A', 999)")
        .unwrap();
    conn2.execute("BEGIN CONCURRENT").unwrap();
    conn2
        .execute("INSERT INTO t(v, k) VALUES ('B', 100)")
        .unwrap();
    conn2.execute("COMMIT").unwrap();
    conn1.execute("COMMIT").unwrap();

    // Follow-up auto-rowid inserts outside any explicit transaction.
    let follow_ups = [("p2", 200), ("p3", 300), ("p4", 400), ("p5", 500)];
    for (label, key) in follow_ups {
        let insert = format!("INSERT INTO t(v, k) VALUES ('{label}', {key})");
        conn1.execute(insert).unwrap();
    }

    let integrity = get_rows(&conn1, "PRAGMA integrity_check");
    assert_eq!(integrity.len(), 1);
    let verdict = integrity[0][0].to_string();
    assert_eq!(
        verdict, "ok",
        "integrity_check should not report stale secondary index entries"
    );

    let indexed = get_rows(
        &conn1,
        "SELECT rowid, v, k FROM t INDEXED BY t_k WHERE k = 999",
    );
    assert_eq!(indexed.len(), 1);
    let hit = &indexed[0];
    assert_eq!(hit[0].as_int().unwrap(), 5);
    assert_eq!(hit[1].to_string(), "A");
    assert_eq!(hit[2].as_int().unwrap(), 999);
}
/// After inserting explicit rowid -5 into an empty table, the next auto-rowid
/// insert must receive rowid -4.
#[test]
fn test_auto_rowid_after_negative_explicit_rowid_uses_next_negative() {
    let db = MvccTestDbNoConn::new_with_random_db();
    let conn = db.connect();
    for sql in [
        "CREATE TABLE t(id INTEGER PRIMARY KEY, v TEXT)",
        "INSERT INTO t(id, v) VALUES(-5, 'manual')",
        "INSERT INTO t(v) VALUES('auto')",
    ] {
        conn.execute(sql).unwrap();
    }

    let rows = get_rows(&conn, "SELECT id, v FROM t ORDER BY id");
    let expected = [(-5, "manual"), (-4, "auto")];
    assert_eq!(rows.len(), expected.len());
    for (row, (id, label)) in rows.iter().zip(expected) {
        assert_eq!(row[0].as_int().unwrap(), id);
        assert_eq!(row[1].to_string(), label);
    }
}