use crate::storage::block_storage::BlockStorage;
use crate::types::DatabaseError;
// Size of one block in the underlying `BlockStorage` layer, in bytes.
const BLOCK_SIZE: usize = 4096;
// Default cap on total export size: 2 GiB.
const DEFAULT_MAX_EXPORT_SIZE: u64 = 2 * 1024 * 1024 * 1024;
// Default per-iteration read size for the streaming export path: 10 MiB.
const DEFAULT_CHUNK_SIZE: u64 = 10 * 1024 * 1024;
/// Progress hook for streaming exports; invoked as
/// `callback(bytes_exported_so_far, total_bytes)`.
pub type ProgressCallback = Box<dyn Fn(u64, u64) + Send + Sync>;
/// Options controlling a streaming database export
/// (see `export_database_with_options`).
#[derive(Default)]
pub struct ExportOptions {
// Maximum allowed export size in bytes; `None` falls back to
// DEFAULT_MAX_EXPORT_SIZE (2 GiB).
pub max_size_bytes: Option<u64>,
// Bytes read per chunk; `None` falls back to DEFAULT_CHUNK_SIZE (10 MiB).
pub chunk_size_bytes: Option<u64>,
// Optional progress callback, called after each chunk and once at completion.
pub progress_callback: Option<ProgressCallback>,
}
// First 16 bytes of every SQLite database file (includes the trailing NUL).
const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0";
// The fixed SQLite file header occupies the first 100 bytes.
const SQLITE_HEADER_SIZE: usize = 100;
// Byte offset of the big-endian u16 page size within the header.
const PAGE_SIZE_OFFSET: usize = 16;
// Byte offset of the big-endian u32 page count within the header.
const PAGE_COUNT_OFFSET: usize = 28;
// Valid SQLite page sizes are powers of two in [512, 65536].
const MIN_PAGE_SIZE: usize = 512;
const MAX_PAGE_SIZE: usize = 65536;
pub fn parse_sqlite_header(data: &[u8]) -> Result<(usize, u32), DatabaseError> {
if data.len() < SQLITE_HEADER_SIZE {
return Err(DatabaseError::new(
"INVALID_HEADER",
&format!(
"Header too small: {} bytes (minimum {} required)",
data.len(),
SQLITE_HEADER_SIZE
),
));
}
if &data[0..16] != SQLITE_MAGIC {
let magic_str = String::from_utf8_lossy(&data[0..16]);
return Err(DatabaseError::new(
"INVALID_SQLITE_FILE",
&format!(
"Invalid SQLite magic string. Expected 'SQLite format 3', got: '{}'",
magic_str
),
));
}
let page_size_raw = u16::from_be_bytes([data[PAGE_SIZE_OFFSET], data[PAGE_SIZE_OFFSET + 1]]);
let page_size = if page_size_raw == 1 {
65536
} else {
page_size_raw as usize
};
if !(512..=65536).contains(&page_size) || !page_size.is_power_of_two() {
return Err(DatabaseError::new(
"INVALID_PAGE_SIZE",
&format!(
"Invalid page size: {}. Must be power of 2 between 512 and 65536",
page_size
),
));
}
let page_count = u32::from_be_bytes([
data[PAGE_COUNT_OFFSET],
data[PAGE_COUNT_OFFSET + 1],
data[PAGE_COUNT_OFFSET + 2],
data[PAGE_COUNT_OFFSET + 3],
]);
log::debug!(
"Parsed SQLite header: page_size={}, page_count={}",
page_size,
page_count
);
Ok((page_size, page_count))
}
/// Checks that a database of `size_bytes` fits within the export limit.
///
/// When `max_size_bytes` is `None` the default limit of 2 GiB
/// (DEFAULT_MAX_EXPORT_SIZE) applies.
///
/// # Errors
/// Returns a `DATABASE_TOO_LARGE` error (with sizes rendered in MB) when
/// the database exceeds the limit.
pub fn validate_export_size(
    size_bytes: u64,
    max_size_bytes: Option<u64>,
) -> Result<(), DatabaseError> {
    let limit = max_size_bytes.unwrap_or(DEFAULT_MAX_EXPORT_SIZE);
    if size_bytes <= limit {
        return Ok(());
    }
    const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
    let size_mb = size_bytes as f64 / BYTES_PER_MB;
    let limit_mb = limit as f64 / BYTES_PER_MB;
    Err(DatabaseError::new(
        "DATABASE_TOO_LARGE",
        &format!(
            "Database too large for export: {:.2} MB exceeds limit of {:.2} MB. \
             Consider increasing max_export_size_bytes in DatabaseConfig or exporting in smaller chunks.",
            size_mb, limit_mb
        ),
    ))
}
/// Validates that `data` is a structurally sound, complete SQLite database
/// image.
///
/// Checks, in order: minimum length for the 100-byte header, the magic
/// string, a legal page size (power of two within
/// [MIN_PAGE_SIZE, MAX_PAGE_SIZE], where the stored value 1 means 65536),
/// a non-zero page count, and an exact match between the file length and
/// `page_size * page_count`.
///
/// # Errors
/// Returns `INVALID_SQLITE_FILE`, `INVALID_PAGE_SIZE`,
/// `INVALID_PAGE_COUNT`, or `SIZE_MISMATCH` describing the first failed
/// check.
pub fn validate_sqlite_file(data: &[u8]) -> Result<(), DatabaseError> {
    if data.len() < SQLITE_HEADER_SIZE {
        return Err(DatabaseError::new(
            "INVALID_SQLITE_FILE",
            &format!(
                "File too small: {} bytes (minimum {} required)",
                data.len(),
                SQLITE_HEADER_SIZE
            ),
        ));
    }
    let magic = &data[0..16];
    if magic != SQLITE_MAGIC {
        let shown = String::from_utf8_lossy(magic);
        return Err(DatabaseError::new(
            "INVALID_SQLITE_FILE",
            &format!(
                "Invalid SQLite magic string. Expected 'SQLite format 3', got: '{}'",
                shown.trim_end_matches('\0')
            ),
        ));
    }
    // A stored page-size value of 1 encodes the maximum page size (65536).
    let page_size = match u16::from_be_bytes([data[PAGE_SIZE_OFFSET], data[PAGE_SIZE_OFFSET + 1]]) {
        1 => MAX_PAGE_SIZE,
        raw => raw as usize,
    };
    if !(MIN_PAGE_SIZE..=MAX_PAGE_SIZE).contains(&page_size) {
        return Err(DatabaseError::new(
            "INVALID_PAGE_SIZE",
            &format!(
                "Invalid page size: {}. Must be between {} and {}",
                page_size, MIN_PAGE_SIZE, MAX_PAGE_SIZE
            ),
        ));
    }
    if !page_size.is_power_of_two() {
        return Err(DatabaseError::new(
            "INVALID_PAGE_SIZE",
            &format!("Invalid page size: {}. Must be a power of 2", page_size),
        ));
    }
    let count_bytes = [
        data[PAGE_COUNT_OFFSET],
        data[PAGE_COUNT_OFFSET + 1],
        data[PAGE_COUNT_OFFSET + 2],
        data[PAGE_COUNT_OFFSET + 3],
    ];
    let page_count = u32::from_be_bytes(count_bytes);
    if page_count == 0 {
        return Err(DatabaseError::new(
            "INVALID_PAGE_COUNT",
            "Invalid page count: 0. Database must have at least one page",
        ));
    }
    // The file must contain exactly page_count pages — no more, no less.
    let expected_size = (page_size as u64) * (page_count as u64);
    let actual_size = data.len() as u64;
    if actual_size != expected_size {
        return Err(DatabaseError::new(
            "SIZE_MISMATCH",
            &format!(
                "File size mismatch: expected {} bytes ({} pages × {} bytes), got {} bytes",
                expected_size, page_count, page_size, actual_size
            ),
        ));
    }
    log::debug!(
        "SQLite file validation passed: {} pages × {} bytes = {} bytes",
        page_count,
        page_size,
        expected_size
    );
    Ok(())
}
/// Exports the full database image into a `Vec<u8>` (wasm32 build).
///
/// `max_size_bytes` overrides the default 2 GiB export limit when `Some`.
/// Thin delegation to `export_database_to_bytes_impl`; on wasm the storage
/// handle is taken by shared reference.
#[cfg(target_arch = "wasm32")]
pub async fn export_database_to_bytes(
    storage: &BlockStorage,
    max_size_bytes: Option<u64>,
) -> Result<Vec<u8>, DatabaseError> {
    export_database_to_bytes_impl(storage, max_size_bytes).await
}
/// Exports the full database image into a `Vec<u8>` (native build).
///
/// `max_size_bytes` overrides the default 2 GiB export limit when `Some`.
/// Takes `&mut BlockStorage`; the exclusive borrow coerces to the shared
/// reference expected by `export_database_to_bytes_impl`.
#[cfg(not(target_arch = "wasm32"))]
pub async fn export_database_to_bytes(
    storage: &mut BlockStorage,
    max_size_bytes: Option<u64>,
) -> Result<Vec<u8>, DatabaseError> {
    export_database_to_bytes_impl(storage, max_size_bytes).await
}
/// Shared implementation for `export_database_to_bytes`.
///
/// Reads block 0, parses the SQLite header to learn the true database size
/// (`page_size * page_count`), enforces the export size limit, then reads
/// every backing block in one batch and concatenates them, truncating to
/// the exact database size (the final block may be only partially used).
///
/// NOTE(review): the stale `#[allow(invalid_reference_casting)]` attribute
/// was removed — this function performs no reference casts (the cast lives
/// in `export_database_with_options_impl`). Also note this path does not
/// sync the storage before reading, unlike the options-based path — confirm
/// callers flush pending writes first.
async fn export_database_to_bytes_impl(
    storage: &BlockStorage,
    max_size_bytes: Option<u64>,
) -> Result<Vec<u8>, DatabaseError> {
    log::info!("Starting database export");
    log::debug!("Reading block 0 for header");
    let header_block = storage.read_block(0).await?;
    log::debug!(
        "Block 0 size: {} bytes, first 16 bytes: {:?}",
        header_block.len(),
        &header_block.get(0..16).unwrap_or(&[])
    );
    let (page_size, page_count) = parse_sqlite_header(&header_block)?;
    let total_db_size = (page_size as u64) * (page_count as u64);
    validate_export_size(total_db_size, max_size_bytes)?;
    // Warn when the export will materialize a large buffer in memory.
    const MB_100: u64 = 100 * 1024 * 1024;
    if total_db_size > MB_100 {
        log::warn!(
            "Exporting large database: {} bytes ({:.2} MB). This may consume significant memory.",
            total_db_size,
            total_db_size as f64 / (1024.0 * 1024.0)
        );
    }
    log::info!(
        "Export: page_size={}, page_count={}, total_size={}",
        page_size,
        page_count,
        total_db_size
    );
    // Round up so a final partially-used block is still read.
    let total_blocks = total_db_size.div_ceil(BLOCK_SIZE as u64);
    let block_ids: Vec<u64> = (0..total_blocks).collect();
    log::debug!("Reading {} blocks for export", block_ids.len());
    // wasm-only diagnostics: dump what the global storage map currently holds.
    #[cfg(target_arch = "wasm32")]
    {
        use crate::storage::vfs_sync::with_global_storage;
        with_global_storage(|storage_map| {
            if let Some(db_storage) = storage_map.borrow().get(storage.get_db_name()) {
                web_sys::console::log_1(
                    &format!("[EXPORT] GLOBAL_STORAGE has {} blocks", db_storage.len()).into(),
                );
                web_sys::console::log_1(
                    &format!(
                        "[EXPORT] Block IDs in GLOBAL_STORAGE: {:?}",
                        db_storage.keys().collect::<Vec<_>>()
                    )
                    .into(),
                );
            }
        });
        web_sys::console::log_1(
            &format!(
                "[EXPORT] Requesting {} blocks: {:?}",
                block_ids.len(),
                block_ids
            )
            .into(),
        );
    }
    let blocks = storage.read_blocks(&block_ids).await?;
    #[cfg(target_arch = "wasm32")]
    web_sys::console::log_1(&format!("[EXPORT] Actually read {} blocks", blocks.len()).into());
    let mut result = Vec::with_capacity(total_db_size as usize);
    for (i, block) in blocks.iter().enumerate() {
        result.extend_from_slice(block);
        // wasm-only diagnostics for the first few blocks.
        #[cfg(target_arch = "wasm32")]
        if i < 5 {
            web_sys::console::log_1(
                &format!(
                    "[EXPORT] Block {} has {} bytes, first 16: {:02x?}",
                    i,
                    block.len(),
                    &block[..16.min(block.len())]
                )
                .into(),
            );
        }
        // `i` is only used by the wasm diagnostics above; silence the
        // unused-variable warning on native builds.
        #[cfg(not(target_arch = "wasm32"))]
        let _ = i;
    }
    // Drop trailing bytes from the final, partially-used block.
    result.truncate(total_db_size as usize);
    log::info!("Export complete: {} bytes", result.len());
    // wasm-only diagnostics: inspect key SQLite header fields of the result.
    #[cfg(target_arch = "wasm32")]
    {
        web_sys::console::log_1(&format!("[EXPORT] Final result: {} bytes", result.len()).into());
        if result.len() >= 100 {
            web_sys::console::log_1(
                &format!("[EXPORT] Header bytes 28-39: {:02x?}", &result[28..40]).into(),
            );
            web_sys::console::log_1(
                &format!("[EXPORT] Header bytes 40-60: {:02x?}", &result[40..60]).into(),
            );
            let largest_root_page =
                u32::from_be_bytes([result[52], result[53], result[54], result[55]]);
            web_sys::console::log_1(
                &format!(
                    "[EXPORT] Largest root b-tree page (bytes 52-55): {}",
                    largest_root_page
                )
                .into(),
            );
        }
    }
    Ok(result)
}
/// Streaming export driven by an `ExportOptions` bundle (wasm32 build).
///
/// Thin delegation to `export_database_with_options_impl`.
#[cfg(target_arch = "wasm32")]
pub async fn export_database_with_options(
    storage: &BlockStorage,
    options: ExportOptions,
) -> Result<Vec<u8>, DatabaseError> {
    export_database_with_options_impl(storage, options).await
}
/// Streaming export driven by an `ExportOptions` bundle (native build).
///
/// Takes `&mut BlockStorage`; the exclusive borrow coerces to the shared
/// reference expected by `export_database_with_options_impl`.
#[cfg(not(target_arch = "wasm32"))]
pub async fn export_database_with_options(
    storage: &mut BlockStorage,
    options: ExportOptions,
) -> Result<Vec<u8>, DatabaseError> {
    export_database_with_options_impl(storage, options).await
}
/// Shared implementation for `export_database_with_options`: streaming
/// export that syncs the storage, then reads the database in chunks,
/// optionally reporting progress and yielding to the runtime between
/// chunks.
#[allow(invalid_reference_casting)]
async fn export_database_with_options_impl(
    storage: &BlockStorage,
    options: ExportOptions,
) -> Result<Vec<u8>, DatabaseError> {
    log::info!("Starting streaming database export");
    // Flush pending writes so the exported image reflects the latest state.
    #[cfg(target_arch = "wasm32")]
    storage.sync().await?;
    #[cfg(not(target_arch = "wasm32"))]
    {
        // NOTE(review): casting `&BlockStorage` to `&mut BlockStorage` is
        // undefined behavior in Rust unless BlockStorage uses interior
        // mutability (this is what #[allow(invalid_reference_casting)]
        // above suppresses). The public non-wasm wrapper does hold
        // `&mut BlockStorage`, so exclusive access is plausible, but this
        // should be refactored to thread `&mut` through (e.g. a cfg-gated
        // impl signature) rather than casting — TODO confirm/refactor.
        let storage_mut = unsafe { &mut *(storage as *const _ as *mut BlockStorage) };
        storage_mut.sync().await?;
    }
    log::debug!("Reading block 0 for header");
    // Block 0 carries the 100-byte SQLite header; parse it to learn the
    // true database size (page_size * page_count).
    let header_block = storage.read_block(0).await?;
    let (page_size, page_count) = parse_sqlite_header(&header_block)?;
    let total_db_size = (page_size as u64) * (page_count as u64);
    validate_export_size(total_db_size, options.max_size_bytes)?;
    const MB_100: u64 = 100 * 1024 * 1024;
    if total_db_size > MB_100 {
        log::warn!(
            "Exporting large database: {} bytes ({:.2} MB). Using streaming export with chunks.",
            total_db_size,
            total_db_size as f64 / (1024.0 * 1024.0)
        );
    }
    log::info!(
        "Export: page_size={}, page_count={}, total_size={}",
        page_size,
        page_count,
        total_db_size
    );
    // Round up so a final partially-used block is still read.
    let total_blocks = total_db_size.div_ceil(BLOCK_SIZE as u64);
    let chunk_size = options.chunk_size_bytes.unwrap_or(DEFAULT_CHUNK_SIZE);
    // Read at least one block per chunk even if chunk_size < BLOCK_SIZE.
    let blocks_per_chunk = (chunk_size / BLOCK_SIZE as u64).max(1);
    let mut result = Vec::with_capacity(total_db_size as usize);
    for chunk_start in (0..total_blocks).step_by(blocks_per_chunk as usize) {
        let chunk_end = (chunk_start + blocks_per_chunk).min(total_blocks);
        let block_ids: Vec<u64> = (chunk_start..chunk_end).collect();
        log::debug!(
            "Reading blocks {}-{} ({} blocks)",
            chunk_start,
            chunk_end - 1,
            block_ids.len()
        );
        let blocks = storage.read_blocks(&block_ids).await?;
        for block in blocks {
            result.extend_from_slice(&block);
        }
        let bytes_exported = result.len() as u64;
        // Progress is clamped to total_db_size because the final block may
        // extend past the database's true end.
        if let Some(ref callback) = options.progress_callback {
            callback(bytes_exported.min(total_db_size), total_db_size);
        }
        // Yield between chunks so long exports don't starve the JS event
        // loop (wasm) or the tokio executor (native).
        #[cfg(target_arch = "wasm32")]
        {
            wasm_bindgen_futures::JsFuture::from(js_sys::Promise::resolve(
                &wasm_bindgen::JsValue::NULL,
            ))
            .await
            .ok();
        }
        #[cfg(not(target_arch = "wasm32"))]
        {
            tokio::task::yield_now().await;
        }
    }
    // Drop trailing bytes from the final, partially-used block.
    result.truncate(total_db_size as usize);
    // Emit a final 100% progress report.
    if let Some(ref callback) = options.progress_callback {
        callback(total_db_size, total_db_size);
    }
    log::info!("Streaming export complete: {} bytes", result.len());
    Ok(result)
}
/// Streaming export with individually-specified options (wasm32 build).
///
/// Convenience wrapper: bundles the arguments into an `ExportOptions` and
/// delegates to `export_database_with_options`.
#[cfg(target_arch = "wasm32")]
pub async fn export_database_to_bytes_streaming(
    storage: &BlockStorage,
    max_size_bytes: Option<u64>,
    chunk_size_bytes: Option<u64>,
    progress_callback: Option<ProgressCallback>,
) -> Result<Vec<u8>, DatabaseError> {
    export_database_with_options(
        storage,
        ExportOptions {
            max_size_bytes,
            chunk_size_bytes,
            progress_callback,
        },
    )
    .await
}
/// Streaming export with individually-specified options (native build).
///
/// Convenience wrapper: bundles the arguments into an `ExportOptions` and
/// delegates to `export_database_with_options`.
#[cfg(not(target_arch = "wasm32"))]
pub async fn export_database_to_bytes_streaming(
    storage: &mut BlockStorage,
    max_size_bytes: Option<u64>,
    chunk_size_bytes: Option<u64>,
    progress_callback: Option<ProgressCallback>,
) -> Result<Vec<u8>, DatabaseError> {
    export_database_with_options(
        storage,
        ExportOptions {
            max_size_bytes,
            chunk_size_bytes,
            progress_callback,
        },
    )
    .await
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The magic string constant is exactly 16 bytes and carries the
    /// well-known "SQLite format " prefix.
    #[test]
    fn test_sqlite_magic_constant() {
        assert_eq!(16, SQLITE_MAGIC.len());
        assert_eq!(b"SQLite format ", &SQLITE_MAGIC[..14]);
    }

    /// The fixed SQLite header occupies the first 100 bytes of the file.
    #[test]
    fn test_header_size_constant() {
        assert_eq!(100, SQLITE_HEADER_SIZE);
    }

    /// Page size is read from byte offset 16 of the header.
    #[test]
    fn test_page_size_offset() {
        assert_eq!(16, PAGE_SIZE_OFFSET);
    }

    /// Page count is read from byte offset 28 of the header.
    #[test]
    fn test_page_count_offset() {
        assert_eq!(28, PAGE_COUNT_OFFSET);
    }
}