absurder_sql/storage/export.rs

//! Export and Import functionality for SQLite databases
//!
//! This module provides conversion between IndexedDB block storage and standard SQLite .db files.
//!
//! # Features
//! - **Export**: Convert IndexedDB blocks to a downloadable .db file
//! - **Import**: Load a .db file into IndexedDB storage
//! - **Validation**: Verify SQLite file format integrity
//!
//! # Architecture
//! The system works with 4096-byte blocks stored in IndexedDB. Export reads all allocated blocks
//! and concatenates them into a standard SQLite file. Import splits a .db file into blocks and
//! writes them to IndexedDB with proper metadata tracking.
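//!
//! # Example
//! A minimal end-to-end export sketch (assumes an already-initialized `BlockStorage`):
//! ```rust,no_run
//! use absurder_sql::storage::BlockStorage;
//! use absurder_sql::storage::export::export_database_to_bytes;
//!
//! async fn download_copy(mut storage: BlockStorage) -> Result<Vec<u8>, absurder_sql::types::DatabaseError> {
//!     // None uses the default 2GB export size limit
//!     export_database_to_bytes(&mut storage, None).await
//! }
//! ```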

use crate::types::DatabaseError;
use crate::storage::block_storage::BlockStorage;

const BLOCK_SIZE: usize = 4096;

/// Default maximum export size: 2GB
///
/// Rationale:
/// - IndexedDB limits: 10GB (Firefox) to ~60% of disk (Chrome/Safari)
/// - WASM/browser memory: ~2-4GB per tab
/// - Export requires loading the entire DB into memory
/// - 2GB provides a safety margin while allowing large databases
/// - Configurable via DatabaseConfig.max_export_size_bytes
const DEFAULT_MAX_EXPORT_SIZE: u64 = 2 * 1024 * 1024 * 1024; // 2GB

/// Default chunk size for streaming export: 10MB
///
/// For databases >100MB, export processes blocks in chunks of this size
/// to reduce memory pressure and allow event-loop yielding.
const DEFAULT_CHUNK_SIZE: u64 = 10 * 1024 * 1024; // 10MB

/// Progress callback type for export operations
///
/// Parameters: (bytes_exported, total_bytes)
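///
/// # Example
/// A minimal sketch of a conforming callback (it captures nothing, so it is
/// trivially `Send + Sync`):
/// ```rust,no_run
/// use absurder_sql::storage::export::ProgressCallback;
///
/// let cb: ProgressCallback = Box::new(|exported, total| {
///     log::debug!("exported {} of {} bytes", exported, total);
/// });
/// cb(0, 100);
/// ```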
pub type ProgressCallback = Box<dyn Fn(u64, u64) + Send + Sync>;

/// Options for database export operations
///
/// Allows configuration of size limits, chunking behavior, and progress tracking.
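///
/// # Example
/// A minimal sketch using struct-update syntax over the derived `Default`:
/// ```rust,no_run
/// use absurder_sql::storage::export::ExportOptions;
///
/// let options = ExportOptions {
///     max_size_bytes: Some(512 * 1024 * 1024), // cap exports at 512MB
///     ..Default::default()
/// };
/// ```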
#[derive(Default)]
pub struct ExportOptions {
    /// Maximum allowed database size (bytes).
    /// None uses the default limit of 2GB.
    pub max_size_bytes: Option<u64>,

    /// Chunk size for streaming large exports (bytes).
    /// Export processes this many bytes at a time, yielding to the event loop between chunks.
    /// Default: 10MB
    pub chunk_size_bytes: Option<u64>,

    /// Optional progress callback invoked after each chunk.
    /// Called with (bytes_exported_so_far, total_bytes)
    pub progress_callback: Option<ProgressCallback>,
}

/// SQLite file format constants
const SQLITE_MAGIC: &[u8; 16] = b"SQLite format 3\0";
const SQLITE_HEADER_SIZE: usize = 100;
const PAGE_SIZE_OFFSET: usize = 16;
const PAGE_COUNT_OFFSET: usize = 28;

/// Minimum and maximum valid page sizes for SQLite
const MIN_PAGE_SIZE: usize = 512;
const MAX_PAGE_SIZE: usize = 65536;

/// Parse SQLite database header to extract metadata
///
/// # Arguments
/// * `data` - First 100+ bytes of a SQLite database file
///
/// # Returns
/// * `Ok((page_size, page_count))` - Database page size and number of pages
/// * `Err(DatabaseError)` - If the header is invalid or corrupted
///
/// # SQLite Header Format
/// - Bytes 0-15: Magic string "SQLite format 3\0"
/// - Bytes 16-17: Page size (big-endian u16), special case: 1 = 65536
/// - Bytes 28-31: Page count (big-endian u32)
///
/// # Example
/// ```rust,no_run
/// use absurder_sql::storage::export::parse_sqlite_header;
///
/// let header_data: Vec<u8> = vec![/* ... header bytes ... */];
/// match parse_sqlite_header(&header_data) {
///     Ok((page_size, page_count)) => {
///         println!("Database: {} pages of {} bytes", page_count, page_size);
///     }
///     Err(e) => eprintln!("Invalid header: {}", e),
/// }
/// ```
pub fn parse_sqlite_header(data: &[u8]) -> Result<(usize, u32), DatabaseError> {
    // Validate minimum header size
    if data.len() < SQLITE_HEADER_SIZE {
        return Err(DatabaseError::new(
            "INVALID_HEADER",
            &format!(
                "Header too small: {} bytes (minimum {} required)",
                data.len(),
                SQLITE_HEADER_SIZE
            ),
        ));
    }

    // Validate magic string
    if &data[0..16] != SQLITE_MAGIC {
        let magic_str = String::from_utf8_lossy(&data[0..16]);
        return Err(DatabaseError::new(
            "INVALID_SQLITE_FILE",
            &format!(
                "Invalid SQLite magic string. Expected 'SQLite format 3', got: '{}'",
                magic_str.trim_end_matches('\0')
            ),
        ));
    }

    // Extract page size (big-endian u16 at bytes 16-17)
    let page_size_raw = u16::from_be_bytes([data[PAGE_SIZE_OFFSET], data[PAGE_SIZE_OFFSET + 1]]);

    // Handle special case: page_size == 1 means 65536
    let page_size = if page_size_raw == 1 {
        65536
    } else {
        page_size_raw as usize
    };

    // Validate page size is a power of 2 within [MIN_PAGE_SIZE, MAX_PAGE_SIZE]
    if page_size < MIN_PAGE_SIZE || page_size > MAX_PAGE_SIZE || !page_size.is_power_of_two() {
        return Err(DatabaseError::new(
            "INVALID_PAGE_SIZE",
            &format!(
                "Invalid page size: {}. Must be a power of 2 between {} and {}",
                page_size, MIN_PAGE_SIZE, MAX_PAGE_SIZE
            ),
        ));
    }

    // Extract page count (big-endian u32 at bytes 28-31)
    let page_count = u32::from_be_bytes([
        data[PAGE_COUNT_OFFSET],
        data[PAGE_COUNT_OFFSET + 1],
        data[PAGE_COUNT_OFFSET + 2],
        data[PAGE_COUNT_OFFSET + 3],
    ]);

    log::debug!(
        "Parsed SQLite header: page_size={}, page_count={}",
        page_size,
        page_count
    );

    Ok((page_size, page_count))
}

/// Validate export size against configured limit
///
/// Checks if the database size exceeds the maximum allowed export size.
/// This prevents out-of-memory errors when exporting very large databases.
///
/// # Arguments
/// * `size_bytes` - Size of the database in bytes
/// * `max_size_bytes` - Maximum allowed size (None for the default 2GB)
///
/// # Returns
/// * `Ok(())` - Size is within limits
/// * `Err(DatabaseError)` - Size exceeds limit
///
/// # Default Limit
/// If `max_size_bytes` is None, defaults to 2GB (2,147,483,648 bytes).
/// This balances IndexedDB capacity (10GB+) with browser memory limits (~2-4GB per tab).
///
/// # Example
/// ```rust,no_run
/// use absurder_sql::storage::export::validate_export_size;
///
/// // Use default 2GB limit
/// validate_export_size(100_000_000, None).unwrap();
///
/// // Use custom 5GB limit
/// validate_export_size(3_000_000_000, Some(5 * 1024 * 1024 * 1024)).unwrap();
/// ```
pub fn validate_export_size(
    size_bytes: u64,
    max_size_bytes: Option<u64>,
) -> Result<(), DatabaseError> {
    let limit = max_size_bytes.unwrap_or(DEFAULT_MAX_EXPORT_SIZE);

    if size_bytes > limit {
        let size_mb = size_bytes as f64 / (1024.0 * 1024.0);
        let limit_mb = limit as f64 / (1024.0 * 1024.0);

        return Err(DatabaseError::new(
            "DATABASE_TOO_LARGE",
            &format!(
                "Database too large for export: {:.2} MB exceeds limit of {:.2} MB. \
                Consider increasing max_export_size_bytes in DatabaseConfig or exporting in smaller chunks.",
                size_mb, limit_mb
            ),
        ));
    }

    Ok(())
}

/// Validate SQLite database file format
///
/// Performs comprehensive validation of a SQLite database file to ensure it can
/// be safely imported. Checks file structure, magic string, page size validity,
/// and size consistency.
///
/// # Arguments
/// * `data` - Complete SQLite database file as bytes
///
/// # Returns
/// * `Ok(())` - File is valid and safe to import
/// * `Err(DatabaseError)` - File is invalid, with a detailed error message
///
/// # Validation Checks
/// - File size is at least 100 bytes (minimum header size)
/// - Magic string matches "SQLite format 3\0"
/// - Page size is valid (power of 2, between 512 and 65536)
/// - Page count is non-zero
/// - File size matches (page_size × page_count)
///
/// # Example
/// ```rust,no_run
/// use absurder_sql::storage::export::validate_sqlite_file;
///
/// let file_data = std::fs::read("database.db").unwrap();
/// match validate_sqlite_file(&file_data) {
///     Ok(()) => println!("Valid SQLite file"),
///     Err(e) => eprintln!("Invalid file: {}", e),
/// }
/// ```
pub fn validate_sqlite_file(data: &[u8]) -> Result<(), DatabaseError> {
    // Check minimum file size
    if data.len() < SQLITE_HEADER_SIZE {
        return Err(DatabaseError::new(
            "INVALID_SQLITE_FILE",
            &format!(
                "File too small: {} bytes (minimum {} required)",
                data.len(),
                SQLITE_HEADER_SIZE
            ),
        ));
    }

    // Validate magic string
    if &data[0..16] != SQLITE_MAGIC {
        let magic_str = String::from_utf8_lossy(&data[0..16]);
        return Err(DatabaseError::new(
            "INVALID_SQLITE_FILE",
            &format!(
                "Invalid SQLite magic string. Expected 'SQLite format 3', got: '{}'",
                magic_str.trim_end_matches('\0')
            ),
        ));
    }

    // Parse page size
    let page_size_raw = u16::from_be_bytes([data[PAGE_SIZE_OFFSET], data[PAGE_SIZE_OFFSET + 1]]);
    let page_size = if page_size_raw == 1 {
        65536
    } else {
        page_size_raw as usize
    };

    // Validate page size is power of 2 and within valid range
    if page_size < MIN_PAGE_SIZE || page_size > MAX_PAGE_SIZE {
        return Err(DatabaseError::new(
            "INVALID_PAGE_SIZE",
            &format!(
                "Invalid page size: {}. Must be between {} and {}",
                page_size, MIN_PAGE_SIZE, MAX_PAGE_SIZE
            ),
        ));
    }

    if !page_size.is_power_of_two() {
        return Err(DatabaseError::new(
            "INVALID_PAGE_SIZE",
            &format!(
                "Invalid page size: {}. Must be a power of 2",
                page_size
            ),
        ));
    }

    // Parse page count
    let page_count = u32::from_be_bytes([
        data[PAGE_COUNT_OFFSET],
        data[PAGE_COUNT_OFFSET + 1],
        data[PAGE_COUNT_OFFSET + 2],
        data[PAGE_COUNT_OFFSET + 3],
    ]);

    // Validate page count is non-zero
    if page_count == 0 {
        return Err(DatabaseError::new(
            "INVALID_PAGE_COUNT",
            "Invalid page count: 0. Database must have at least one page",
        ));
    }

    // Validate file size matches header information
    let expected_size = (page_size as u64) * (page_count as u64);
    let actual_size = data.len() as u64;

    if actual_size != expected_size {
        return Err(DatabaseError::new(
            "SIZE_MISMATCH",
            &format!(
                "File size mismatch: expected {} bytes ({} pages × {} bytes), got {} bytes",
                expected_size, page_count, page_size, actual_size
            ),
        ));
    }

    log::debug!(
        "SQLite file validation passed: {} pages × {} bytes = {} bytes",
        page_count,
        page_size,
        expected_size
    );

    Ok(())
}

/// Export database from BlockStorage to SQLite .db file format
///
/// Reads all allocated blocks from storage and concatenates them into a standard
/// SQLite database file that can be opened by any SQLite client.
///
/// # Arguments
/// * `storage` - BlockStorage instance containing the database blocks
/// * `max_size_bytes` - Maximum allowed export size (None for the default 2GB)
///
/// # Returns
/// * `Ok(Vec<u8>)` - Complete SQLite database file as bytes
/// * `Err(DatabaseError)` - If export fails
///
/// # Process
/// 1. Sync storage to ensure all changes are persisted
/// 2. Read block 0 (header) to determine database size
/// 3. Read all allocated blocks
/// 4. Concatenate blocks and truncate to exact database size
///
/// # Example
/// ```rust,no_run
/// use absurder_sql::storage::export::export_database_to_bytes;
/// use absurder_sql::storage::BlockStorage;
///
/// async fn export_example(mut storage: BlockStorage) -> Result<Vec<u8>, absurder_sql::types::DatabaseError> {
///     // Export with default 2GB limit
///     let db_bytes = export_database_to_bytes(&mut storage, None).await?;
///     // Save db_bytes to file or send to browser for download
///     Ok(db_bytes)
/// }
/// ```
pub async fn export_database_to_bytes(
    storage: &mut BlockStorage,
    max_size_bytes: Option<u64>,
) -> Result<Vec<u8>, DatabaseError> {
    log::info!("Starting database export");

    // Force sync to ensure all data is persisted
    storage.sync().await?;

    // Read first block to get header
    log::debug!("Reading block 0 for header");
    let header_block = storage.read_block(0).await?;
    log::debug!(
        "Block 0 size: {} bytes, first 16 bytes: {:?}",
        header_block.len(),
        &header_block.get(0..16).unwrap_or(&[])
    );

    // Parse header to determine database size
    let (page_size, page_count) = parse_sqlite_header(&header_block)?;

    // Calculate total database size
    let total_db_size = (page_size as u64) * (page_count as u64);

    // Validate size doesn't exceed maximum
    validate_export_size(total_db_size, max_size_bytes)?;

    // Warn if database is large (>100MB)
    const MB_100: u64 = 100 * 1024 * 1024;
    if total_db_size > MB_100 {
        log::warn!(
            "Exporting large database: {} bytes ({:.2} MB). This may consume significant memory.",
            total_db_size,
            total_db_size as f64 / (1024.0 * 1024.0)
        );
    }

    log::info!(
        "Export: page_size={}, page_count={}, total_size={}",
        page_size,
        page_count,
        total_db_size
    );

    // Ceiling division: number of BLOCK_SIZE blocks covering the database
    let total_blocks = (total_db_size + BLOCK_SIZE as u64 - 1) / BLOCK_SIZE as u64;

    // Build list of block IDs to read
    let block_ids: Vec<u64> = (0..total_blocks).collect();

    log::debug!("Reading {} blocks for export", block_ids.len());

    // Read all blocks at once
    let blocks = storage.read_blocks(&block_ids).await?;

    // Concatenate all blocks
    let mut result = Vec::with_capacity(total_db_size as usize);
    for block in blocks {
        result.extend_from_slice(&block);
    }

    // Truncate to exact database size
    result.truncate(total_db_size as usize);

    log::info!("Export complete: {} bytes", result.len());

    Ok(result)
}

/// Export database with advanced options (streaming, progress callbacks)
///
/// For large databases (>100MB), this function processes blocks in chunks,
/// yields to the event loop between chunks, and reports progress.
///
/// # Arguments
/// * `storage` - Block storage containing the database
/// * `options` - Export configuration (size limits, chunk size, progress callback)
///
/// # Returns
/// Complete database as bytes
///
/// # Example
/// ```rust,no_run
/// use absurder_sql::storage::export::{export_database_with_options, ExportOptions};
/// use absurder_sql::storage::BlockStorage;
///
/// async fn export_with_progress(mut storage: BlockStorage) -> Result<Vec<u8>, absurder_sql::types::DatabaseError> {
///     let options = ExportOptions {
///         max_size_bytes: Some(1024 * 1024 * 1024), // 1GB limit
///         chunk_size_bytes: Some(10 * 1024 * 1024), // 10MB chunks
///         progress_callback: Some(Box::new(|exported, total| {
///             println!("Progress: {}/{} bytes ({:.1}%)",
///                 exported, total, (exported as f64 / total as f64) * 100.0);
///         })),
///     };
///     export_database_with_options(&mut storage, options).await
/// }
/// ```
pub async fn export_database_with_options(
    storage: &mut BlockStorage,
    options: ExportOptions,
) -> Result<Vec<u8>, DatabaseError> {
    log::info!("Starting streaming database export");

    // Force sync to ensure all data is persisted
    storage.sync().await?;

    // Read first block to get header
    log::debug!("Reading block 0 for header");
    let header_block = storage.read_block(0).await?;

    // Parse header to determine database size
    let (page_size, page_count) = parse_sqlite_header(&header_block)?;
    let total_db_size = (page_size as u64) * (page_count as u64);

    // Validate size doesn't exceed maximum
    validate_export_size(total_db_size, options.max_size_bytes)?;

    // Warn if database is large (>100MB)
    const MB_100: u64 = 100 * 1024 * 1024;
    if total_db_size > MB_100 {
        log::warn!(
            "Exporting large database: {} bytes ({:.2} MB). Using streaming export with chunks.",
            total_db_size,
            total_db_size as f64 / (1024.0 * 1024.0)
        );
    }

    log::info!(
        "Export: page_size={}, page_count={}, total_size={}",
        page_size,
        page_count,
        total_db_size
    );

    // Ceiling division: number of BLOCK_SIZE blocks covering the database
    let total_blocks = (total_db_size + BLOCK_SIZE as u64 - 1) / BLOCK_SIZE as u64;
    let chunk_size = options.chunk_size_bytes.unwrap_or(DEFAULT_CHUNK_SIZE);
    let blocks_per_chunk = (chunk_size / BLOCK_SIZE as u64).max(1);

    // Preallocate result vector
    let mut result = Vec::with_capacity(total_db_size as usize);

    // Process blocks in chunks
    for chunk_start in (0..total_blocks).step_by(blocks_per_chunk as usize) {
        let chunk_end = (chunk_start + blocks_per_chunk).min(total_blocks);
        let block_ids: Vec<u64> = (chunk_start..chunk_end).collect();

        log::debug!(
            "Reading blocks {}-{} ({} blocks)",
            chunk_start,
            chunk_end - 1,
            block_ids.len()
        );

        // Read chunk of blocks
        let blocks = storage.read_blocks(&block_ids).await?;

        // Concatenate blocks in this chunk
        for block in blocks {
            result.extend_from_slice(&block);
        }

        let bytes_exported = result.len() as u64;

        // Invoke progress callback if provided
        if let Some(ref callback) = options.progress_callback {
            callback(bytes_exported.min(total_db_size), total_db_size);
        }

        // Yield to event loop between chunks to prevent blocking
        #[cfg(target_arch = "wasm32")]
        {
            // In WASM, yield to the browser event loop
            wasm_bindgen_futures::JsFuture::from(js_sys::Promise::resolve(&wasm_bindgen::JsValue::NULL))
                .await
                .ok();
        }
        #[cfg(not(target_arch = "wasm32"))]
        {
            // In native builds, yield to the tokio runtime
            tokio::task::yield_now().await;
        }
    }

    // Truncate to exact database size
    result.truncate(total_db_size as usize);

    // Final progress callback
    if let Some(ref callback) = options.progress_callback {
        callback(total_db_size, total_db_size);
    }

    log::info!("Streaming export complete: {} bytes", result.len());

    Ok(result)
}

/// Streaming export with basic parameters (convenience wrapper)
///
/// Simplified interface for streaming export with progress callback.
/// For full control, use `export_database_with_options`.
///
/// # Arguments
/// * `storage` - Block storage containing the database
/// * `max_size_bytes` - Maximum allowed size (None for default 2GB)
/// * `chunk_size_bytes` - Chunk size for streaming (None for default 10MB)
/// * `progress_callback` - Optional progress callback
///
/// # Example
/// ```rust,no_run
/// use absurder_sql::storage::export::export_database_to_bytes_streaming;
/// use absurder_sql::storage::BlockStorage;
///
/// async fn export_example(mut storage: BlockStorage) -> Result<Vec<u8>, absurder_sql::types::DatabaseError> {
///     let progress = Box::new(|exported: u64, total: u64| {
///         println!("Exported {}/{} bytes", exported, total);
///     });
///
///     export_database_to_bytes_streaming(
///         &mut storage,
///         None,
///         Some(10 * 1024 * 1024), // 10MB chunks
///         Some(progress)
///     ).await
/// }
/// ```
pub async fn export_database_to_bytes_streaming(
    storage: &mut BlockStorage,
    max_size_bytes: Option<u64>,
    chunk_size_bytes: Option<u64>,
    progress_callback: Option<ProgressCallback>,
) -> Result<Vec<u8>, DatabaseError> {
    let options = ExportOptions {
        max_size_bytes,
        chunk_size_bytes,
        progress_callback,
    };

    export_database_with_options(storage, options).await
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sqlite_magic_constant() {
        assert_eq!(SQLITE_MAGIC.len(), 16);
        assert_eq!(&SQLITE_MAGIC[0..14], b"SQLite format ");
    }

    #[test]
    fn test_header_size_constant() {
        assert_eq!(SQLITE_HEADER_SIZE, 100);
    }

    #[test]
    fn test_page_size_offset() {
        assert_eq!(PAGE_SIZE_OFFSET, 16);
    }

    #[test]
    fn test_page_count_offset() {
        assert_eq!(PAGE_COUNT_OFFSET, 28);
    }
}