// absurder_sql/storage/import.rs

1//! Import functionality for SQLite databases
2//!
3//! This module handles importing SQLite .db files into the block-based storage system.
4
5use crate::types::DatabaseError;
6use super::block_storage::BLOCK_SIZE;
7use super::export::validate_sqlite_file;
8
9/// Clear all storage data for a specific database
10///
11/// Removes all blocks, metadata, commit markers, and allocation maps for the specified
12/// database from global storage. This is a destructive operation used primarily before
13/// importing a new database.
14///
15/// # Arguments
16/// * `db_name` - Name of the database to clear
17///
18/// # Returns
19/// * `Ok(())` - Storage cleared successfully
20/// * `Err(DatabaseError)` - If clearing fails
21///
22/// # Safety
23/// This operation clears data from global storage but does not affect:
24/// - Open database connections (they may still reference cleared data)
25/// - IndexedDB persistence (for WASM, requires separate clearing)
26///
27/// # Example
28/// ```rust,no_run
29/// use absurder_sql::storage::import::clear_database_storage;
30///
31/// # async fn example() -> Result<(), absurder_sql::types::DatabaseError> {
32/// // Clear all data for "mydb"
33/// clear_database_storage("mydb").await?;
34/// # Ok(())
35/// # }
36/// ```
37pub async fn clear_database_storage(db_name: &str) -> Result<(), DatabaseError> {
38    use super::vfs_sync::{
39        with_global_storage, 
40        with_global_commit_marker,
41        with_global_allocation_map
42    };
43    
44    log::info!("Clearing storage for database: {}", db_name);
45    
46    // Clear GLOBAL_STORAGE blocks
47    with_global_storage(|gs| {
48        let mut storage = gs.borrow_mut();
49        if let Some(blocks) = storage.get_mut(db_name) {
50            let count = blocks.len();
51            blocks.clear();
52            log::debug!("Cleared {} blocks from GLOBAL_STORAGE for {}", count, db_name);
53        }
54        // Remove the database entry entirely
55        storage.remove(db_name);
56    });
57    
58    // Clear metadata - platform specific
59    #[cfg(target_arch = "wasm32")]
60    {
61        use super::vfs_sync::with_global_metadata;
62        with_global_metadata(|gm| {
63            let mut metadata = gm.borrow_mut();
64            if let Some(meta) = metadata.get_mut(db_name) {
65                let count = meta.len();
66                meta.clear();
67                log::debug!("Cleared {} metadata entries for {} (WASM)", count, db_name);
68            }
69            metadata.remove(db_name);
70        });
71    }
72    
73    #[cfg(all(not(target_arch = "wasm32"), any(test, debug_assertions), not(feature = "fs_persist")))]
74    {
75        use super::block_storage::GLOBAL_METADATA_TEST;
76        GLOBAL_METADATA_TEST.with(|gm| {
77            let mut metadata = gm.borrow_mut();
78            if let Some(meta) = metadata.get_mut(db_name) {
79                let count = meta.len();
80                meta.clear();
81                log::debug!("Cleared {} metadata entries from GLOBAL_METADATA_TEST for {} (native test)", count, db_name);
82            }
83            metadata.remove(db_name);
84        });
85    }
86    
87    // Reset GLOBAL_COMMIT_MARKER
88    with_global_commit_marker(|gcm| {
89        let mut markers = gcm.borrow_mut();
90        if markers.contains_key(db_name) {
91            markers.insert(db_name.to_string(), 0);
92            log::debug!("Reset commit marker for {}", db_name);
93        }
94        markers.remove(db_name);
95    });
96    
97    // Clear GLOBAL_ALLOCATION_MAP
98    with_global_allocation_map(|gam| {
99        let mut alloc = gam.borrow_mut();
100        if let Some(ids) = alloc.get_mut(db_name) {
101            let count = ids.len();
102            ids.clear();
103            log::debug!("Cleared {} allocation IDs for {}", count, db_name);
104        }
105        alloc.remove(db_name);
106    });
107    
108    // For WASM, also clear IndexedDB (if needed)
109    #[cfg(target_arch = "wasm32")]
110    {
111        // Note: IndexedDB clearing would be done via JavaScript
112        // The VFS layer will handle actual persistence clearing
113        log::debug!("WASM: In-memory storage cleared for {}. IndexedDB clearing requires VFS interaction.", db_name);
114    }
115    
116    log::info!("Storage cleared successfully for: {}", db_name);
117    Ok(())
118}
119
/// Import SQLite database from bytes into BlockStorage
///
/// Takes a complete SQLite .db file and imports it into the block-based storage system.
/// This is the inverse of `export_database_to_bytes()`.
///
/// # Arguments
/// * `db_name` - Name of the database to import into
/// * `data` - Complete SQLite database file as bytes
///
/// # Returns
/// * `Ok(())` - Import successful
/// * `Err(DatabaseError)` - If validation or import fails
///
/// # Process
/// 1. Validate SQLite file format
/// 2. Clear existing storage for the database
/// 3. Split data into BLOCK_SIZE (4096-byte) chunks
/// 4. Pad last block with zeros if needed
/// 5. Write all blocks to GLOBAL_STORAGE
/// 6. Update allocation map
/// 7. Create per-block metadata (backend-specific: WASM global, native test map, or fs_persist files)
/// 8. Set commit marker to 1 so imported blocks are immediately visible
/// 9. On WASM, sync imported data to IndexedDB for persistence
///
/// # Example
/// ```rust,no_run
/// use absurder_sql::storage::import::import_database_from_bytes;
///
/// # async fn example() -> Result<(), absurder_sql::types::DatabaseError> {
/// let db_bytes = std::fs::read("mydb.db").unwrap();
/// import_database_from_bytes("mydb", db_bytes).await?;
/// # Ok(())
/// # }
/// ```
pub async fn import_database_from_bytes(
    db_name: &str,
    data: Vec<u8>,
) -> Result<(), DatabaseError> {
    use super::vfs_sync::{with_global_storage, with_global_allocation_map};
    use std::collections::{HashMap, HashSet};
    
    log::info!("Starting database import for: {} ({} bytes)", db_name, data.len());
    
    // Step 1: Validate SQLite file format (magic header etc.; rejects non-SQLite input
    // before any destructive work happens)
    validate_sqlite_file(&data)?;
    log::debug!("SQLite file validation passed");
    
    // Step 2: Clear existing storage — destructive; must happen before writing new blocks
    clear_database_storage(db_name).await?;
    log::debug!("Existing storage cleared");
    
    // Step 3: Split data into BLOCK_SIZE chunks
    // Ceiling division: one extra (partial) block when data.len() is not a multiple of BLOCK_SIZE
    let total_blocks = (data.len() + BLOCK_SIZE - 1) / BLOCK_SIZE;
    log::debug!(
        "Splitting {} bytes into {} blocks of {} bytes",
        data.len(),
        total_blocks,
        BLOCK_SIZE
    );
    
    let mut blocks = HashMap::new();
    let mut allocated_ids = HashSet::new();
    
    for block_id in 0..total_blocks {
        let start = block_id * BLOCK_SIZE;
        let end = std::cmp::min(start + BLOCK_SIZE, data.len());
        
        let mut block_data = Vec::with_capacity(BLOCK_SIZE);
        block_data.extend_from_slice(&data[start..end]);
        
        // Step 4: Pad last block with zeros if needed
        if block_data.len() < BLOCK_SIZE {
            let padding = BLOCK_SIZE - block_data.len();
            block_data.resize(BLOCK_SIZE, 0);
            log::debug!(
                "Block {} padded with {} zero bytes ({} -> {} bytes)",
                block_id,
                padding,
                end - start,
                BLOCK_SIZE
            );
        }
        
        blocks.insert(block_id as u64, block_data);
        allocated_ids.insert(block_id as u64);
    }
    
    log::debug!("Created {} blocks for import", blocks.len());
    
    // Step 5: Write blocks to GLOBAL_STORAGE
    // NOTE: `blocks` is cloned here (not moved) because it is still needed
    // below for checksum computation in the metadata/persistence steps.
    with_global_storage(|gs| {
        let mut storage = gs.borrow_mut();
        storage.insert(db_name.to_string(), blocks.clone());
    });
    
    log::debug!("Blocks written to GLOBAL_STORAGE");
    
    // Step 6: Update allocation map
    // NOTE: cloned for the same reason as `blocks` — `allocated_ids` is iterated below.
    with_global_allocation_map(|gam| {
        let mut alloc = gam.borrow_mut();
        alloc.insert(db_name.to_string(), allocated_ids.clone());
    });
    
    log::debug!("Allocation map updated");
    
    // Step 7: Set up metadata for imported blocks (for visibility tracking)
    // This ensures imported blocks are immediately visible when read
    
    // For WASM, set up metadata in global storage
    #[cfg(target_arch = "wasm32")]
    {
        use super::vfs_sync::with_global_metadata;
        use super::metadata::{BlockMetadataPersist, ChecksumManager, ChecksumAlgorithm};
        
        with_global_metadata(|gm| {
            let mut metadata = gm.borrow_mut();
            let mut db_metadata = std::collections::HashMap::new();
            
            for block_id in allocated_ids.iter() {
                // Calculate checksum for each block using CRC32 (standard algorithm)
                // The `else` arm (checksum 0) should be unreachable since every
                // allocated id was inserted into `blocks` above.
                let checksum = if let Some(block_data) = blocks.get(block_id) {
                    ChecksumManager::compute_checksum_with(block_data, ChecksumAlgorithm::CRC32)
                } else {
                    0
                };
                
                db_metadata.insert(*block_id, BlockMetadataPersist {
                    version: 1,  // All imported blocks start at version 1
                    checksum,
                    last_modified_ms: 0,  // 0 = no timestamp recorded at import — TODO confirm readers treat 0 as "unknown"
                    algo: ChecksumAlgorithm::CRC32,
                });
            }
            
            metadata.insert(db_name.to_string(), db_metadata);
        });
        
        log::debug!("Metadata created for {} blocks in global storage (WASM)", allocated_ids.len());
    }
    
    // For native tests (without fs_persist), use GLOBAL_METADATA_TEST directly
    #[cfg(all(not(target_arch = "wasm32"), any(test, debug_assertions), not(feature = "fs_persist")))]
    {
        use super::block_storage::GLOBAL_METADATA_TEST;
        use super::metadata::{BlockMetadataPersist, ChecksumManager, ChecksumAlgorithm};
        
        GLOBAL_METADATA_TEST.with(|gm| {
            let mut metadata = gm.borrow_mut();
            let mut db_metadata = std::collections::HashMap::new();
            
            for block_id in allocated_ids.iter() {
                // Calculate checksum for each block using CRC32 (standard algorithm)
                // (mirrors the WASM branch above; keep the two in sync)
                let checksum = if let Some(block_data) = blocks.get(block_id) {
                    ChecksumManager::compute_checksum_with(block_data, ChecksumAlgorithm::CRC32)
                } else {
                    0
                };
                
                db_metadata.insert(*block_id, BlockMetadataPersist {
                    version: 1,  // All imported blocks start at version 1
                    checksum,
                    last_modified_ms: 0,
                    algo: ChecksumAlgorithm::CRC32,
                });
            }
            
            metadata.insert(db_name.to_string(), db_metadata);
        });
        
        log::debug!("Metadata created for {} blocks in GLOBAL_METADATA_TEST (native test)", allocated_ids.len());
    }
    
    // For fs_persist (including tests), write metadata to filesystem
    // NOTE: filesystem write failures in this branch are logged as warnings
    // rather than returned as errors — the in-memory import has already
    // succeeded at this point (best-effort persistence).
    #[cfg(all(not(target_arch = "wasm32"), feature = "fs_persist"))]
    {
        use super::metadata::{ChecksumManager, ChecksumAlgorithm};
        use std::path::PathBuf;
        
        // Base directory is configurable via env var; defaults to ./test_storage
        let base_path = std::env::var("ABSURDERSQL_FS_BASE")
            .unwrap_or_else(|_| "./test_storage".to_string());
        let mut meta_path = PathBuf::from(&base_path);
        meta_path.push(db_name);
        
        // Create directory if needed
        if let Err(e) = std::fs::create_dir_all(&meta_path) {
            log::warn!("Failed to create metadata directory during import: {}", e);
        }
        
        meta_path.push("metadata.json");
        
        // Build metadata structure
        let mut meta_entries = Vec::new();
        for block_id in allocated_ids.iter() {
            let checksum = if let Some(block_data) = blocks.get(block_id) {
                ChecksumManager::compute_checksum_with(block_data, ChecksumAlgorithm::CRC32)
            } else {
                0
            };
            
            meta_entries.push((
                *block_id,
                super::metadata::BlockMetadataPersist {
                    version: 1,
                    checksum,
                    last_modified_ms: 0,
                    algo: ChecksumAlgorithm::CRC32,
                },
            ));
        }
        
        let meta_json = serde_json::json!({
            "entries": meta_entries,
        });
        
        if let Err(e) = std::fs::write(&meta_path, serde_json::to_string_pretty(&meta_json).unwrap()) {
            log::warn!("Failed to write metadata during import: {}", e);
        } else {
            log::debug!("Metadata written to filesystem for {} blocks", allocated_ids.len());
        }
        
        // Write block data files to filesystem (one block_<id>.bin per block)
        let mut blocks_dir = PathBuf::from(&base_path);
        blocks_dir.push(db_name);
        blocks_dir.push("blocks");
        
        if let Err(e) = std::fs::create_dir_all(&blocks_dir) {
            log::warn!("Failed to create blocks directory during import: {}", e);
        }
        
        for (block_id, block_data) in blocks.iter() {
            let mut block_path = blocks_dir.clone();
            block_path.push(format!("block_{}.bin", block_id));
            
            if let Err(e) = std::fs::write(&block_path, block_data) {
                log::warn!("Failed to write block {} during import: {}", block_id, e);
            }
        }
        
        log::debug!("Wrote {} block files to filesystem", blocks.len());
        
        // Write allocations.json
        let mut alloc_path = PathBuf::from(&base_path);
        alloc_path.push(db_name);
        alloc_path.push("allocations.json");
        
        let alloc_json = serde_json::json!({
            "allocated": allocated_ids.iter().copied().collect::<Vec<_>>(),
        });
        
        if let Err(e) = std::fs::write(&alloc_path, serde_json::to_string_pretty(&alloc_json).unwrap()) {
            log::warn!("Failed to write allocations during import: {}", e);
        } else {
            log::debug!("Allocations written to filesystem");
        }
    }
    
    // Step 8: Set commit marker to 1 to make all imported blocks visible
    // (must run after the metadata setup above so readers never see
    // committed-but-unchecksummed blocks)
    use super::vfs_sync::with_global_commit_marker;
    with_global_commit_marker(|gcm| {
        let mut markers = gcm.borrow_mut();
        markers.insert(db_name.to_string(), 1);
    });
    
    log::debug!("Commit marker set to 1 for immediate visibility");
    
    // Step 9: For WASM, sync to IndexedDB to make data persistent
    // (unlike the fs_persist branch, a sync failure here IS returned as an error)
    #[cfg(target_arch = "wasm32")]
    {
        log::debug!("Syncing imported data to IndexedDB for {}", db_name);
        
        // We need to trigger a sync to persist the imported data to IndexedDB
        // Use the wasm_vfs_sync module to sync the data
        use super::wasm_vfs_sync::vfs_sync_database;
        
        vfs_sync_database(db_name).map_err(|e| {
            log::error!("Failed to sync imported data to IndexedDB: {}", e);
            DatabaseError::new("IMPORT_SYNC_FAILED", &format!("Failed to persist imported data: {}", e))
        })?;
        
        log::debug!("IndexedDB sync complete for {}", db_name);
    }
    
    log::info!(
        "Database import complete: {} ({} blocks, {} bytes)",
        db_name,
        total_blocks,
        data.len()
    );
    
    Ok(())
}
408
409/// Invalidate BlockStorage caches for a specific database
410///
411/// This function signals that any BlockStorage instances for the specified database
412/// should invalidate their caches. This is important after importing to prevent
413/// reading stale cached data.
414///
415/// **Note**: This is a best-effort notification mechanism. BlockStorage instances
416/// must call `on_database_import()` on themselves to actually clear their caches.
417/// This function serves as documentation and a placeholder for future automatic
418/// cache invalidation mechanisms.
419///
420/// # Arguments
421/// * `db_name` - Name of the database whose caches should be invalidated
422///
423/// # Example
424/// ```rust
425/// use absurder_sql::storage::import::invalidate_block_storage_caches;
426///
427/// // After importing a database, signal that caches should be cleared
428/// invalidate_block_storage_caches("mydb");
429/// ```
430pub fn invalidate_block_storage_caches(db_name: &str) {
431    log::info!(
432        "Cache invalidation signal sent for database '{}'. \
433        BlockStorage instances should call on_database_import() to clear caches.",
434        db_name
435    );
436    
437    // Currently this is a documentation/logging function
438    // In the future, this could:
439    // - Send a broadcast message via channels
440    // - Update a global "invalidation generation" counter
441    // - Trigger callbacks registered by BlockStorage instances
442    //
443    // For now, applications should manually call storage.on_database_import()
444    // after importing a database if they have active BlockStorage instances.
445}