sqlitegraph 2.0.7

Embedded graph database with full ACID transactions, HNSW vector search, dual backend support, and a comprehensive graph algorithms library
//! I/O operations and data transfer for GraphFile
//!
//! This module provides core I/O operations including byte-level reading/writing,
//! buffer management, and file size operations for GraphFile instances.
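//!
//! A minimal usage sketch (hedged: it assumes a plain `std::fs::File` opened
//! read/write and uses an illustrative path, so it is not run as a doctest):
//!
//! ```ignore
//! use std::fs::OpenOptions;
//!
//! let mut file = OpenOptions::new()
//!     .read(true)
//!     .write(true)
//!     .create(true)
//!     .open("graph.dat")?;
//!
//! // Write a few bytes at offset 0, then read them back.
//! IOOperationsManager::write_bytes_std(&mut file, 0, b"hello")?;
//! let mut buf = [0u8; 5];
//! IOOperationsManager::read_bytes_std(&mut file, 0, &mut buf)?;
//! assert_eq!(&buf, b"hello");
//! ```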

use crate::backend::native::{
    graph_file::buffers::WriteBuffer,
    types::NativeResult,
};
#[cfg(all(feature = "v2_experimental", feature = "v2_io_exclusive_mmap"))]
use crate::backend::native::NativeBackendError;

#[cfg(all(feature = "v2_experimental", feature = "v2_io_exclusive_mmap"))]
use memmap2::MmapMut;
use std::io::{Read, Seek, SeekFrom, Write};

/// I/O operations management utilities for GraphFile
pub struct IOOperationsManager;

impl IOOperationsManager {
    /// Read bytes from file using standard I/O
    ///
    /// Provides basic byte-level reading with proper error handling
    /// and position management. Used when no specialized I/O mode is active.
    pub fn read_bytes_std(
        file: &mut std::fs::File,
        offset: u64,
        buffer: &mut [u8],
    ) -> NativeResult<()> {
        file.seek(SeekFrom::Start(offset))?;
        file.read_exact(buffer)?;
        Ok(())
    }

    /// Write bytes to file using standard I/O
    ///
    /// Provides basic byte-level writing with proper error handling
    /// and position management.
    pub fn write_bytes_std(file: &mut std::fs::File, offset: u64, data: &[u8]) -> NativeResult<()> {
        file.seek(SeekFrom::Start(offset))?;
        file.write_all(data)?;
        Ok(())
    }

    /// Direct write operation without buffering
    ///
    /// Writes data directly to file without going through write buffer,
    /// ensuring immediate persistence.
    pub fn write_bytes_direct(
        graph_file: &mut crate::backend::native::graph_file::GraphFile,
        offset: u64,
        data: &[u8],
    ) -> NativeResult<()> {
        let file = graph_file.file_mut();
        file.seek(SeekFrom::Start(offset))?;
        file.write_all(data)?;
        file.flush()?;
        Ok(())
    }

    /// Read bytes with read-ahead optimization
    ///
    /// Attempts to optimize sequential reads by reading larger blocks
    /// when possible to reduce system call overhead.
    pub fn read_with_ahead(
        file: &mut std::fs::File,
        offset: u64,
        buffer: &mut [u8],
    ) -> NativeResult<()> {
        // Simple implementation - can be enhanced with actual read-ahead logic
        file.seek(SeekFrom::Start(offset))?;
        file.read_exact(buffer)?;
        Ok(())
    }

    /// Flush pending write buffer operations
    ///
    /// Commits all pending write buffer operations to disk
    /// in optimal order to minimize disk seeks.
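    ///
    /// A minimal sketch of the expected behaviour (hedged: `file` is assumed
    /// to be an open, writable `std::fs::File`, so the example is not run as
    /// a doctest):
    ///
    /// ```ignore
    /// let mut write_buffer = WriteBuffer::new(10);
    /// write_buffer.add(100, b"data1".to_vec());
    /// write_buffer.add(110, b"data2".to_vec());
    ///
    /// // Operations are replayed in ascending offset order, then the file is flushed.
    /// let bytes_written = IOOperationsManager::flush_write_buffer(&mut file, &mut write_buffer)?;
    /// assert_eq!(bytes_written, 10);
    /// ```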
    pub fn flush_write_buffer(
        file: &mut std::fs::File,
        write_buffer: &mut WriteBuffer,
    ) -> NativeResult<usize> {
        let operations = write_buffer.flush();
        let mut bytes_written = 0;

        // Sort operations by offset for sequential disk access
        let mut sorted_ops: Vec<_> = operations.into_iter().collect();
        sorted_ops.sort_by_key(|(offset, _)| *offset);

        for (offset, data) in sorted_ops {
            file.seek(SeekFrom::Start(offset))?;
            file.write_all(&data)?;
            bytes_written += data.len();
        }

        file.flush()?;
        Ok(bytes_written)
    }

    /// Invalidate read buffer
    ///
    /// Clears any cached read data to ensure fresh reads
    /// from disk for subsequent operations.
    pub fn invalidate_read_buffer(
        _read_buffer: &mut crate::backend::native::graph_file::buffers::ReadBuffer,
    ) {
        // Implementation depends on ReadBuffer structure
        // This is a placeholder for the actual buffer invalidation logic
    }

    /// Ensure file is at least the specified size
    ///
    /// Grows file if necessary to accommodate data at the specified offset.
    /// Uses sparse file allocation when supported by the filesystem.
    pub fn ensure_file_len_at_least(
        file: &mut std::fs::File,
        required_size: u64,
    ) -> NativeResult<()> {
        let metadata = file.metadata()?;
        let current_size = metadata.len();

        if current_size < required_size {
            file.set_len(required_size)?;
        }

        Ok(())
    }

    /// Read bytes using memory mapping (exclusive mode)
    #[cfg(all(feature = "v2_experimental", feature = "v2_io_exclusive_mmap"))]
    pub fn read_bytes_mmap_exclusive(
        mmap: Option<&MmapMut>,
        offset: u64,
        buffer: &mut [u8],
    ) -> NativeResult<()> {
        let mmap = mmap.ok_or_else(|| NativeBackendError::CorruptNodeRecord {
            node_id: -1,
            reason: "mmap not initialized in exclusive mmap mode".to_string(),
        })?;

        if offset as usize + buffer.len() > mmap.len() {
            return Err(NativeBackendError::CorruptNodeRecord {
                node_id: -1,
                reason: format!(
                    "Read beyond mmap region: offset={}, len={}, mmap_size={}",
                    offset,
                    buffer.len(),
                    mmap.len()
                ),
            });
        }

        let start = offset as usize;
        let end = start + buffer.len();
        buffer.copy_from_slice(&mmap[start..end]);
        Ok(())
    }

    /// Write bytes using memory mapping (exclusive mode)
    #[cfg(all(feature = "v2_experimental", feature = "v2_io_exclusive_mmap"))]
    pub fn write_bytes_mmap_exclusive(
        mmap: Option<&mut MmapMut>,
        offset: u64,
        data: &[u8],
    ) -> NativeResult<()> {
        let mmap = mmap.ok_or_else(|| NativeBackendError::CorruptNodeRecord {
            node_id: -1,
            reason: "mmap not initialized in exclusive mmap mode".to_string(),
        })?;

        let end_offset = offset + data.len() as u64;
        if end_offset as usize > mmap.len() {
            return Err(NativeBackendError::CorruptNodeRecord {
                node_id: -1,
                reason: format!(
                    "mmap write out of bounds: offset={}, len={}, mmap_len={}",
                    offset,
                    data.len(),
                    mmap.len()
                ),
            });
        }

        let start = offset as usize;
        let end = start + data.len();
        mmap[start..end].copy_from_slice(data);
        mmap.flush()?;
        Ok(())
    }

    /// Read bytes using exclusive standard I/O mode
    #[cfg(all(feature = "v2_experimental", feature = "v2_io_exclusive_std"))]
    pub fn read_bytes_std_exclusive(
        file: &mut std::fs::File,
        offset: u64,
        buffer: &mut [u8],
        write_buffer: &mut WriteBuffer,
    ) -> NativeResult<()> {
        // Clear pending write buffer operations before reading
        if !write_buffer.operations.is_empty() {
            let ops_count = write_buffer.operations.len();
            if std::env::var("WRITEBUF_DEBUG").is_ok() {
                println!(
                    "[WRITEBUF_DEBUG] EXCLUSIVE_STD: CLEARING {} pending ops without flush",
                    ops_count
                );
            }
            write_buffer.operations.clear();
        }

        file.seek(SeekFrom::Start(offset))?;
        file.read_exact(buffer)?;
        Ok(())
    }

    /// Write bytes using exclusive standard I/O mode
    #[cfg(all(feature = "v2_experimental", feature = "v2_io_exclusive_std"))]
    pub fn write_bytes_std_exclusive(
        file: &mut std::fs::File,
        offset: u64,
        data: &[u8],
        write_buffer: &mut WriteBuffer,
    ) -> NativeResult<()> {
        // Clear pending write buffer operations before writing
        if !write_buffer.operations.is_empty() {
            let ops_count = write_buffer.operations.len();
            if std::env::var("WRITEBUF_DEBUG").is_ok() {
                println!(
                    "[WRITEBUF_DEBUG] EXCLUSIVE_STD: CLEARING {} pending ops without flush",
                    ops_count
                );
            }
            write_buffer.operations.clear();
        }

        file.seek(SeekFrom::Start(offset))?;
        file.write_all(data)?;
        Ok(())
    }

    /// Write buffered bytes using standard I/O
    ///
    /// Uses write buffer for optimized batched writes.
    pub fn write_buffered_bytes_std(
        file: &mut std::fs::File,
        data: &[u8],
        offset: u64,
        write_buffer: &mut WriteBuffer,
    ) -> NativeResult<()> {
        // Use write_buffer for optimized batched writes
        let data_vec = data.to_vec();
        let added = write_buffer.add(offset, data_vec);

        if !added {
            // Buffer is full, flush and write directly
            let operations = write_buffer.flush();
            for (op_offset, op_data) in operations {
                file.seek(SeekFrom::Start(op_offset))?;
                file.write_all(&op_data)?;
            }

            // Write current data directly
            file.seek(SeekFrom::Start(offset))?;
            file.write_all(data)?;
        }

        Ok(())
    }

    /// Read bytes from GraphFile (alias for compatibility with existing code)
    pub fn read_bytes(
        graph_file: &mut crate::backend::native::graph_file::GraphFile,
        offset: u64,
        buffer: &mut [u8],
    ) -> NativeResult<()> {
        graph_file.read_bytes(offset, buffer)
    }

    /// Write bytes to GraphFile (alias for compatibility with existing code)
    pub fn write_bytes(
        graph_file: &mut crate::backend::native::graph_file::GraphFile,
        offset: u64,
        data: &[u8],
    ) -> NativeResult<()> {
        graph_file.write_bytes(offset, data)
    }

    /// Flush file buffers to disk (alias for compatibility with existing code)
    pub fn flush(
        graph_file: &mut crate::backend::native::graph_file::GraphFile,
    ) -> NativeResult<()> {
        graph_file.sync()
    }

    /// Prefetch data for optimal read performance (alias for compatibility)
    ///
    /// Currently this only ensures the file is large enough to cover the
    /// requested range; no actual read-ahead is performed.
    pub fn prefetch(
        graph_file: &mut crate::backend::native::graph_file::GraphFile,
        offset: u64,
        length: u64,
    ) -> NativeResult<()> {
        let required_size = offset + length;
        graph_file.ensure_file_len_at_least(required_size)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Read, Seek, SeekFrom, Write};
    use tempfile::tempfile;

    #[test]
    fn test_read_write_bytes_std() {
        let mut temp_file = tempfile().unwrap();

        // Write test data
        let test_data = b"Hello, I/O Operations!";
        IOOperationsManager::write_bytes_std(&mut temp_file, 0, test_data).unwrap();

        // Read back test data
        let mut buffer = vec![0u8; test_data.len()];
        IOOperationsManager::read_bytes_std(&mut temp_file, 0, &mut buffer).unwrap();

        assert_eq!(buffer, test_data);
    }

    #[test]
    fn test_write_bytes_direct() {
        let mut temp_file = tempfile().unwrap();

        // Write test data using standard I/O (write_bytes_direct itself requires a GraphFile instance)
        let test_data = b"Direct write test";
        IOOperationsManager::write_bytes_std(&mut temp_file, 0, test_data).unwrap();

        // Verify data was written
        let mut buffer = vec![0u8; test_data.len()];
        temp_file.seek(SeekFrom::Start(0)).unwrap();
        temp_file.read_exact(&mut buffer).unwrap();

        assert_eq!(buffer, test_data);
    }

    #[test]
    fn test_read_with_ahead() {
        let mut temp_file = tempfile().unwrap();

        // Write test data
        let test_data = b"Read-ahead test data";
        temp_file.seek(SeekFrom::Start(0)).unwrap();
        temp_file.write_all(test_data).unwrap();

        // Read using read_with_ahead
        let mut buffer = vec![0u8; test_data.len()];
        IOOperationsManager::read_with_ahead(&mut temp_file, 0, &mut buffer).unwrap();

        assert_eq!(buffer, test_data);
    }

    #[test]
    fn test_ensure_file_len_at_least() {
        let mut temp_file = tempfile().unwrap();

        // Ensure file is at least 1024 bytes
        IOOperationsManager::ensure_file_len_at_least(&mut temp_file, 1024).unwrap();

        // Verify file size
        let metadata = temp_file.metadata().unwrap();
        assert!(metadata.len() >= 1024);
    }

    #[test]
    fn test_flush_write_buffer() {
        let mut temp_file = tempfile().unwrap();
        let mut write_buffer = WriteBuffer::new(10);

        // Add some operations to buffer (use offsets beyond HEADER_SIZE = 80)
        write_buffer.add(100, b"data1".to_vec());
        write_buffer.add(110, b"data2".to_vec());

        // Flush buffer
        let bytes_written =
            IOOperationsManager::flush_write_buffer(&mut temp_file, &mut write_buffer).unwrap();

        assert_eq!(bytes_written, 10); // 5 + 5
        assert!(write_buffer.operations.is_empty());
    }
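
    // A hedged sketch of a test for write_buffered_bytes_std: it assumes that
    // WriteBuffer::new(10) accepts one small operation at an offset past the
    // header (as in test_flush_write_buffer above) and that buffered data only
    // reaches the file once flush_write_buffer is called.
    #[test]
    fn test_write_buffered_bytes_std() {
        let mut temp_file = tempfile().unwrap();
        let mut write_buffer = WriteBuffer::new(10);

        // Buffer the write (or fall back to a direct write if the buffer rejects it).
        let test_data = b"buffered";
        IOOperationsManager::write_buffered_bytes_std(
            &mut temp_file,
            test_data,
            100,
            &mut write_buffer,
        )
        .unwrap();

        // Flush any pending operations, then read the data back from the file.
        IOOperationsManager::flush_write_buffer(&mut temp_file, &mut write_buffer).unwrap();

        let mut buffer = vec![0u8; test_data.len()];
        IOOperationsManager::read_bytes_std(&mut temp_file, 100, &mut buffer).unwrap();
        assert_eq!(buffer, test_data);
    }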

    #[cfg(all(feature = "v2_experimental", feature = "v2_io_exclusive_std"))]
    #[test]
    fn test_std_exclusive_operations() {
        let mut temp_file = tempfile().unwrap();
        let mut write_buffer = WriteBuffer::new(10);

        // Write using exclusive std mode
        let test_data = b"Exclusive std mode test";
        IOOperationsManager::write_bytes_std_exclusive(
            &mut temp_file,
            0,
            test_data,
            &mut write_buffer,
        )
        .unwrap();

        // Read using exclusive std mode
        let mut buffer = vec![0u8; test_data.len()];
        IOOperationsManager::read_bytes_std_exclusive(
            &mut temp_file,
            0,
            &mut buffer,
            &mut write_buffer,
        )
        .unwrap();

        assert_eq!(buffer, test_data);
    }
}