1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
//! File validation and corruption detection utilities
//!
//! This module provides validation functions for ensuring file integrity,
//! detecting corruption, and verifying commit markers in graph files.
use crate::backend::native::{
constants::HEADER_SIZE,
types::{NativeBackendError, NativeResult},
};
/// Graph file validation utilities.
///
/// A stateless unit type: every operation is exposed as an associated
/// function, so no instance is required to call them. The standard derives
/// make the type printable in diagnostics and trivially copyable should a
/// caller want to hold a value of it.
#[derive(Debug, Clone, Copy, Default)]
pub struct GraphFileValidator;
impl GraphFileValidator {
    /// Marker value that denotes a clean commit state.
    ///
    /// The bytes spell the ASCII string "CLEAN_EO" in hex.
    const COMMIT_MARKER_CLEAN: u64 = 0x434C45414E5F454F;

    /// Byte offset of the commit marker within the file.
    ///
    /// Kept as a single definition so the accessor below is the only place
    /// the value is spelled out.
    const COMMIT_MARKER_OFFSET: u64 = 80;

    /// Validate that the file size meets minimum requirements
    ///
    /// Ensures the file is large enough for the header and any declared data.
    /// For native backend, we only require file to be large enough for actual data written.
    ///
    /// # Errors
    ///
    /// Returns [`NativeBackendError::FileTooSmall`] when `file_size` is below
    /// `HEADER_SIZE`, or below the value computed by
    /// [`Self::calculate_minimum_expected_size`]. The header check runs first,
    /// so its `min_size` is the one reported when both checks would fail.
    pub fn validate_file_size(
        file_size: u64,
        persistent_header: &crate::backend::native::persistent_header::PersistentHeaderV2,
    ) -> NativeResult<()> {
        Self::ensure_min_size(file_size, HEADER_SIZE)?;
        // Basic sanity check: file should be at least large enough for declared records.
        // Delegating keeps this check and `calculate_minimum_expected_size` in agreement.
        Self::ensure_min_size(
            file_size,
            Self::calculate_minimum_expected_size(persistent_header),
        )
    }

    /// Fail with `FileTooSmall` when `file_size` is strictly below `min_size`.
    fn ensure_min_size(file_size: u64, min_size: u64) -> NativeResult<()> {
        if file_size < min_size {
            return Err(NativeBackendError::FileTooSmall {
                size: file_size,
                min_size,
            });
        }
        Ok(())
    }

    /// Verify the commit marker indicates a clean commit state
    ///
    /// Detects incomplete clustered commits by checking the commit marker.
    ///
    /// # Errors
    ///
    /// Returns [`NativeBackendError::InvalidHeader`] with `field` set to
    /// `"commit_marker"` when `marker` differs from the value returned by
    /// [`Self::clean_commit_marker`].
    pub fn verify_commit_marker(marker: u64) -> NativeResult<()> {
        if marker != Self::COMMIT_MARKER_CLEAN {
            return Err(NativeBackendError::InvalidHeader {
                field: "commit_marker".to_string(),
                reason: format!(
                    "incomplete clustered commit detected (marker=0x{:016X})",
                    marker
                ),
            });
        }
        Ok(())
    }

    /// Get the clean commit marker value
    pub const fn clean_commit_marker() -> u64 {
        Self::COMMIT_MARKER_CLEAN
    }

    /// Get the commit marker offset in the header
    ///
    /// CRITICAL: Commit marker is positioned AFTER the header region (bytes 0-79)
    /// to prevent collision with header fields. The header occupies bytes 0-79 (HEADER_SIZE=80),
    /// so the commit marker is at offset 80, immediately following the header.
    pub const fn commit_marker_offset() -> u64 {
        // Position commit marker after header (bytes 0-79) to prevent collision
        // with free_space_offset field.
        Self::COMMIT_MARKER_OFFSET
    }

    /// Calculate minimum expected file size based on header state
    ///
    /// Returns `node_data_offset` when the header declares no edges, and the
    /// larger of `edge_data_offset` and `node_data_offset` otherwise.
    pub fn calculate_minimum_expected_size(
        persistent_header: &crate::backend::native::persistent_header::PersistentHeaderV2,
    ) -> u64 {
        if persistent_header.edge_count > 0 {
            // If edges exist, file must be large enough to contain them
            std::cmp::max(
                persistent_header.edge_data_offset,
                persistent_header.node_data_offset,
            )
        } else {
            // If no edges exist, file only needs to be large enough for header and node data.
            // edge_data_offset is a reservation for future edge data, not a current requirement.
            persistent_header.node_data_offset
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::backend::native::persistent_header::PersistentHeaderV2;

    /// Build a fresh header with `node_data_offset = 1024` and
    /// `edge_data_offset = 2048`; all other fields keep their `new_v2` values.
    fn header_with_offsets() -> PersistentHeaderV2 {
        let mut header = PersistentHeaderV2::new_v2();
        header.node_data_offset = 1024;
        header.edge_data_offset = 2048;
        header
    }

    #[test]
    fn test_validate_file_size_minimum_header() {
        let header = PersistentHeaderV2::new_v2();
        let result = GraphFileValidator::validate_file_size(50, &header);
        // File too small for header: the error must carry the offending size.
        assert!(matches!(
            &result,
            Err(NativeBackendError::FileTooSmall { size: 50, .. })
        ));
    }

    #[test]
    fn test_validate_file_size_minimum_with_data() {
        let header = header_with_offsets();
        // File large enough for header and declared node data
        let result = GraphFileValidator::validate_file_size(1500, &header);
        assert!(result.is_ok());
        // File too small for declared data
        let result = GraphFileValidator::validate_file_size(1000, &header);
        assert!(matches!(
            &result,
            Err(NativeBackendError::FileTooSmall {
                size: 1000,
                min_size: 1024
            })
        ));
    }

    #[test]
    fn test_validate_file_size_with_edges_requires_edge_offset() {
        let mut header = header_with_offsets();
        header.edge_count = 1;
        // Once edges are declared, the larger of the two offsets is the bound.
        let result = GraphFileValidator::validate_file_size(1500, &header);
        assert!(matches!(
            &result,
            Err(NativeBackendError::FileTooSmall {
                size: 1500,
                min_size: 2048
            })
        ));
        // A file exactly at the bound is accepted (the comparison is strict).
        let result = GraphFileValidator::validate_file_size(2048, &header);
        assert!(result.is_ok());
    }

    #[test]
    fn test_verify_commit_marker_clean() {
        let result =
            GraphFileValidator::verify_commit_marker(GraphFileValidator::clean_commit_marker());
        assert!(result.is_ok());
    }

    #[test]
    fn test_verify_commit_marker_dirty() {
        let result = GraphFileValidator::verify_commit_marker(0x1234567890ABCDEF);
        assert!(matches!(
            &result,
            Err(NativeBackendError::InvalidHeader { field, .. }) if field == "commit_marker"
        ));
    }

    #[test]
    fn test_commit_marker_offset_follows_header() {
        // The marker is documented as living after the header region, so its
        // offset must never fall inside the first HEADER_SIZE bytes.
        assert!(GraphFileValidator::commit_marker_offset() >= HEADER_SIZE);
    }

    #[test]
    fn test_calculate_minimum_expected_size() {
        let mut header = header_with_offsets();
        // No edges - should return node data offset
        let min_size = GraphFileValidator::calculate_minimum_expected_size(&header);
        assert_eq!(min_size, 1024);
        // With edges - should return max of node and edge data offsets
        header.edge_count = 1;
        let min_size = GraphFileValidator::calculate_minimum_expected_size(&header);
        assert_eq!(min_size, 2048);
    }
}