matchy_format/mmdb/
format.rs1use super::types::{record_size_from_bits, IpVersion, MmdbError, RecordSize, METADATA_MARKER};
13use matchy_data_format::{DataDecoder, DataValue};
14
15#[derive(Debug, Clone, Copy)]
20pub struct MmdbHeader {
21 pub node_count: u32,
23 pub record_size: RecordSize,
25 pub ip_version: IpVersion,
27 pub tree_size: usize,
29}
30
31impl MmdbHeader {
32 pub fn from_file(data: &[u8]) -> Result<Self, MmdbError> {
36 let marker_offset = find_metadata_marker(data)?;
38
39 let metadata_offset = marker_offset + METADATA_MARKER.len();
42 let metadata_bytes = &data[metadata_offset..];
43
44 let decoder = DataDecoder::new(metadata_bytes, 0);
46 let metadata_value = decoder
47 .decode(0)
48 .map_err(|e| MmdbError::InvalidMetadata(format!("Failed to decode metadata: {e}")))?;
49
50 let (node_count, record_size_bits, ip_version_num) = match metadata_value {
52 DataValue::Map(ref map) => {
53 let node_count = extract_uint(map, "node_count")?;
54 let record_size = u16::try_from(extract_uint(map, "record_size")?)
55 .map_err(|_| MmdbError::InvalidMetadata("record_size too large".to_string()))?;
56 let ip_version = extract_uint(map, "ip_version")?;
57 (node_count, record_size, ip_version)
58 }
59 _ => {
60 return Err(MmdbError::InvalidMetadata(
61 "Metadata is not a map".to_string(),
62 ))
63 }
64 };
65
66 let record_size = record_size_from_bits(record_size_bits)?;
67
68 let ip_version = match ip_version_num {
69 4 => IpVersion::V4,
70 6 => IpVersion::V6,
71 _ => {
72 return Err(MmdbError::InvalidMetadata(format!(
73 "Invalid IP version: {ip_version_num}"
74 )))
75 }
76 };
77
78 let node_count_u32 = u32::try_from(node_count)
80 .map_err(|_| MmdbError::InvalidMetadata("node_count exceeds u32::MAX".to_string()))?;
81 let tree_size = usize::try_from(node_count)
82 .map_err(|_| MmdbError::InvalidMetadata("node_count exceeds usize".to_string()))?
83 * record_size.node_bytes();
84
85 Ok(Self {
86 node_count: node_count_u32,
87 record_size,
88 ip_version,
89 tree_size,
90 })
91 }
92}
93
94pub struct MmdbMetadata<'a> {
99 raw_data: &'a [u8],
100 metadata_offset: usize,
101}
102
103impl<'a> MmdbMetadata<'a> {
104 pub fn from_file(data: &'a [u8]) -> Result<Self, MmdbError> {
106 let metadata_start = find_metadata_marker(data)?;
107 let metadata_offset = metadata_start + METADATA_MARKER.len();
108
109 Ok(MmdbMetadata {
110 raw_data: data,
111 metadata_offset,
112 })
113 }
114
115 pub fn as_value(&self) -> Result<DataValue, MmdbError> {
117 let decoder = DataDecoder::new(&self.raw_data[self.metadata_offset..], 0);
118 decoder
119 .decode(0)
120 .map_err(|e| MmdbError::InvalidMetadata(e.to_string()))
121 }
122}
123
124pub fn find_metadata_marker(data: &[u8]) -> Result<usize, MmdbError> {
131 const SEARCH_SIZE: usize = 128 * 1024; if data.len() < METADATA_MARKER.len() {
134 return Err(MmdbError::MetadataNotFound);
135 }
136
137 let search_start = if data.len() > SEARCH_SIZE {
139 data.len() - SEARCH_SIZE
140 } else {
141 0
142 };
143
144 let mut last_marker = None;
147 for i in search_start..=(data.len() - METADATA_MARKER.len()) {
148 if &data[i..i + METADATA_MARKER.len()] == METADATA_MARKER {
149 last_marker = Some(i);
150 }
151 }
152
153 last_marker.ok_or(MmdbError::MetadataNotFound)
154}
155
156fn extract_uint(
159 map: &std::collections::HashMap<String, DataValue>,
160 key: &str,
161) -> Result<u64, MmdbError> {
162 match map.get(key) {
163 Some(DataValue::Uint16(n)) => Ok(u64::from(*n)),
164 Some(DataValue::Uint32(n)) => Ok(u64::from(*n)),
165 Some(DataValue::Uint64(n)) => Ok(*n),
166 Some(_) => Err(MmdbError::InvalidMetadata(format!(
167 "Field '{key}' is not an unsigned integer"
168 ))),
169 None => Err(MmdbError::InvalidMetadata(format!(
170 "Required field '{key}' not found"
171 ))),
172 }
173}
174
#[cfg(test)]
mod tests {
    use super::*;

    /// The marker must be found in a real database and must not sit at offset 0.
    #[test]
    fn test_find_metadata_marker() {
        let data = include_bytes!("../../tests/data/GeoLite2-Country.mmdb");
        let marker_offset = find_metadata_marker(data);
        assert!(marker_offset.is_ok(), "Should find metadata marker");

        let offset = marker_offset.unwrap();
        println!("Total file size: {} bytes", data.len());
        println!("Marker found at offset: {offset}");
        println!(
            "Marker: {:?}",
            &data[offset..offset + METADATA_MARKER.len()]
        );

        assert!(offset > 0, "Marker should not be at start of file");
        assert_eq!(
            &data[offset..offset + METADATA_MARKER.len()],
            METADATA_MARKER
        );

        // Diagnostic dump of the bytes surrounding the marker.
        let after_marker = offset + METADATA_MARKER.len();
        let before_marker = offset.saturating_sub(20);
        println!(
            "20 bytes before marker: {:02x?}",
            &data[before_marker..offset]
        );
        println!(
            "Bytes after marker: {} bytes remaining",
            data.len() - after_marker
        );
        if data.len() > after_marker {
            // Clamp the preview so it never runs past the end of the file.
            let preview_end = after_marker.saturating_add(20).min(data.len());
            println!(
                "First 20 bytes after marker: {:02x?}",
                &data[after_marker..preview_end]
            );
        }
    }

    /// Parsing a real database yields a non-empty tree with a known layout.
    #[test]
    fn test_parse_header_minimal() {
        let data = include_bytes!("../../tests/data/GeoLite2-Country.mmdb");
        let header = MmdbHeader::from_file(data);
        if let Err(ref e) = header {
            println!("Error parsing header: {e}");
        }
        assert!(header.is_ok(), "Should parse header successfully");

        let header = header.unwrap();
        assert!(header.node_count > 0, "Should have nodes");
        assert!(header.tree_size > 0, "Tree should have size");

        // Exhaustive matches with no wildcard arm: they stop compiling if a
        // variant is added to either enum without this test being revisited.
        match header.record_size {
            RecordSize::Bits24 | RecordSize::Bits28 | RecordSize::Bits32 => {}
        }

        match header.ip_version {
            IpVersion::V4 | IpVersion::V6 => {}
        }

        println!("Header: {header:?}");
        // `size_of_val` reports the inline size of the struct itself.
        println!(
            "In-memory header size: {} bytes",
            std::mem::size_of_val(&header)
        );
    }

    /// Metadata is decoded lazily and exposes the expected well-known fields.
    #[test]
    fn test_metadata_on_demand() {
        let data = include_bytes!("../../tests/data/GeoLite2-Country.mmdb");
        let metadata = MmdbMetadata::from_file(data);
        assert!(metadata.is_ok(), "Should create metadata accessor");

        let metadata = metadata.unwrap();

        let metadata_value = metadata.as_value();
        assert!(metadata_value.is_ok());

        if let DataValue::Map(ref map) = metadata_value.unwrap() {
            if let Some(DataValue::String(db_type)) = map.get("database_type") {
                assert_eq!(db_type, "GeoLite2-Country");
            }

            if let Some(epoch_value) = map.get("build_epoch") {
                let epoch_num = match epoch_value {
                    DataValue::Uint32(n) => u64::from(*n),
                    DataValue::Uint64(n) => *n,
                    _ => panic!("build_epoch has unexpected type"),
                };
                println!("Build epoch: {epoch_num}");
                assert!(epoch_num > 0);
            }
        } else {
            panic!("Metadata should be a map");
        }
    }

    /// Input without the marker is reported as `MetadataNotFound`.
    #[test]
    fn test_metadata_not_found() {
        let data = b"not a valid mmdb file";
        let result = find_metadata_marker(data);
        assert!(result.is_err());
        assert!(matches!(result, Err(MmdbError::MetadataNotFound)));
    }
}