Skip to main content

diskann_disk/data_model/
graph_metadata.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5
6use std::io::Cursor;
7
8use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
9use diskann::{ANNError, ANNResult};
10
11/// Index graph metadata. The metadata is stored in the first sector of the disk index file, or the first segment of the BigStorageStream.
12/// The metadata is like a "header" of the index graph.
13#[derive(Debug, Clone)]
14pub struct GraphMetadata {
15    // Number of points.
16    pub num_pts: u64,
17
18    // Data dimension.
19    pub dims: usize,
20
21    // Medoid index.
22    pub medoid: u64,
23
24    // Node length.
25    pub node_len: u64,
26
27    // Number of nodes per sector.
28    pub num_nodes_per_block: u64,
29
30    // Number of frozen nodes in Vamana index.
31    pub vamana_frozen_num: u64,
32
33    // Location of frozen nodes in Vamana index.
34    pub vamana_frozen_loc: u64,
35
36    // Size of the disk index file.
37    pub disk_index_file_size: u64,
38
39    // Length of the associated data.
40    pub associated_data_length: usize,
41}
42
43#[allow(clippy::too_many_arguments)]
44impl GraphMetadata {
45    /// Create a new `GraphMetadata` object.
46    pub fn new(
47        num_pts: u64,
48        dims: usize,
49        medoid: u64,
50        node_len: u64,
51        num_nodes_per_sector: u64,
52        vamana_frozen_num: u64,
53        vamana_frozen_loc: u64,
54        disk_index_file_size: u64,
55        data_size: usize,
56    ) -> Self {
57        Self {
58            num_pts,
59            dims,
60            medoid,
61            node_len,
62            num_nodes_per_block: num_nodes_per_sector,
63            vamana_frozen_num,
64            vamana_frozen_loc,
65            disk_index_file_size,
66            associated_data_length: data_size,
67        }
68    }
69
70    /// Serialize the `GraphMetadata` object to a byte vector.
71    ///
72    /// Layout:
73    /// |number_of_points (8 bytes)| dimensions (8 bytes) | medoid (8 bytes) | node_len (8 bytes) | num_nodes_per_sector (8 bytes) | vamana_frozen_point_num (8 bytes) |
74    /// ...| vamana_frozen_loc (8 bytes) | append_reorder_data (8 bytes) | disk_index_file_size (8 bytes) | associated_data_length (8 bytes) |
75    /// The append_reorder_data is not used in the rust version. We are just keeping it in the stream to make the file based disk index layout consistent with the C++ version.
76    pub fn to_bytes(&self) -> ANNResult<Vec<u8>> {
77        let mut buffer = vec![];
78
79        let mut cursor = Cursor::new(&mut buffer);
80        cursor.write_u64::<LittleEndian>(self.num_pts)?;
81        cursor.write_u64::<LittleEndian>(self.dims as u64)?;
82        cursor.write_u64::<LittleEndian>(self.medoid)?;
83        cursor.write_u64::<LittleEndian>(self.node_len)?;
84        cursor.write_u64::<LittleEndian>(self.num_nodes_per_block)?;
85        cursor.write_u64::<LittleEndian>(self.vamana_frozen_num)?;
86        cursor.write_u64::<LittleEndian>(self.vamana_frozen_loc)?;
87
88        // Append_reorder_data. This is not used in the rust version. We are just keeping it in the stream to make the file based disk index layout consistent with the C++ version.
89        cursor.write_u64::<LittleEndian>(false as u64)?;
90        cursor.write_u64::<LittleEndian>(self.disk_index_file_size)?;
91
92        cursor.write_u64::<LittleEndian>(self.associated_data_length as u64)?;
93        Ok(buffer)
94    }
95
96    // Size of the metadata after serialization.
97    #[inline]
98    pub fn get_size() -> usize {
99        std::mem::size_of::<u64>() * 10
100    }
101}
102
103impl<'a> TryFrom<&'a [u8]> for GraphMetadata {
104    type Error = ANNError;
105    /// Try creating a new `GraphMetadata` object from a byte slice. The try_from syntax is used here instead of from because this operation can fail.
106    ///
107    /// Layout:
108    /// |number_of_points (8 bytes)| dimensions (8 bytes) | medoid (8 bytes) | node_len (8 bytes) | num_nodes_per_sector (8 bytes) | vamana_frozen_point_num (8 bytes) |
109    /// ...| vamana_frozen_loc (8 bytes) | append_reorder_data (8 bytes) | disk_index_file_size (8 bytes) | associated_data_length (8 bytes) |
110    fn try_from(value: &'a [u8]) -> ANNResult<Self> {
111        if value.len() < Self::get_size() {
112            return Err(ANNError::log_parse_slice_error(
113                "&[u8]".to_string(),
114                "GraphMetadata".to_string(),
115                "The given bytes are not long enough to create a valid graph metadata.".to_string(),
116            ));
117        }
118
119        let mut cursor = Cursor::new(value);
120        let num_pts = cursor.read_u64::<LittleEndian>()?;
121        let dims = cursor.read_u64::<LittleEndian>()?;
122        let medoid = cursor.read_u64::<LittleEndian>()?;
123        let node_len = cursor.read_u64::<LittleEndian>()?;
124        let num_nodes_per_sector = cursor.read_u64::<LittleEndian>()?;
125        let vamana_frozen_num = cursor.read_u64::<LittleEndian>()?;
126        let vamana_frozen_loc = cursor.read_u64::<LittleEndian>()?;
127
128        // append_reorder_data. This is not used in the rust version. We are just keeping it in the stream to make the file based disk index layout consistent with the C++ version.
129        cursor.read_u64::<LittleEndian>()?;
130        let disk_index_file_size = cursor.read_u64::<LittleEndian>()?;
131        let associated_data_length = cursor.read_u64::<LittleEndian>()? as usize;
132
133        Ok(Self {
134            num_pts,
135            dims: dims as usize,
136            medoid,
137            node_len,
138            num_nodes_per_block: num_nodes_per_sector,
139            vamana_frozen_num,
140            vamana_frozen_loc,
141            disk_index_file_size,
142            associated_data_length,
143        })
144    }
145}
146
147#[cfg(test)]
148mod tests {
149    use super::GraphMetadata;
150
151    #[test]
152    fn test_graph_metadata_serialized_size() {
153        let num_pts = 1000;
154        let dims = 32;
155        let medoid = 500;
156        let node_len = 64;
157        let num_nodes_per_sector = 4;
158        let vamana_frozen_num = 20;
159        let vamana_frozen_loc = 50;
160        let disk_index_file_size = 1024;
161        let data_size = 256;
162
163        let metadata = GraphMetadata::new(
164            num_pts,
165            dims,
166            medoid,
167            node_len,
168            num_nodes_per_sector,
169            vamana_frozen_num,
170            vamana_frozen_loc,
171            disk_index_file_size,
172            data_size,
173        );
174
175        let bytes = metadata.to_bytes().unwrap();
176        assert_eq!(bytes.len(), GraphMetadata::get_size());
177    }
178
179    #[test]
180    fn test_graph_metadata_to_bytes_and_try_from() {
181        let num_pts = 1000;
182        let dims = 32;
183        let medoid = 500;
184        let node_len = 64;
185        let num_nodes_per_sector = 4;
186        let vamana_frozen_num = 20;
187        let vamana_frozen_loc = 50;
188        let disk_index_file_size = 1024;
189        let data_size = 256;
190
191        let metadata = GraphMetadata::new(
192            num_pts,
193            dims,
194            medoid,
195            node_len,
196            num_nodes_per_sector,
197            vamana_frozen_num,
198            vamana_frozen_loc,
199            disk_index_file_size,
200            data_size,
201        );
202
203        let bytes = metadata.to_bytes().unwrap();
204        let deserialized_metadata = GraphMetadata::try_from(bytes.as_slice()).unwrap();
205        assert_eq!(metadata.num_pts, deserialized_metadata.num_pts);
206        assert_eq!(metadata.dims, deserialized_metadata.dims);
207        assert_eq!(metadata.medoid, deserialized_metadata.medoid);
208        assert_eq!(metadata.node_len, deserialized_metadata.node_len);
209        assert_eq!(
210            metadata.num_nodes_per_block,
211            deserialized_metadata.num_nodes_per_block
212        );
213        assert_eq!(
214            metadata.vamana_frozen_num,
215            deserialized_metadata.vamana_frozen_num
216        );
217        assert_eq!(
218            metadata.vamana_frozen_loc,
219            deserialized_metadata.vamana_frozen_loc
220        );
221        assert_eq!(
222            metadata.disk_index_file_size,
223            deserialized_metadata.disk_index_file_size
224        );
225        assert_eq!(
226            metadata.associated_data_length,
227            deserialized_metadata.associated_data_length
228        );
229    }
230
231    #[test]
232    fn test_graph_metadata_try_from_error() {
233        let bytes = vec![2; GraphMetadata::get_size() - 1];
234        let result = GraphMetadata::try_from(&bytes[..]);
235        assert!(result.is_err());
236    }
237}