Skip to main content

eat_rocks/
meta.rs

1/// rocks backup meta file parse errors
2#[derive(Debug, PartialEq, thiserror::Error)]
3pub enum ParseError {
4    #[error("empty meta file")]
5    Empty,
6    #[error("could not parse schema version from {0:?} (v1 and v2 supported)")]
7    InvalidSchemaVersion(String),
8    #[error("missing timestamp line")]
9    MissingTimestamp,
10    #[error("invalid timestamp: {0:?}")]
11    InvalidTimestamp(String),
12    #[error("missing sequence number line")]
13    MissingSequenceNumber,
14    #[error("invalid sequence number: {0:?}")]
15    InvalidSequenceNumber(String),
16    #[error("unexpected end of meta file while looking for file count")]
17    UnexpectedEndBeforeFileCount,
18    #[error("invalid file count: {0:?}")]
19    InvalidFileCount(String),
20    #[error("expected {expected} file entries but file ended after {actual}")]
21    FileCountMismatch { expected: usize, actual: usize },
22    #[error("empty file entry at position {0}")]
23    EmptyFileEntry(usize),
24    #[error("field {0:?} missing its value")]
25    MissingFieldValue(String),
26    #[error("invalid crc32 value: {0:?}")]
27    InvalidCrc32(String),
28    #[error("invalid size value: {0:?}")]
29    InvalidSize(String),
30    #[error("unrecognized non-ignorable field: {0:?}")]
31    NonIgnorableField(String),
32    #[error("unrecognized value {value:?} for field {field:?}")]
33    UnrecognizedFieldValue { field: String, value: String },
34}
35
/// Parsed contents of a rocks backup meta file (`meta/<id>`).
///
/// See [`BackupMeta::parse`] and the
/// [backup format docs](https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BackupMeta {
    /// Value of the timestamp line.
    pub timestamp: u64,
    /// Value of the sequence number line.
    pub sequence_number: u64,
    /// Text following the `metadata ` prefix of a header line, stored exactly
    /// as written (no decoding is applied). `None` when no such line exists.
    pub metadata: Option<String>,
    /// File entries, in the order they appear in the meta file.
    pub files: Vec<BackupFile>,
}

/// Each file entry in a rocks backup meta file
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BackupFile {
    /// path is relative to backup root, eg. `shared_checksum/000007_123_456.sst`
    pub path: String,
    /// Value of the entry's `crc32` field, if one was present.
    pub crc32c: Option<u32>,
    /// Value of the entry's `size` field, if one was present.
    pub size: Option<u64>,
    /// `true` when the entry carried `ni::excluded true`; `false` otherwise.
    pub excluded: bool,
}
57
58impl BackupMeta {
59    /// Parse a RocksDB backup meta file.
60    ///
61    /// works with schema v1 (implicit) and v2.
62    pub fn parse(content: &str) -> Result<Self, ParseError> {
63        let mut lines = content.lines();
64
65        let first = lines.next().ok_or(ParseError::Empty)?;
66
67        // schema v1: first line is the timestamp.
68        // schema v2 starts with "schema_version 2.x".
69        let ts_line = if let Some(ver_str) = first.strip_prefix("schema_version ") {
70            let Some("2") = ver_str.split('.').next() else {
71                return Err(ParseError::InvalidSchemaVersion(ver_str.to_string()));
72            };
73            lines.next().ok_or(ParseError::MissingTimestamp)?
74        } else {
75            first
76        };
77
78        let timestamp: u64 = ts_line
79            .parse()
80            .map_err(|_| ParseError::InvalidTimestamp(ts_line.to_string()))?;
81
82        let seq_line = lines.next().ok_or(ParseError::MissingSequenceNumber)?;
83        let sequence_number: u64 = seq_line
84            .parse()
85            .map_err(|_| ParseError::InvalidSequenceNumber(seq_line.to_string()))?;
86
87        // optional metadata lines before file count (first purely numeric line)
88        let mut metadata = None;
89        let num_files: usize = loop {
90            let line = lines
91                .next()
92                .ok_or(ParseError::UnexpectedEndBeforeFileCount)?;
93            if let Ok(n) = line.parse::<usize>() {
94                break n;
95            }
96            if let Some(hex) = line.strip_prefix("metadata ") {
97                metadata = Some(hex.to_string());
98            } else if line.starts_with("ni::") {
99                let field = line.split_whitespace().next().unwrap_or(line);
100                return Err(ParseError::NonIgnorableField(field.to_string()));
101            }
102            // safe to skip unknown ignorable (non-"ni::") fields
103        };
104
105        // file entries, finally
106        let mut files = Vec::new();
107        for i in 0..num_files {
108            let line = lines.next().ok_or(ParseError::FileCountMismatch {
109                expected: num_files,
110                actual: i, // 0-indexed so it's last round's count (what we actually did)
111            })?;
112            files.push(parse_file_line(line, i)?);
113        }
114
115        Ok(BackupMeta {
116            timestamp,
117            sequence_number,
118            metadata,
119            files,
120        })
121    }
122}
123
124fn parse_file_line(line: &str, position: usize) -> Result<BackupFile, ParseError> {
125    let mut parts = line.split_whitespace();
126    let path = parts
127        .next()
128        .ok_or(ParseError::EmptyFileEntry(position))?
129        .to_string();
130
131    let mut crc32c = None;
132    let mut size = None;
133    let mut excluded = false;
134
135    while let Some(field_name) = parts.next() {
136        let field_value = parts
137            .next()
138            .ok_or_else(|| ParseError::MissingFieldValue(field_name.to_string()))?;
139
140        match field_name {
141            "crc32" => {
142                crc32c = Some(
143                    field_value
144                        .parse()
145                        .map_err(|_| ParseError::InvalidCrc32(field_value.to_string()))?,
146                );
147            }
148            "size" => {
149                size = Some(
150                    field_value
151                        .parse()
152                        .map_err(|_| ParseError::InvalidSize(field_value.to_string()))?,
153                );
154            }
155            "ni::excluded" => match field_value {
156                "true" => excluded = true,
157                "false" => excluded = false,
158                _ => {
159                    return Err(ParseError::UnrecognizedFieldValue {
160                        field: field_name.to_string(),
161                        value: field_value.to_string(),
162                    });
163                }
164            },
165            "temp" => {}
166            other if other.starts_with("ni::") => {
167                return Err(ParseError::NonIgnorableField(other.to_string()));
168            }
169            _ => {} // unknown non-"ni" fields are safe to ignore
170        }
171    }
172
173    Ok(BackupFile {
174        path,
175        crc32c,
176        size,
177        excluded,
178    })
179}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input`, panicking with the parse error if it is rejected.
    fn parse_ok(input: &str) -> BackupMeta {
        BackupMeta::parse(input).expect("meta file should parse")
    }

    /// Schema v1 has no `schema_version` line; the timestamp comes first.
    #[test]
    fn parse_schema_v1() {
        let input = "\
1498774076
590
3
private/1/CURRENT crc32 123456
private/1/MANIFEST-000008 crc32 789012
shared_checksum/000007_1498774076_590.sst crc32 345678";

        let parsed = parse_ok(input);
        assert_eq!(parsed.timestamp, 1498774076);
        assert_eq!(parsed.sequence_number, 590);
        assert_eq!(parsed.files.len(), 3);

        let first = &parsed.files[0];
        assert_eq!(first.path, "private/1/CURRENT");
        assert_eq!(first.crc32c, Some(123456));
    }

    /// Schema v2 adds the version line, header fields and extra file fields.
    #[test]
    fn parse_schema_v2() {
        let input = "\
schema_version 2.1
1498774076
590
metadata 48656c6c6f
3
private/1/CURRENT crc32 123456 size 16
private/1/MANIFEST-000008 crc32 789012 size 1024
shared_checksum/000007_1498774076_590.sst crc32 345678 size 65536 temp kCold";

        let parsed = parse_ok(input);
        assert_eq!(parsed.timestamp, 1498774076);
        assert_eq!(parsed.sequence_number, 590);
        assert_eq!(parsed.metadata.as_deref(), Some("48656c6c6f"));
        assert_eq!(parsed.files.len(), 3);
        assert_eq!(parsed.files[2].size, Some(65536));
    }

    #[test]
    fn parse_empty() {
        assert_eq!(BackupMeta::parse(""), Err(ParseError::Empty));
    }

    #[test]
    fn parse_unsupported_schema() {
        let outcome = BackupMeta::parse("schema_version 3.0\n0\n0\n0\n");
        let wanted = Err(ParseError::InvalidSchemaVersion("3.0".to_string()));
        assert_eq!(outcome, wanted);
    }

    #[test]
    fn non_ignorable_field_rejected() {
        let input = "\
1498774076
590
1
private/1/CURRENT ni::unknown_field true";

        let outcome = BackupMeta::parse(input);
        assert!(matches!(outcome, Err(ParseError::NonIgnorableField(_))));
    }

    #[test]
    fn bogus_file_count_does_not_allocate() {
        // Regression: "2\n2\n64406400" claimed 64M files with 0 lines remaining,
        // causing a multi-GB Vec::with_capacity before the loop could fail.
        let outcome = BackupMeta::parse("2\n2\n64406400");
        assert_eq!(
            outcome,
            Err(ParseError::FileCountMismatch {
                expected: 64406400,
                actual: 0,
            })
        );
    }

    /// Schema v1 is only accepted implicitly, never via a `schema_version` line.
    #[test]
    fn explicit_schema_v1_rejected() {
        let outcome = BackupMeta::parse("schema_version 1.0\n1000\n100\n0\n");
        let wanted = Err(ParseError::InvalidSchemaVersion("1.0".to_string()));
        assert_eq!(outcome, wanted);
    }

    #[test]
    fn non_ignorable_header_field_rejected() {
        let input = "\
schema_version 2.1
1498774076
590
ni::future_breaking_field something
0";
        let outcome = BackupMeta::parse(input);
        assert!(matches!(outcome, Err(ParseError::NonIgnorableField(_))));
    }

    #[test]
    fn unknown_ignorable_header_field_skipped() {
        let input = "\
schema_version 2.1
1498774076
590
some_future_field data
0";
        let parsed = parse_ok(input);
        assert_eq!(parsed.timestamp, 1498774076);
        assert_eq!(parsed.files.len(), 0);
    }

    #[test]
    fn duplicate_metadata_uses_last() {
        let input = "\
schema_version 2.1
1498774076
590
metadata aaa
metadata bbb
0";
        let parsed = parse_ok(input);
        assert_eq!(parsed.metadata.as_deref(), Some("bbb"));
    }

    #[test]
    fn file_fields_in_any_order() {
        let input = "\
1498774076
590
1
private/1/CURRENT size 16 crc32 123456 temp kHot";
        let parsed = parse_ok(input);
        let only = &parsed.files[0];
        assert_eq!(only.crc32c, Some(123456));
        assert_eq!(only.size, Some(16));
    }

    #[test]
    fn duplicate_file_fields_use_last() {
        let input = "\
1498774076
590
1
private/1/CURRENT crc32 111 crc32 222";
        let parsed = parse_ok(input);
        assert_eq!(parsed.files[0].crc32c, Some(222));
    }

    #[test]
    fn unknown_ignorable_file_field_skipped() {
        let input = "\
1498774076
590
1
private/1/CURRENT crc32 123 future_field value123";
        let parsed = parse_ok(input);
        assert_eq!(parsed.files[0].crc32c, Some(123));
    }

    #[test]
    fn excluded_bad_value_rejected() {
        let input = "\
1498774076
590
1
private/1/CURRENT ni::excluded banana";
        let outcome = BackupMeta::parse(input);
        assert!(matches!(
            outcome,
            Err(ParseError::UnrecognizedFieldValue { .. })
        ));
    }

    #[test]
    fn excluded_file_parsed() {
        let input = "\
schema_version 2.1
1498774076
590
1
shared_checksum/000007_123_456.sst crc32 999 ni::excluded true";

        let parsed = parse_ok(input);
        assert!(parsed.files[0].excluded);
    }
}
373}