1#[derive(Debug, PartialEq, thiserror::Error)]
3pub enum ParseError {
4 #[error("empty meta file")]
5 Empty,
6 #[error("could not parse schema version from {0:?} (v1 and v2 supported)")]
7 InvalidSchemaVersion(String),
8 #[error("missing timestamp line")]
9 MissingTimestamp,
10 #[error("invalid timestamp: {0:?}")]
11 InvalidTimestamp(String),
12 #[error("missing sequence number line")]
13 MissingSequenceNumber,
14 #[error("invalid sequence number: {0:?}")]
15 InvalidSequenceNumber(String),
16 #[error("unexpected end of meta file while looking for file count")]
17 UnexpectedEndBeforeFileCount,
18 #[error("invalid file count: {0:?}")]
19 InvalidFileCount(String),
20 #[error("expected {expected} file entries but file ended after {actual}")]
21 FileCountMismatch { expected: usize, actual: usize },
22 #[error("empty file entry at position {0}")]
23 EmptyFileEntry(usize),
24 #[error("field {0:?} missing its value")]
25 MissingFieldValue(String),
26 #[error("invalid crc32 value: {0:?}")]
27 InvalidCrc32(String),
28 #[error("invalid size value: {0:?}")]
29 InvalidSize(String),
30 #[error("unrecognized non-ignorable field: {0:?}")]
31 NonIgnorableField(String),
32 #[error("unrecognized value {value:?} for field {field:?}")]
33 UnrecognizedFieldValue { field: String, value: String },
34}
35
/// Parsed contents of a backup meta file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BackupMeta {
    /// Value of the timestamp line.
    pub timestamp: u64,
    /// Value of the sequence number line.
    pub sequence_number: u64,
    /// Text that followed the `metadata ` prefix on a header line, stored
    /// verbatim (it is not decoded); `None` when no such line was present.
    pub metadata: Option<String>,
    /// One entry per file line, in the order they appeared.
    pub files: Vec<BackupFile>,
}

/// A single file entry from a backup meta file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BackupFile {
    /// First whitespace-separated token of the entry line.
    pub path: String,
    /// Value of the entry's `crc32` field, if it had one.
    pub crc32c: Option<u32>,
    /// Value of the entry's `size` field, if it had one.
    pub size: Option<u64>,
    /// `true` when the entry carried `ni::excluded true`.
    pub excluded: bool,
}
57
58impl BackupMeta {
59 pub fn parse(content: &str) -> Result<Self, ParseError> {
63 let mut lines = content.lines();
64
65 let first = lines.next().ok_or(ParseError::Empty)?;
66
67 let ts_line = if let Some(ver_str) = first.strip_prefix("schema_version ") {
70 let Some("2") = ver_str.split('.').next() else {
71 return Err(ParseError::InvalidSchemaVersion(ver_str.to_string()));
72 };
73 lines.next().ok_or(ParseError::MissingTimestamp)?
74 } else {
75 first
76 };
77
78 let timestamp: u64 = ts_line
79 .parse()
80 .map_err(|_| ParseError::InvalidTimestamp(ts_line.to_string()))?;
81
82 let seq_line = lines.next().ok_or(ParseError::MissingSequenceNumber)?;
83 let sequence_number: u64 = seq_line
84 .parse()
85 .map_err(|_| ParseError::InvalidSequenceNumber(seq_line.to_string()))?;
86
87 let mut metadata = None;
89 let num_files: usize = loop {
90 let line = lines
91 .next()
92 .ok_or(ParseError::UnexpectedEndBeforeFileCount)?;
93 if let Ok(n) = line.parse::<usize>() {
94 break n;
95 }
96 if let Some(hex) = line.strip_prefix("metadata ") {
97 metadata = Some(hex.to_string());
98 } else if line.starts_with("ni::") {
99 let field = line.split_whitespace().next().unwrap_or(line);
100 return Err(ParseError::NonIgnorableField(field.to_string()));
101 }
102 };
104
105 let mut files = Vec::new();
107 for i in 0..num_files {
108 let line = lines.next().ok_or(ParseError::FileCountMismatch {
109 expected: num_files,
110 actual: i, })?;
112 files.push(parse_file_line(line, i)?);
113 }
114
115 Ok(BackupMeta {
116 timestamp,
117 sequence_number,
118 metadata,
119 files,
120 })
121 }
122}
123
124fn parse_file_line(line: &str, position: usize) -> Result<BackupFile, ParseError> {
125 let mut parts = line.split_whitespace();
126 let path = parts
127 .next()
128 .ok_or(ParseError::EmptyFileEntry(position))?
129 .to_string();
130
131 let mut crc32c = None;
132 let mut size = None;
133 let mut excluded = false;
134
135 while let Some(field_name) = parts.next() {
136 let field_value = parts
137 .next()
138 .ok_or_else(|| ParseError::MissingFieldValue(field_name.to_string()))?;
139
140 match field_name {
141 "crc32" => {
142 crc32c = Some(
143 field_value
144 .parse()
145 .map_err(|_| ParseError::InvalidCrc32(field_value.to_string()))?,
146 );
147 }
148 "size" => {
149 size = Some(
150 field_value
151 .parse()
152 .map_err(|_| ParseError::InvalidSize(field_value.to_string()))?,
153 );
154 }
155 "ni::excluded" => match field_value {
156 "true" => excluded = true,
157 "false" => excluded = false,
158 _ => {
159 return Err(ParseError::UnrecognizedFieldValue {
160 field: field_name.to_string(),
161 value: field_value.to_string(),
162 });
163 }
164 },
165 "temp" => {}
166 other if other.starts_with("ni::") => {
167 return Err(ParseError::NonIgnorableField(other.to_string()));
168 }
169 _ => {} }
171 }
172
173 Ok(BackupFile {
174 path,
175 crc32c,
176 size,
177 excluded,
178 })
179}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds meta-file text from individual lines, joined with `\n` and
    /// without a trailing newline.
    fn meta_text(lines: &[&str]) -> String {
        lines.join("\n")
    }

    /// A meta file without a `schema_version` line parses.
    #[test]
    fn parse_schema_v1() {
        let text = meta_text(&[
            "1498774076",
            "590",
            "3",
            "private/1/CURRENT crc32 123456",
            "private/1/MANIFEST-000008 crc32 789012",
            "shared_checksum/000007_1498774076_590.sst crc32 345678",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        assert_eq!(parsed.timestamp, 1498774076);
        assert_eq!(parsed.sequence_number, 590);
        assert_eq!(parsed.files.len(), 3);

        let first = &parsed.files[0];
        assert_eq!(first.path, "private/1/CURRENT");
        assert_eq!(first.crc32c, Some(123456));
    }

    /// A `schema_version 2.x` file with metadata and extra file fields parses.
    #[test]
    fn parse_schema_v2() {
        let text = meta_text(&[
            "schema_version 2.1",
            "1498774076",
            "590",
            "metadata 48656c6c6f",
            "3",
            "private/1/CURRENT crc32 123456 size 16",
            "private/1/MANIFEST-000008 crc32 789012 size 1024",
            "shared_checksum/000007_1498774076_590.sst crc32 345678 size 65536 temp kCold",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        assert_eq!(parsed.timestamp, 1498774076);
        assert_eq!(parsed.sequence_number, 590);
        assert_eq!(parsed.metadata.as_deref(), Some("48656c6c6f"));
        assert_eq!(parsed.files.len(), 3);
        assert_eq!(parsed.files[2].size, Some(65536));
    }

    /// Empty input is reported as `Empty`.
    #[test]
    fn parse_empty() {
        let outcome = BackupMeta::parse("");
        assert!(matches!(outcome, Err(ParseError::Empty)));
    }

    /// A major version other than 2 is rejected.
    #[test]
    fn parse_unsupported_schema() {
        let outcome = BackupMeta::parse("schema_version 3.0\n0\n0\n0\n");
        let expected = Err(ParseError::InvalidSchemaVersion("3.0".to_string()));
        assert_eq!(outcome, expected);
    }

    /// An unknown `ni::` field on a file entry is an error.
    #[test]
    fn non_ignorable_field_rejected() {
        let text = meta_text(&[
            "1498774076",
            "590",
            "1",
            "private/1/CURRENT ni::unknown_field true",
        ]);

        let outcome = BackupMeta::parse(&text);
        assert!(matches!(outcome, Err(ParseError::NonIgnorableField(_))));
    }

    /// A huge declared file count with no entries fails with a mismatch.
    #[test]
    fn bogus_file_count_does_not_allocate() {
        let outcome = BackupMeta::parse("2\n2\n64406400");
        assert!(matches!(
            outcome,
            Err(ParseError::FileCountMismatch {
                expected: 64406400,
                actual: 0
            })
        ));
    }

    /// An explicit `schema_version 1.0` line is rejected.
    #[test]
    fn explicit_schema_v1_rejected() {
        let outcome = BackupMeta::parse("schema_version 1.0\n1000\n100\n0\n");
        let expected = Err(ParseError::InvalidSchemaVersion("1.0".to_string()));
        assert_eq!(outcome, expected);
    }

    /// An unknown `ni::` field in the header is an error.
    #[test]
    fn non_ignorable_header_field_rejected() {
        let text = meta_text(&[
            "schema_version 2.1",
            "1498774076",
            "590",
            "ni::future_breaking_field something",
            "0",
        ]);

        let outcome = BackupMeta::parse(&text);
        assert!(matches!(outcome, Err(ParseError::NonIgnorableField(_))));
    }

    /// An unknown header field without the `ni::` prefix is skipped.
    #[test]
    fn unknown_ignorable_header_field_skipped() {
        let text = meta_text(&[
            "schema_version 2.1",
            "1498774076",
            "590",
            "some_future_field data",
            "0",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        assert_eq!(parsed.timestamp, 1498774076);
        assert_eq!(parsed.files.len(), 0);
    }

    /// When `metadata` appears twice the later value is kept.
    #[test]
    fn duplicate_metadata_uses_last() {
        let text = meta_text(&[
            "schema_version 2.1",
            "1498774076",
            "590",
            "metadata aaa",
            "metadata bbb",
            "0",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        assert_eq!(parsed.metadata.as_deref(), Some("bbb"));
    }

    /// File entry fields are accepted regardless of their order.
    #[test]
    fn file_fields_in_any_order() {
        let text = meta_text(&[
            "1498774076",
            "590",
            "1",
            "private/1/CURRENT size 16 crc32 123456 temp kHot",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        let entry = &parsed.files[0];
        assert_eq!(entry.crc32c, Some(123456));
        assert_eq!(entry.size, Some(16));
    }

    /// When a file field appears twice the later value is kept.
    #[test]
    fn duplicate_file_fields_use_last() {
        let text = meta_text(&[
            "1498774076",
            "590",
            "1",
            "private/1/CURRENT crc32 111 crc32 222",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        assert_eq!(parsed.files[0].crc32c, Some(222));
    }

    /// An unknown file field without the `ni::` prefix is skipped.
    #[test]
    fn unknown_ignorable_file_field_skipped() {
        let text = meta_text(&[
            "1498774076",
            "590",
            "1",
            "private/1/CURRENT crc32 123 future_field value123",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        assert_eq!(parsed.files[0].crc32c, Some(123));
    }

    /// `ni::excluded` only accepts `true` or `false`.
    #[test]
    fn excluded_bad_value_rejected() {
        let text = meta_text(&[
            "1498774076",
            "590",
            "1",
            "private/1/CURRENT ni::excluded banana",
        ]);

        let outcome = BackupMeta::parse(&text);
        assert!(matches!(
            outcome,
            Err(ParseError::UnrecognizedFieldValue { .. })
        ));
    }

    /// `ni::excluded true` sets the `excluded` flag.
    #[test]
    fn excluded_file_parsed() {
        let text = meta_text(&[
            "schema_version 2.1",
            "1498774076",
            "590",
            "1",
            "shared_checksum/000007_123_456.sst crc32 999 ni::excluded true",
        ]);

        let parsed = BackupMeta::parse(&text).unwrap();
        assert!(parsed.files[0].excluded);
    }
}