conserve/index.rs

// Conserve backup system.
// Copyright 2015-2023 Martin Pool.

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

//! Index lists the files in a band in the archive.

use std::cmp::Ordering;
use std::iter::Peekable;
use std::path::Path;
use std::sync::Arc;
use std::vec;

use itertools::Itertools;
use time::OffsetDateTime;
use tracing::{debug, debug_span, error};

use crate::compress::snappy::{Compressor, Decompressor};
use crate::counters::Counter;
use crate::entry::KindMeta;
use crate::monitor::Monitor;
use crate::stats::IndexReadStats;
use crate::transport::local::LocalTransport;
use crate::unix_time::FromUnixAndNanos;
use crate::*;

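/// Number of hunk files stored in each numbered subdirectory of the index.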
pub const HUNKS_PER_SUBDIR: u32 = 10_000;

/// Description of one archived file.
///
/// This struct is encoded to and decoded from the JSON index file, and can also
/// be constructed by stat-ing (but not reading) a live file.
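///
/// A minimal serialized entry looks like
/// `{"apath":"/a/b","kind":"File","mtime":1461736377,"unix_mode":null}`
/// (see the `serialize_index` test below).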
// GRCOV_EXCLUDE_START
#[derive(Debug, Clone, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct IndexEntry {
    /// Path of this entry relative to the base of the backup, in `apath` form.
    pub apath: Apath,

    /// Type of file.
    pub kind: Kind,

    /// File modification time, in whole seconds past the Unix epoch.
    #[serde(default)]
    pub mtime: i64,

    /// Discretionary Access Control permissions (such as read/write/execute on Unix).
    #[serde(default)]
    pub unix_mode: UnixMode,

    /// User and group names of the owners of the file.
    #[serde(default, flatten, skip_serializing_if = "Owner::is_none")]
    pub owner: Owner,

    /// Fractional nanoseconds for modification time.
    ///
    /// This is zero in indexes written prior to 0.6.2, but treating it as
    /// zero is harmless: around the transition, files will be seen as
    /// potentially touched.
    ///
    /// It seems moderately common that the nanos are zero, probably because
    /// the time was set by something that didn't preserve them. In that case,
    /// skip serializing.
    #[serde(default)]
    #[serde(skip_serializing_if = "crate::misc::zero_u32")]
    pub mtime_nanos: u32,

    /// For stored files, the blocks holding the file contents.
    #[serde(default)]
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub addrs: Vec<blockdir::Address>,

    /// For symlinks only, the target of the symlink.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target: Option<String>,
}
// GRCOV_EXCLUDE_STOP

impl From<IndexEntry> for EntryValue {
    fn from(index_entry: IndexEntry) -> EntryValue {
        let kind_meta = match index_entry.kind {
            Kind::File => KindMeta::File {
                size: index_entry.addrs.iter().map(|a| a.len).sum(),
            },
            Kind::Symlink => KindMeta::Symlink {
                // TODO: Should not be fatal
                target: index_entry
                    .target
                    .expect("symlink entry should have a target"),
            },
            Kind::Dir => KindMeta::Dir,
            Kind::Unknown => KindMeta::Unknown,
        };
        EntryValue {
            apath: index_entry.apath,
            kind_meta,
            mtime: OffsetDateTime::from_unix_seconds_and_nanos(
                index_entry.mtime,
                index_entry.mtime_nanos,
            ),
            unix_mode: index_entry.unix_mode,
            owner: index_entry.owner,
        }
    }
}

impl EntryTrait for IndexEntry {
    /// Return apath relative to the top of the tree.
    fn apath(&self) -> &Apath {
        &self.apath
    }

    #[inline]
    fn kind(&self) -> Kind {
        self.kind
    }

    #[inline]
    fn mtime(&self) -> OffsetDateTime {
        OffsetDateTime::from_unix_seconds_and_nanos(self.mtime, self.mtime_nanos)
    }

    /// Size of the file content, summed over its blocks.
    ///
    /// Directories and symlinks have no blocks, so their size is zero.
    fn size(&self) -> Option<u64> {
        Some(self.addrs.iter().map(|a| a.len).sum())
    }

    /// Target of the symlink, if this is a symlink.
    #[inline]
    fn symlink_target(&self) -> Option<&str> {
        self.target.as_deref()
    }

    fn unix_mode(&self) -> UnixMode {
        self.unix_mode
    }

    fn owner(&self) -> &Owner {
        &self.owner
    }
}

impl IndexEntry {
    /// Copy the metadata, but not the body content, from another entry.
    ///
    /// The result has no blocks.
    pub(crate) fn metadata_from(source: &EntryValue) -> IndexEntry {
        let mtime = source.mtime();
        assert_eq!(
            source.symlink_target().is_some(),
            source.kind() == Kind::Symlink
        );
        IndexEntry {
            apath: source.apath().clone(),
            kind: source.kind(),
            addrs: Vec::new(),
            target: source.symlink_target().map(|t| t.to_owned()),
            mtime: mtime.unix_timestamp(),
            mtime_nanos: mtime.nanosecond(),
            unix_mode: source.unix_mode(),
            owner: source.owner().to_owned(),
        }
    }
}

/// Write out index hunks.
///
/// This struct is responsible for remembering the hunk number and checking that
/// the hunks preserve apath order.
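///
/// A minimal usage sketch (kept as `ignore` rather than a doctest;
/// `band_transport`, `entry`, and `monitor` are assumed placeholders for the
/// band's `i`-directory transport, an `IndexEntry`, and an `Arc<dyn Monitor>`):
///
/// ```ignore
/// let mut writer = IndexWriter::new(band_transport);
/// writer.push_entry(entry); // entries may arrive in any order within a hunk
/// writer.finish_hunk(monitor.clone())?; // flush the queued entries as one hunk
/// let hunks_written = writer.finish(monitor)?; // flush the rest and get the count
/// ```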
pub struct IndexWriter {
    /// The `i` directory within the band where all files for this index are written.
    transport: Arc<dyn Transport>,

    /// Currently queued entries to be written out, in arbitrary order.
    entries: Vec<IndexEntry>,

    /// Index hunk number, starting at 0.
    sequence: u32,

    /// Number of hunks actually written.
    hunks_written: usize,

    /// Checks that apaths are written in strictly increasing order, both
    /// within each hunk and from one hunk to the next.
    check_order: apath::DebugCheckOrder,

    compressor: Compressor,
}

/// Accumulate and write out index entries into files in an index directory.
impl IndexWriter {
    /// Make a new builder that will write files into the given directory.
    pub fn new(transport: Arc<dyn Transport>) -> IndexWriter {
        IndexWriter {
            transport,
            entries: Vec::new(),
            sequence: 0,
            hunks_written: 0,
            check_order: apath::DebugCheckOrder::new(),
            compressor: Compressor::new(),
        }
    }

    /// Finish the last hunk of this index, and return the number of hunks written.
    pub fn finish(mut self, monitor: Arc<dyn Monitor>) -> Result<usize> {
        self.finish_hunk(monitor)?;
        Ok(self.hunks_written)
    }

    /// Write new index entries.
    ///
    /// Entries within one hunk may be added in arbitrary order, but they must all
    /// sort after previously written content: every new entry must sort after
    /// everything already written to the index.
    pub(crate) fn push_entry(&mut self, entry: IndexEntry) {
        self.entries.push(entry);
    }

    pub(crate) fn append_entries(&mut self, entries: &mut Vec<IndexEntry>) {
        self.entries.append(entries);
    }

    /// Finish this hunk of the index.
    ///
    /// This writes all the currently queued entries into a new index file
    /// in the band directory, and then clears the buffer to start receiving
    /// entries for the next hunk.
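    ///
    /// Hunks are written to transport-relative paths like `00000/000000000`
    /// (see `hunk_relpath`); a new numbered subdirectory is created every
    /// `HUNKS_PER_SUBDIR` hunks.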
    pub fn finish_hunk(&mut self, monitor: Arc<dyn Monitor>) -> Result<()> {
        if self.entries.is_empty() {
            return Ok(());
        }
        self.entries.sort_unstable_by(|a, b| {
            debug_assert!(a.apath != b.apath);
            a.apath.cmp(&b.apath)
        });
        self.check_order.check(&self.entries[0].apath);
        if self.entries.len() > 1 {
            self.check_order.check(&self.entries.last().unwrap().apath);
        }
        let relpath = hunk_relpath(self.sequence);
        let json = serde_json::to_vec(&self.entries)?;
        if (self.sequence % HUNKS_PER_SUBDIR) == 0 {
            self.transport.create_dir(&subdir_relpath(self.sequence))?;
        }
        let compressed_bytes = self.compressor.compress(&json)?;
        self.transport.write_file(&relpath, &compressed_bytes)?;
        self.hunks_written += 1;
        monitor.count(Counter::IndexWrites, 1);
        monitor.count(Counter::IndexWriteCompressedBytes, compressed_bytes.len());
        monitor.count(Counter::IndexWriteUncompressedBytes, json.len());
        self.entries.clear(); // Ready for the next hunk.
        self.sequence += 1;
        Ok(())
    }
}

/// Return the transport-relative path for a subdirectory.
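/// For example, hunks 0 through 9999 live in `00000`, and hunk 10_000 starts `00001`.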
fn subdir_relpath(hunk_number: u32) -> String {
    format!("{:05}", hunk_number / HUNKS_PER_SUBDIR)
}

/// Return the relative path for a hunk.
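/// For example, `hunk_relpath(0)` is `00000/000000000`.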
#[mutants::skip] // By default it returns "" which causes a loop. TODO: Avoid the loop.
fn hunk_relpath(hunk_number: u32) -> String {
    format!("{:05}/{:09}", hunk_number / HUNKS_PER_SUBDIR, hunk_number)
}

// TODO: Maybe this isn't adding much on top of the hunk iter?
#[derive(Debug, Clone)]
pub struct IndexRead {
    /// Transport pointing to this index directory.
    transport: Arc<dyn Transport>,
}

impl IndexRead {
    #[allow(unused)]
    pub(crate) fn open_path(path: &Path) -> IndexRead {
        IndexRead::open(Arc::new(LocalTransport::new(path)))
    }

    pub(crate) fn open(transport: Arc<dyn Transport>) -> IndexRead {
        IndexRead { transport }
    }

    /// Make an iterator that will return all entries in this band.
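    ///
    /// Entries are returned in apath order, covering the whole tree with no
    /// exclusions applied.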
    pub fn iter_entries(self) -> IndexEntryIter<IndexHunkIter> {
        // TODO: An option to pass in a subtree?
        IndexEntryIter::new(self.iter_hunks(), Apath::root(), Exclude::nothing())
    }

    /// Make an iterator that returns hunks of entries from this index.
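    ///
    /// Hunks are discovered by listing the numbered subdirectories and are
    /// yielded in increasing hunk-number order.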
    pub fn iter_hunks(&self) -> IndexHunkIter {
        let _span = debug_span!("iter_hunks", ?self.transport).entered();
        // All hunk numbers present in all directories.
        let subdirs = self
            .transport
            .list_dir("")
            .expect("list index dir") // TODO: Don't panic
            .dirs
            .into_iter()
            .sorted()
            .collect_vec();
        debug!(?subdirs);
        let hunks = subdirs
            .into_iter()
            .filter_map(|dir| self.transport.list_dir(&dir).ok())
            .flat_map(|list| list.files)
            .filter_map(|f| f.parse::<u32>().ok())
            .sorted()
            .collect_vec();
        debug!(?hunks);
        IndexHunkIter {
            hunks: hunks.into_iter(),
            transport: Arc::clone(&self.transport),
            decompressor: Decompressor::new(),
            stats: IndexReadStats::default(),
            after: None,
        }
    }
}

/// Read hunks of entries from a stored index, in apath order.
///
/// Each returned item is a vec of (typically up to a thousand) index entries.
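///
/// Hunks that fail to read or deserialize are counted in `stats.errors` and
/// skipped rather than ending iteration.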
pub struct IndexHunkIter {
    hunks: std::vec::IntoIter<u32>,
    /// The `i` directory within the band where all files for this index are written.
    transport: Arc<dyn Transport>,
    decompressor: Decompressor,
    pub stats: IndexReadStats,
    /// If set, yield only entries ordered after this apath.
    after: Option<Apath>,
}

impl Iterator for IndexHunkIter {
    type Item = Vec<IndexEntry>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let hunk_number = self.hunks.next()?;
            let entries = match self.read_next_hunk(hunk_number) {
                Ok(None) => return None,
                Ok(Some(entries)) => entries,
                Err(err) => {
                    self.stats.errors += 1;
                    error!("Error reading index hunk {hunk_number:?}: {err}");
                    continue;
                }
            };
            if let Some(ref after) = self.after {
                if let Some(last) = entries.last() {
                    if last.apath <= *after {
                        continue;
                    }
                }
                if let Some(first) = entries.first() {
                    if first.apath > *after {
                        self.after = None; // don't need to look again
                        return Some(entries);
                    }
                }
                let idx = match entries.binary_search_by_key(&after, |entry| &entry.apath) {
                    Ok(idx) => idx + 1, // after the point it was found
                    Err(idx) => idx,    // from the point it would have been
                };
                return Some(Vec::from(&entries[idx..]));
            }
            if !entries.is_empty() {
                return Some(entries);
            }
        }
    }
}

impl IndexHunkIter {
    /// Advance self so that it returns only entries with apaths ordered after `apath`.
    #[must_use]
    pub fn advance_to_after(self, apath: &Apath) -> Self {
        IndexHunkIter {
            after: Some(apath.clone()),
            ..self
        }
    }

    fn read_next_hunk(&mut self, hunk_number: u32) -> Result<Option<Vec<IndexEntry>>> {
        let path = hunk_relpath(hunk_number);
        let compressed_bytes = match self.transport.read_file(&path) {
            Ok(b) => b,
            Err(err) if err.is_not_found() => {
                // TODO: Cope with one hunk being missing, while there are still
                // later-numbered hunks. This would require reading the whole
                // list of hunks first.
                return Ok(None);
            }
            Err(source) => return Err(Error::Transport { source }),
        };
        self.stats.index_hunks += 1;
        self.stats.compressed_index_bytes += compressed_bytes.len() as u64;
        let index_bytes = self.decompressor.decompress(&compressed_bytes)?;
        self.stats.uncompressed_index_bytes += index_bytes.len() as u64;
        let entries: Vec<IndexEntry> =
            serde_json::from_slice(&index_bytes).map_err(|source| Error::DeserializeJson {
                path: path.clone(),
                source,
            })?;
        if entries.is_empty() {
            // It's legal, it's just weird - and it can be produced by some old Conserve versions.
        }
        Ok(Some(entries))
    }
}

/// Read out all the entries from a stored index, in apath order.
// TODO: Maybe fold this into stitch.rs; we'd rarely want them without stitching...
pub struct IndexEntryIter<HI: Iterator<Item = Vec<IndexEntry>>> {
    /// Temporarily buffered entries, read from the index files but not yet
    /// returned to the client.
    buffered_entries: Peekable<vec::IntoIter<IndexEntry>>,
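    /// Source of further hunks of entries, in apath order.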
    hunk_iter: HI,
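    /// Only entries under this subtree are returned.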
    subtree: Apath,
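    /// Entries matching these exclusions are skipped.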
    exclude: Exclude,
}

impl<HI: Iterator<Item = Vec<IndexEntry>>> IndexEntryIter<HI> {
    pub(crate) fn new(hunk_iter: HI, subtree: Apath, exclude: Exclude) -> Self {
        IndexEntryIter {
            buffered_entries: Vec::<IndexEntry>::new().into_iter().peekable(),
            hunk_iter,
            subtree,
            exclude,
        }
    }
}

impl<HI: Iterator<Item = Vec<IndexEntry>>> Iterator for IndexEntryIter<HI> {
    type Item = IndexEntry;

    fn next(&mut self) -> Option<IndexEntry> {
        loop {
            if let Some(entry) = self.buffered_entries.next() {
                // TODO: We could be smarter about skipping ahead if nothing
                // in this page matches; or terminating early if we know
                // nothing else in the index can be under this subtree.
                if !self.subtree.is_prefix_of(&entry.apath) {
                    continue;
                }
                if self.exclude.matches(&entry.apath) {
                    continue;
                }
                return Some(entry);
            }
            if !self.refill_entry_buffer_or_warn() {
                return None;
            }
        }
    }
}

impl<HI: Iterator<Item = Vec<IndexEntry>>> IndexEntryIter<HI> {
    /// Return the entry for the given apath, if it is present, otherwise None.
    /// It follows that this also returns None at the end of the index.
    ///
    /// After this is called, the iterator has skipped forward to this apath,
    /// discarding entries for any earlier files. However, even if the apath
    /// is not present, other entries coming after it can still be read.
    pub fn advance_to(&mut self, apath: &Apath) -> Option<IndexEntry> {
        // This takes some care because we don't want to consume the entry
        // that tells us we went too far.
        loop {
            if let Some(cand) = self.buffered_entries.peek() {
                match cand.apath.cmp(apath) {
                    Ordering::Less => {
                        // Discard this and continue looking
                        self.buffered_entries.next().unwrap();
                    }
                    Ordering::Equal => {
                        return Some(self.buffered_entries.next().unwrap());
                    }
                    Ordering::Greater => {
                        // We passed the point where this entry would have been:
                        return None;
                    }
                }
            } else if !self.refill_entry_buffer_or_warn() {
                return None;
            }
        }
    }

    /// Read another hunk file and put it into buffered_entries.
    ///
    /// Returns true if another hunk could be found, otherwise false.
    fn refill_entry_buffer_or_warn(&mut self) -> bool {
        assert!(
            self.buffered_entries.next().is_none(),
            "refill_entry_buffer_or_warn called with non-empty buffer"
        );
        if let Some(new_entries) = self.hunk_iter.next() {
            self.buffered_entries = new_entries.into_iter().peekable();
            true
        } else {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use crate::monitor::test::TestMonitor;

    use super::*;

    fn setup() -> (TempDir, IndexWriter) {
        let testdir = TempDir::new().unwrap();
        let ib = IndexWriter::new(Arc::new(LocalTransport::new(testdir.path())));
        (testdir, ib)
    }

    fn sample_entry(apath: &str) -> IndexEntry {
        IndexEntry {
            apath: apath.into(),
            mtime: 1_461_736_377,
            mtime_nanos: 0,
            kind: Kind::File,
            addrs: vec![],
            target: None,
            unix_mode: Default::default(),
            owner: Default::default(),
        }
    }

    #[test]
    fn serialize_index() {
        let entries = [IndexEntry {
            apath: "/a/b".into(),
            mtime: 1_461_736_377,
            mtime_nanos: 0,
            kind: Kind::File,
            addrs: vec![],
            target: None,
            unix_mode: Default::default(),
            owner: Default::default(),
        }];
        let index_json = serde_json::to_string(&entries).unwrap();
        println!("{index_json}");
        assert_eq!(
            index_json,
            "[{\"apath\":\"/a/b\",\
             \"kind\":\"File\",\
             \"mtime\":1461736377,\
             \"unix_mode\":null}]"
        );
    }

    #[test]
    fn index_builder_sorts_entries() {
        let (_testdir, mut ib) = setup();
        ib.push_entry(sample_entry("/zzz"));
        ib.push_entry(sample_entry("/aaa"));
        ib.finish_hunk(TestMonitor::arc()).unwrap();
    }

    #[test]
    #[should_panic]
    fn index_builder_checks_names() {
        let (_testdir, mut ib) = setup();
        ib.push_entry(sample_entry("../escapecat"));
        ib.finish_hunk(TestMonitor::arc()).unwrap();
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn no_duplicate_paths() {
        let (_testdir, mut ib) = setup();
        ib.push_entry(sample_entry("/again"));
        ib.push_entry(sample_entry("/again"));
        ib.finish_hunk(TestMonitor::arc()).unwrap();
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic]
    fn no_duplicate_paths_across_hunks() {
        let (_testdir, mut ib) = setup();
        ib.push_entry(sample_entry("/again"));
        ib.finish_hunk(TestMonitor::arc()).unwrap();
        ib.push_entry(sample_entry("/again"));
        ib.finish_hunk(TestMonitor::arc()).unwrap();
    }

    #[test]
    fn path_for_hunk() {
        assert_eq!(super::hunk_relpath(0), "00000/000000000");
    }

    #[test]
    fn basic() {
        let (testdir, mut ib) = setup();
        let monitor = TestMonitor::arc();
        ib.append_entries(&mut vec![sample_entry("/apple"), sample_entry("/banana")]);
        let hunks = ib.finish(monitor.clone()).unwrap();
        assert_eq!(monitor.get_counter(Counter::IndexWrites), 1);

        assert_eq!(hunks, 1);
        let counters = monitor.counters();
        dbg!(&counters);
        assert!(counters.get(Counter::IndexWriteCompressedBytes) > 30);
        assert!(counters.get(Counter::IndexWriteCompressedBytes) < 125);
        assert!(counters.get(Counter::IndexWriteUncompressedBytes) > 100);
        assert!(counters.get(Counter::IndexWriteUncompressedBytes) < 250);

        assert!(
            std::fs::metadata(testdir.path().join("00000").join("000000000"))
                .unwrap()
                .is_file(),
            "Index hunk file not found"
        );

        let mut it = IndexRead::open_path(testdir.path()).iter_entries();
        let entry = it.next().expect("Get first entry");
        assert_eq!(&entry.apath, "/apple");
        let entry = it.next().expect("Get second entry");
        assert_eq!(&entry.apath, "/banana");
        assert!(it.next().is_none(), "Expected no more entries");
    }

    #[test]
    fn multiple_hunks() {
        let (testdir, mut ib) = setup();
        ib.append_entries(&mut vec![sample_entry("/1.1"), sample_entry("/1.2")]);
        ib.finish_hunk(TestMonitor::arc()).unwrap();
        ib.append_entries(&mut vec![sample_entry("/2.1"), sample_entry("/2.2")]);
        ib.finish_hunk(TestMonitor::arc()).unwrap();

        let index_read = IndexRead::open_path(testdir.path());
        let it = index_read.iter_entries();
        let names: Vec<String> = it.map(|x| x.apath.into()).collect();
        assert_eq!(names, &["/1.1", "/1.2", "/2.1", "/2.2"]);

        // Read it out as hunks.
        let hunks: Vec<Vec<IndexEntry>> =
            IndexRead::open_path(testdir.path()).iter_hunks().collect();
        assert_eq!(hunks.len(), 2);
        assert_eq!(
            hunks[0]
                .iter()
                .map(|entry| entry.apath())
                .collect::<Vec<_>>(),
            vec!["/1.1", "/1.2"]
        );
        assert_eq!(
            hunks[1]
                .iter()
                .map(|entry| entry.apath())
                .collect::<Vec<_>>(),
            vec!["/2.1", "/2.2"]
        );
    }

    #[test]
    fn iter_hunks_advance_to_after() {
        let (testdir, mut ib) = setup();
        ib.append_entries(&mut vec![sample_entry("/1.1"), sample_entry("/1.2")]);
        ib.finish_hunk(TestMonitor::arc()).unwrap();
        ib.append_entries(&mut vec![sample_entry("/2.1"), sample_entry("/2.2")]);
        ib.finish_hunk(TestMonitor::arc()).unwrap();

        let index_read = IndexRead::open_path(testdir.path());
        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, ["/1.1", "/1.2", "/2.1", "/2.2"]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/nonexistent".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, [""; 0]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/1.1".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, ["/1.2", "/2.1", "/2.2"]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/1.1.1".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, ["/1.2", "/2.1", "/2.2"]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/1.2".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, ["/2.1", "/2.2"]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/1.3".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, ["/2.1", "/2.2"]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/2.0".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, ["/2.1", "/2.2"]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/2.1".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, ["/2.2"]);

        let names: Vec<String> = index_read
            .iter_hunks()
            .advance_to_after(&"/2.2".into())
            .flatten()
            .map(|entry| entry.apath.into())
            .collect();
        assert_eq!(names, [] as [&str; 0]);
    }

    #[test]
    fn advance() {
        let (testdir, mut ib) = setup();
        ib.push_entry(sample_entry("/bar"));
        ib.push_entry(sample_entry("/foo"));
        ib.push_entry(sample_entry("/foobar"));
        ib.finish_hunk(TestMonitor::arc()).unwrap();

        // Make multiple hunks to test traversal across hunks.
        ib.push_entry(sample_entry("/g01"));
        ib.push_entry(sample_entry("/g02"));
        ib.push_entry(sample_entry("/g03"));
        ib.finish_hunk(TestMonitor::arc()).unwrap();

        // Advance to /foo and read on from there.
        let mut it = IndexRead::open_path(testdir.path()).iter_entries();
        assert_eq!(it.advance_to(&Apath::from("/foo")).unwrap().apath, "/foo");
        assert_eq!(it.next().unwrap().apath, "/foobar");
        assert_eq!(it.next().unwrap().apath, "/g01");

        // Advance to before /g01
        let mut it = IndexRead::open_path(testdir.path()).iter_entries();
        assert_eq!(it.advance_to(&Apath::from("/fxxx")), None);
        assert_eq!(it.next().unwrap().apath, "/g01");
        assert_eq!(it.next().unwrap().apath, "/g02");

        // Advance to before the first entry
        let mut it = IndexRead::open_path(testdir.path()).iter_entries();
        assert_eq!(it.advance_to(&Apath::from("/aaaa")), None);
        assert_eq!(it.next().unwrap().apath, "/bar");
        assert_eq!(it.next().unwrap().apath, "/foo");

        // Advance to after the last entry
        let mut it = IndexRead::open_path(testdir.path()).iter_entries();
        assert_eq!(it.advance_to(&Apath::from("/zz")), None);
        assert_eq!(it.next(), None);
    }

    /// Exactly fill the first hunk: there shouldn't be an empty second hunk.
    ///
    /// https://github.com/sourcefrog/conserve/issues/95
    #[test]
    fn no_final_empty_hunk() -> Result<()> {
        let (testdir, mut ib) = setup();
        for i in 0..100_000 {
            ib.push_entry(sample_entry(&format!("/{i:0>10}")));
        }
        ib.finish_hunk(TestMonitor::arc())?;
        // Think about, but don't actually add, some files.
        ib.finish_hunk(TestMonitor::arc())?;
        let read_index = IndexRead::open_path(testdir.path());
        assert_eq!(read_index.iter_hunks().count(), 1);
        Ok(())
    }
}