flatdata/
tarstorage.rs

1use crate::storage::{ResourceStorage, StorageHandle, Stream};
2
3use memmap2::Mmap;
4use std::{
5    collections::HashMap,
6    fs::File,
7    io,
8    ops::Range,
9    path::{Path, PathBuf},
10    slice,
11    sync::Arc,
12};
13
14/// Internal storage of file entries in tar archive.
15#[derive(Debug)]
16struct MemoryMappedTarArchiveStorage {
17    archive_map: Mmap,
18    file_ranges: HashMap<PathBuf, Range<usize>>,
19}
20
21impl MemoryMappedTarArchiveStorage {
22    pub fn new(tar_path: &Path) -> Result<Self, io::Error> {
23        let file = File::open(tar_path)?;
24        let archive_map = unsafe { Mmap::map(&file)? };
25        let mut archive = tar::Archive::new(std::io::Cursor::new(&archive_map[..]));
26
27        let file_ranges = archive
28            .entries_with_seek()?
29            .map(|entry| {
30                let entry = entry?;
31                let path = entry.path()?;
32                let path = if let Ok(stripped_path) = path.strip_prefix(".") {
33                    stripped_path.to_path_buf()
34                } else {
35                    path.to_path_buf()
36                };
37                let offset = entry.raw_file_position() as usize;
38                let size = entry.size() as usize;
39                if entry.header().entry_size()? != entry.size() {
40                    // We can only memory-map contiguous files
41                    return Err(io::Error::new(
42                        io::ErrorKind::InvalidData,
43                        "Sparse files are not supported",
44                    ));
45                }
46
47                Ok((path, offset..offset + size))
48            })
49            .collect::<Result<HashMap<PathBuf, Range<usize>>, io::Error>>()?;
50
51        Ok(Self {
52            archive_map,
53            file_ranges,
54        })
55    }
56
57    pub fn read(&self, path: &Path) -> Option<&[u8]> {
58        self.file_ranges.get(path).map(|range| {
59            // We cannot prove to Rust that the buffer will live as long as the storage
60            // (we never delete mappings), so we need to manually extend lifetime
61            let extended_lifetime_archive_map =
62                unsafe { slice::from_raw_parts(self.archive_map.as_ptr(), self.archive_map.len()) };
63
64            &extended_lifetime_archive_map[range.clone()]
65        })
66    }
67}
68
69/// Read-only resource storage on disk using a memory mapped tar archive.
70///
71/// Used to read flatdata archives from a tar archive on disk.
72///
73/// # Examples
74///
75/// ```rust,no_run
76/// use flatdata::{TarArchiveResourceStorage, Vector};
77/// use flatdata::test::X;
78///
79/// let storage = TarArchiveResourceStorage::new("/root/to/my/archive.tar")
80///     .expect("failed to read tar archive");
81/// let archive = X::open(storage).expect("failed to open");
82/// // read data
83/// archive.data();
84/// ```
85#[derive(Debug)]
86pub struct TarArchiveResourceStorage {
87    storage: Arc<MemoryMappedTarArchiveStorage>,
88    sub_path: PathBuf,
89}
90
91impl TarArchiveResourceStorage {
92    /// Create a memory mapped tar archive resource storage for a tar archive at a given path.
93    pub fn new<P: Into<PathBuf>>(tar_path: P) -> Result<Arc<Self>, io::Error> {
94        Ok(Arc::new(Self {
95            storage: Arc::new(MemoryMappedTarArchiveStorage::new(&tar_path.into())?),
96            sub_path: PathBuf::new(),
97        }))
98    }
99}
100
101impl ResourceStorage for TarArchiveResourceStorage {
102    fn subdir(&self, dir: &str) -> StorageHandle {
103        Arc::new(Self {
104            storage: self.storage.clone(),
105            sub_path: self.sub_path.join(dir),
106        })
107    }
108
109    fn exists(&self, resource_name: &str) -> bool {
110        self.storage
111            .read(&self.sub_path.join(resource_name))
112            .is_some()
113    }
114
115    fn read_resource(&self, resource_name: &str) -> Result<&[u8], io::Error> {
116        let resource_path = self.sub_path.join(resource_name);
117        if let Some(data) = self.storage.read(&resource_path) {
118            Ok(data)
119        } else {
120            Err(io::Error::new(
121                io::ErrorKind::NotFound,
122                String::from(resource_path.to_str().unwrap_or(resource_name)),
123            ))
124        }
125    }
126
127    fn create_output_stream(&self, _resource_name: &str) -> Result<Box<dyn Stream>, io::Error> {
128        Err(io::Error::new(
129            io::ErrorKind::Other,
130            "Writing to tar archives is not supported",
131        ))
132    }
133}