1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
use crate::storage::{ResourceStorage, StorageHandle, Stream};

use memmap2::Mmap;
use std::{
    collections::HashMap,
    fs::File,
    io,
    ops::Range,
    path::{Path, PathBuf},
    slice,
    sync::Arc,
};

/// Internal storage of file entries in tar archive.
///
/// Holds the memory mapping of the whole archive file together with an index
/// that locates the contents of each entry inside that mapping.
#[derive(Debug)]
struct MemoryMappedTarArchiveStorage {
    /// Memory mapping of the complete tar archive file.
    archive_map: Mmap,
    /// Byte range of each entry's contents within `archive_map`, keyed by the
    /// entry's path (with a leading `.` component stripped).
    file_ranges: HashMap<PathBuf, Range<usize>>,
}

impl MemoryMappedTarArchiveStorage {
    /// Memory-maps the tar archive at `tar_path` and indexes the byte range of
    /// the contents of every entry it contains.
    ///
    /// Entry paths are stored with a leading `.` component stripped, so
    /// `./foo/bar` is indexed as `foo/bar`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or mapped, if the tar
    /// headers cannot be parsed, if an entry is a sparse file, or if the data
    /// of an entry does not lie completely inside the mapped archive
    /// (e.g. because the archive is truncated).
    pub fn new(tar_path: &Path) -> Result<Self, io::Error> {
        let file = File::open(tar_path)?;
        // SAFETY: the mapping is only ever read through shared slices. This
        // relies on the archive file not being modified or truncated by
        // someone else while it is mapped, which cannot be enforced from here.
        let archive_map = unsafe { Mmap::map(&file)? };
        // Lossless widening: `usize` is at most 64 bits wide.
        let archive_len = archive_map.len() as u64;
        let mut archive = tar::Archive::new(std::io::Cursor::new(&archive_map[..]));

        let file_ranges = archive
            .entries_with_seek()?
            .map(|entry| -> Result<(PathBuf, Range<usize>), io::Error> {
                let entry = entry?;
                let path = entry.path()?;
                let path = if let Ok(stripped_path) = path.strip_prefix(".") {
                    stripped_path.to_path_buf()
                } else {
                    path.to_path_buf()
                };
                if entry.header().entry_size()? != entry.size() {
                    // We can only memory-map contiguous files
                    return Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        "Sparse files are not supported",
                    ));
                }

                // Validate the range once here so that `read` can hand out
                // slices without ever indexing past the end of the mapping.
                let start = entry.raw_file_position();
                let end = start
                    .checked_add(entry.size())
                    .filter(|&end| end <= archive_len)
                    .ok_or_else(|| {
                        io::Error::new(
                            io::ErrorKind::InvalidData,
                            "Tar entry data lies outside of the archive",
                        )
                    })?;

                // Lossless narrowing: `start <= end <= archive_len`, and
                // `archive_len` originated from a `usize`.
                Ok((path, (start as usize)..(end as usize)))
            })
            .collect::<Result<HashMap<PathBuf, Range<usize>>, io::Error>>()?;

        Ok(Self {
            archive_map,
            file_ranges,
        })
    }

    /// Returns the contents of the entry stored under exactly `path`, or
    /// `None` if the archive has no such entry.
    ///
    /// The returned slice borrows directly from the memory mapping owned by
    /// `self`; no data is copied.
    pub fn read(&self, path: &Path) -> Option<&[u8]> {
        // The returned slice is tied to the borrow of `self`, which owns the
        // mapping, so plain safe indexing is sufficient here. All ranges were
        // bounds-checked against the mapping in `new`.
        self.file_ranges
            .get(path)
            .map(|range| &self.archive_map[range.clone()])
    }
}

/// Read-only resource storage on disk using a memory mapped tar archive.
///
/// Used to read flatdata archives from a tar archive on disk.
///
/// Storages created for sub-directories share the same underlying memory
/// mapping and only differ in the path prefix used to look up resources.
///
/// # Examples
///
/// ```rust,no_run
/// use flatdata::{TarArchiveResourceStorage, Vector};
/// use flatdata::test::X;
///
/// let storage = TarArchiveResourceStorage::new("/root/to/my/archive.tar")
///     .expect("failed to read tar archive");
/// let archive = X::open(storage).expect("failed to open");
/// // read data
/// archive.data();
/// ```
#[derive(Debug)]
pub struct TarArchiveResourceStorage {
    /// Shared, memory mapped tar archive that all resources are read from.
    storage: Arc<MemoryMappedTarArchiveStorage>,
    /// Path prefix inside the archive that resource names are joined onto.
    sub_path: PathBuf,
}

impl TarArchiveResourceStorage {
    /// Opens the tar archive located at `tar_path` as a memory mapped,
    /// read-only resource storage rooted at the top level of the archive.
    ///
    /// # Errors
    ///
    /// Forwards any error reported while memory-mapping and indexing the
    /// archive.
    pub fn new<P: Into<PathBuf>>(tar_path: P) -> Result<Arc<Self>, io::Error> {
        let archive_path: PathBuf = tar_path.into();
        let mapped_archive = MemoryMappedTarArchiveStorage::new(&archive_path)?;
        let root = Self {
            storage: Arc::new(mapped_archive),
            sub_path: PathBuf::new(),
        };
        Ok(Arc::new(root))
    }
}

impl ResourceStorage for TarArchiveResourceStorage {
    /// Creates a storage handle for the sub-directory `dir` that shares the
    /// memory mapped archive with this storage.
    fn subdir(&self, dir: &str) -> StorageHandle {
        let storage = Arc::clone(&self.storage);
        let sub_path = self.sub_path.join(dir);
        Arc::new(Self { storage, sub_path })
    }

    /// Tells whether the archive has an entry for `resource_name` below the
    /// sub-path of this storage.
    fn exists(&self, resource_name: &str) -> bool {
        let resource_path = self.sub_path.join(resource_name);
        self.storage.read(&resource_path).is_some()
    }

    /// Returns the bytes of the resource `resource_name` below the sub-path of
    /// this storage.
    ///
    /// Fails with `io::ErrorKind::NotFound` if the archive has no such entry;
    /// the error message carries the looked-up path.
    fn read_resource(&self, resource_name: &str) -> Result<&[u8], io::Error> {
        let resource_path = self.sub_path.join(resource_name);
        self.storage.read(&resource_path).ok_or_else(|| {
            // Fall back to the bare resource name if the full path is not valid UTF-8
            let message = String::from(resource_path.to_str().unwrap_or(resource_name));
            io::Error::new(io::ErrorKind::NotFound, message)
        })
    }

    /// Always fails: this storage is read-only.
    fn create_output_stream(&self, _resource_name: &str) -> Result<Box<dyn Stream>, io::Error> {
        let unsupported = io::Error::new(
            io::ErrorKind::Other,
            "Writing to tar archives is not supported",
        );
        Err(unsupported)
    }
}