Skip to main content

pglite_oxide/pglite/
data_dir.rs

1use std::collections::BTreeMap;
2use std::fs::{self, File};
3use std::io::{Cursor, Read, Write};
4use std::path::{Component, Path, PathBuf};
5
6use anyhow::{Context, Result, bail};
7use flate2::Compression;
8use flate2::read::GzDecoder;
9use flate2::write::GzEncoder;
10use tar::{Archive, Builder, EntryType, Header};
11
/// Name of the overlay manifest file; it is skipped when it sits at the PGDATA root.
const PGDATA_OVERLAY_MANIFEST_NAME: &str = ".pglite-oxide-pgdata-overlay.json";

/// Root-level files that are never archived or unpacked.
const RUNTIME_STATE_FILES: &[&str] = &[
    "postmaster.pid",
    "postmaster.opts",
];

/// File-name prefix that marks an overlay whiteout (a deletion marker for its sibling).
const OVERLAY_WHITEOUT_PREFIX: &str = ".wh.";
15
/// Compression format for physical PGDATA archives.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DataDirArchiveFormat {
    /// Plain, uncompressed tar bytes.
    Tar,
    /// Tar bytes wrapped in a gzip stream.
    TarGz,
}
22
/// What a collected PGDATA entry is and, for files, where its bytes are read from.
#[derive(Clone, Debug)]
enum EntrySource {
    /// A directory; it carries no payload.
    Directory,
    /// A regular file, identified by the on-disk path its contents are read from.
    File(PathBuf),
}
28
29pub(crate) fn dump_pgdata_archive(
30    pgdata_upper: &Path,
31    pgdata_lower: Option<&Path>,
32    format: DataDirArchiveFormat,
33) -> Result<Vec<u8>> {
34    let materialized = materialize_pgdata_view(pgdata_upper, pgdata_lower)?;
35    dump_materialized_pgdata_archive(materialized.path(), format)
36}
37
38fn dump_materialized_pgdata_archive(
39    pgdata: &Path,
40    format: DataDirArchiveFormat,
41) -> Result<Vec<u8>> {
42    let mut entries = BTreeMap::<PathBuf, EntrySource>::new();
43    collect_pgdata_entries(pgdata, pgdata, &mut entries)?;
44
45    let mut tar_bytes = Vec::new();
46    {
47        let mut builder = Builder::new(&mut tar_bytes);
48        for (relative, source) in entries {
49            let archive_path = archive_path(&relative);
50            match source {
51                EntrySource::Directory => {
52                    let mut header = Header::new_gnu();
53                    header.set_entry_type(EntryType::Directory);
54                    header.set_mode(0o755);
55                    header.set_mtime(0);
56                    header.set_size(0);
57                    header.set_cksum();
58                    builder
59                        .append_data(&mut header, archive_path, Cursor::new(Vec::<u8>::new()))
60                        .context("append PGDATA directory to archive")?;
61                }
62                EntrySource::File(path) => {
63                    let mut file =
64                        File::open(&path).with_context(|| format!("open {}", path.display()))?;
65                    let size = file
66                        .metadata()
67                        .with_context(|| format!("stat {}", path.display()))?
68                        .len();
69                    let mut header = Header::new_gnu();
70                    header.set_entry_type(EntryType::Regular);
71                    header.set_mode(0o644);
72                    header.set_mtime(0);
73                    header.set_size(size);
74                    header.set_cksum();
75                    builder
76                        .append_data(&mut header, archive_path, &mut file)
77                        .with_context(|| format!("append {}", path.display()))?;
78                }
79            }
80        }
81        builder.finish().context("finish PGDATA tar archive")?;
82    }
83
84    match format {
85        DataDirArchiveFormat::Tar => Ok(tar_bytes),
86        DataDirArchiveFormat::TarGz => {
87            let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
88            encoder
89                .write_all(&tar_bytes)
90                .context("gzip PGDATA archive")?;
91            encoder.finish().context("finish gzipped PGDATA archive")
92        }
93    }
94}
95
96fn materialize_pgdata_view(
97    pgdata_upper: &Path,
98    pgdata_lower: Option<&Path>,
99) -> Result<tempfile::TempDir> {
100    let temp = tempfile::TempDir::new().context("create materialized PGDATA archive view")?;
101    if let Some(lower) = pgdata_lower {
102        copy_pgdata_tree(lower, lower, temp.path(), false)?;
103    }
104    copy_pgdata_tree(pgdata_upper, pgdata_upper, temp.path(), true)?;
105    Ok(temp)
106}
107
108pub(crate) fn unpack_pgdata_archive(bytes: &[u8], destination: &Path) -> Result<()> {
109    let reader: Box<dyn Read> = if bytes.starts_with(&[0x1f, 0x8b]) {
110        Box::new(GzDecoder::new(Cursor::new(bytes)))
111    } else {
112        Box::new(Cursor::new(bytes))
113    };
114    let mut archive = Archive::new(reader);
115    for entry in archive.entries().context("read PGDATA archive entries")? {
116        let mut entry = entry.context("read PGDATA archive entry")?;
117        let path = entry
118            .path()
119            .context("read PGDATA archive entry path")?
120            .into_owned();
121        let relative = normalize_archive_path(&path)?;
122        if relative.as_os_str().is_empty() {
123            continue;
124        }
125        if should_skip_relative(&relative) {
126            continue;
127        }
128        let dest = destination.join(&relative);
129        let entry_type = entry.header().entry_type();
130        if entry_type.is_dir() {
131            fs::create_dir_all(&dest)
132                .with_context(|| format!("create PGDATA directory {}", dest.display()))?;
133            continue;
134        }
135        if !entry_type.is_file() {
136            bail!(
137                "PGDATA archive entry {} has unsupported type {:?}",
138                path.display(),
139                entry_type
140            );
141        }
142        if let Some(parent) = dest.parent() {
143            fs::create_dir_all(parent)
144                .with_context(|| format!("create PGDATA directory {}", parent.display()))?;
145        }
146        entry
147            .unpack(&dest)
148            .with_context(|| format!("unpack PGDATA archive entry {}", path.display()))?;
149    }
150    Ok(())
151}
152
153fn collect_pgdata_entries(
154    root: &Path,
155    current: &Path,
156    entries: &mut BTreeMap<PathBuf, EntrySource>,
157) -> Result<()> {
158    if !current.exists() {
159        return Ok(());
160    }
161    let mut children = fs::read_dir(current)
162        .with_context(|| format!("read PGDATA directory {}", current.display()))?
163        .collect::<std::io::Result<Vec<_>>>()
164        .with_context(|| format!("read PGDATA directory entries {}", current.display()))?;
165    children.sort_by_key(|entry| entry.path());
166
167    for child in children {
168        let path = child.path();
169        let relative = path
170            .strip_prefix(root)
171            .with_context(|| format!("strip PGDATA root {}", root.display()))?
172            .to_path_buf();
173        if should_skip_relative(&relative) {
174            continue;
175        }
176        let file_type = child
177            .file_type()
178            .with_context(|| format!("stat {}", path.display()))?;
179        if file_type.is_dir() {
180            entries.insert(relative.clone(), EntrySource::Directory);
181            collect_pgdata_entries(root, &path, entries)?;
182        } else if file_type.is_file() {
183            entries.insert(relative, EntrySource::File(path));
184        }
185    }
186    Ok(())
187}
188
189fn copy_pgdata_tree(
190    root: &Path,
191    current: &Path,
192    destination_root: &Path,
193    apply_whiteouts: bool,
194) -> Result<()> {
195    if !current.exists() {
196        return Ok(());
197    }
198    let mut children = fs::read_dir(current)
199        .with_context(|| format!("read PGDATA directory {}", current.display()))?
200        .collect::<std::io::Result<Vec<_>>>()
201        .with_context(|| format!("read PGDATA directory entries {}", current.display()))?;
202    children.sort_by_key(|entry| entry.path());
203
204    for child in children {
205        let src = child.path();
206        let relative = src
207            .strip_prefix(root)
208            .with_context(|| format!("strip PGDATA root {}", root.display()))?
209            .to_path_buf();
210        if apply_whiteouts && let Some(target) = whiteout_target_relative(&relative) {
211            let dest = destination_root.join(target);
212            remove_materialized_entry(&dest)?;
213            continue;
214        }
215        if should_skip_relative(&relative) {
216            continue;
217        }
218
219        let dest = destination_root.join(&relative);
220        let file_type = child
221            .file_type()
222            .with_context(|| format!("stat {}", src.display()))?;
223        if file_type.is_dir() {
224            fs::create_dir_all(&dest).with_context(|| {
225                format!("create materialized PGDATA directory {}", dest.display())
226            })?;
227            copy_pgdata_tree(root, &src, destination_root, apply_whiteouts)?;
228        } else if file_type.is_file() {
229            if let Some(parent) = dest.parent() {
230                fs::create_dir_all(parent).with_context(|| {
231                    format!("create materialized PGDATA directory {}", parent.display())
232                })?;
233            }
234            fs::copy(&src, &dest).with_context(|| {
235                format!(
236                    "copy PGDATA archive file {} -> {}",
237                    src.display(),
238                    dest.display()
239                )
240            })?;
241        }
242    }
243    Ok(())
244}
245
246fn remove_materialized_entry(path: &Path) -> Result<()> {
247    match fs::symlink_metadata(path) {
248        Ok(metadata) if metadata.is_dir() => fs::remove_dir_all(path)
249            .with_context(|| format!("remove materialized whiteout directory {}", path.display())),
250        Ok(_) => fs::remove_file(path)
251            .with_context(|| format!("remove materialized whiteout file {}", path.display())),
252        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
253        Err(err) => Err(err)
254            .with_context(|| format!("stat materialized whiteout target {}", path.display())),
255    }
256}
257
258fn should_skip_relative(relative: &Path) -> bool {
259    relative == Path::new(PGDATA_OVERLAY_MANIFEST_NAME)
260        || whiteout_target_relative(relative).is_some()
261        || RUNTIME_STATE_FILES
262            .iter()
263            .any(|name| relative == Path::new(name))
264}
265
266fn whiteout_target_relative(relative: &Path) -> Option<PathBuf> {
267    let file_name = relative.file_name()?.to_string_lossy();
268    let target_file_name = file_name.strip_prefix(OVERLAY_WHITEOUT_PREFIX)?;
269    let mut target = relative.to_path_buf();
270    target.set_file_name(target_file_name);
271    Some(target)
272}
273
/// Renders a PGDATA-relative path as the `/`-separated name stored in the archive.
///
/// The path is converted lossily to UTF-8. Backslashes are rewritten to `/` only on
/// Windows, where they are path separators. Elsewhere a backslash is an ordinary
/// file-name character, and rewriting it would store the file under a different path
/// than the one it has on disk.
fn archive_path(relative: &Path) -> String {
    let text = relative.to_string_lossy();
    if cfg!(windows) {
        text.replace('\\', "/")
    } else {
        text.into_owned()
    }
}
277
278fn normalize_archive_path(path: &Path) -> Result<PathBuf> {
279    let mut dest = PathBuf::new();
280    for component in path.components() {
281        match component {
282            Component::RootDir | Component::CurDir => {}
283            Component::Normal(part) => dest.push(part),
284            Component::ParentDir | Component::Prefix(_) => {
285                bail!("unsafe PGDATA archive path {}", path.display())
286            }
287        }
288    }
289    Ok(dest)
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Lists the normalized, `/`-separated path of every entry in a tar archive.
    fn archive_entries(bytes: &[u8]) -> Result<BTreeSet<String>> {
        let mut archive = Archive::new(Cursor::new(bytes));
        let mut names = BTreeSet::new();
        for item in archive.entries()? {
            let raw = item?.path()?.into_owned();
            names.insert(archive_path(&normalize_archive_path(&raw)?));
        }
        Ok(names)
    }

    /// Whiteouts in the upper layer hide lower files and whole lower directories, and the
    /// markers themselves never reach the archive.
    #[test]
    fn pgdata_archive_applies_overlay_whiteouts() -> Result<()> {
        let scratch = tempfile::TempDir::new()?;
        let lower = scratch.path().join("lower");
        let upper = scratch.path().join("upper");
        fs::create_dir_all(lower.join("base/1/tree"))?;
        fs::create_dir_all(upper.join("base/1"))?;
        for (name, body) in [
            ("base/1/deleted", "lower-deleted"),
            ("base/1/kept", "lower-kept"),
            ("base/1/tree/child", "lower-child"),
        ] {
            fs::write(lower.join(name), body)?;
        }
        for marker in ["base/1/.wh.deleted", "base/1/.wh.tree"] {
            fs::write(upper.join(marker), b"")?;
        }

        let archive = dump_pgdata_archive(&upper, Some(&lower), DataDirArchiveFormat::Tar)?;
        let names = archive_entries(&archive)?;

        assert!(names.contains("base/1/kept"));
        for hidden in ["base/1/deleted", "base/1/tree", "base/1/tree/child"] {
            assert!(!names.contains(hidden));
        }
        assert!(!names.iter().any(|name| name.contains(".wh.")));
        Ok(())
    }

    /// A file that exists in the upper layer next to its own whiteout is archived with
    /// the upper layer's contents.
    #[test]
    fn pgdata_archive_keeps_upper_file_recreated_after_whiteout() -> Result<()> {
        let scratch = tempfile::TempDir::new()?;
        let lower = scratch.path().join("lower");
        let upper = scratch.path().join("upper");
        for layer in [&lower, &upper] {
            fs::create_dir_all(layer.join("base/1"))?;
        }
        fs::write(lower.join("base/1/recreated"), b"lower")?;
        fs::write(upper.join("base/1/.wh.recreated"), b"")?;
        fs::write(upper.join("base/1/recreated"), b"upper")?;

        let archive = dump_pgdata_archive(&upper, Some(&lower), DataDirArchiveFormat::Tar)?;
        let mut reader = Archive::new(Cursor::new(archive));
        let mut seen = false;
        for item in reader.entries()? {
            let mut item = item?;
            let raw = item.path()?.into_owned();
            if normalize_archive_path(&raw)? != Path::new("base/1/recreated") {
                continue;
            }
            let mut body = Vec::new();
            item.read_to_end(&mut body)?;
            assert_eq!(body, b"upper");
            seen = true;
        }
        assert!(seen, "expected recreated upper file in archive");
        Ok(())
    }
}