ddup_bak/
lib.rs

1use flate2::{read::GzDecoder, write::GzEncoder};
2use positioned_io::ReadAt;
3#[cfg(unix)]
4use std::os::unix::fs::PermissionsExt;
5use std::{
6    fmt::{Debug, Formatter},
7    fs::{DirEntry, File, Permissions},
8    io::{Read, Seek, SeekFrom, Write},
9};
10
/// Magic bytes that open every archive file.
///
/// `Archive::new` writes them at the start of the file and `Archive::open`
/// refuses any file whose first eight bytes differ.
pub const FILE_SIGNATURE: &[u8] = b"DDUPBAK\0";
12
/// Compression applied to a file's payload inside the archive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionFormat {
    /// The payload is stored verbatim.
    None,
    /// The payload is stored as a gzip stream.
    Gzip,
}

impl CompressionFormat {
    /// Returns the numeric tag that identifies this format in entry metadata.
    pub fn encode(&self) -> u8 {
        match self {
            CompressionFormat::None => 0,
            CompressionFormat::Gzip => 1,
        }
    }

    /// Converts a numeric tag back into a format.
    ///
    /// Prefer the `TryFrom<u8>` implementation when the tag comes from
    /// untrusted data such as an archive read from disk.
    ///
    /// # Panics
    /// Panics with `"Invalid compression format"` if `value` is not a tag
    /// produced by [`CompressionFormat::encode`].
    pub fn decode(value: u8) -> Self {
        match <Self as std::convert::TryFrom<u8>>::try_from(value) {
            Ok(format) => format,
            Err(_) => panic!("Invalid compression format"),
        }
    }
}

impl std::convert::TryFrom<u8> for CompressionFormat {
    type Error = std::io::Error;

    /// Fallible counterpart of [`CompressionFormat::decode`].
    ///
    /// # Errors
    /// Returns an [`std::io::ErrorKind::InvalidData`] error for unknown tags.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(CompressionFormat::None),
            1 => Ok(CompressionFormat::Gzip),
            _ => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "Invalid compression format",
            )),
        }
    }
}
35
/// A `u64` serialised as a little-endian base-128 variable-length integer.
///
/// Each encoded byte carries seven payload bits; the high bit is set on every
/// byte except the last one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VarInt(u64);

impl VarInt {
    /// Wraps `value` so it can be encoded.
    pub fn new(value: u64) -> Self {
        VarInt(value)
    }

    /// Returns the encoded bytes (between 1 and 10 of them).
    pub fn encode(&self) -> Vec<u8> {
        let mut value = self.0;
        // A u64 needs at most ceil(64 / 7) = 10 bytes.
        let mut buffer = Vec::with_capacity(10);

        while value > 0x7F {
            buffer.push((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }

        buffer.push(value as u8);

        buffer
    }

    /// Reads one encoded integer from `stream`.
    ///
    /// # Errors
    /// Propagates any read error from `stream` (including `UnexpectedEof` when
    /// the input ends mid-value) and returns an
    /// [`std::io::ErrorKind::InvalidData`] error when the encoded value does
    /// not fit in a `u64`.
    pub fn decode<S: Read>(stream: &mut S) -> Result<u64, std::io::Error> {
        let mut value: u64 = 0;
        let mut shift: u32 = 0;

        loop {
            let mut byte = [0u8; 1];
            stream.read_exact(&mut byte)?;

            let payload = u64::from(byte[0] & 0x7F);
            // The tenth byte (shift 63) has room for a single bit; anything
            // beyond that would overflow the shift or silently drop bits.
            if shift > 63 || (shift == 63 && payload > 1) {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "VarInt does not fit in 64 bits",
                ));
            }

            value |= payload << shift;
            if byte[0] & 0x80 == 0 {
                break;
            }
            shift += 7;
        }

        Ok(value)
    }
}
75
/// Converts filesystem permissions into the integer stored in entry metadata.
///
/// On Unix this is the raw mode; on Windows it is `1` for read-only files and
/// `0` otherwise.
fn encode_file_permissions(permissions: Permissions) -> u32 {
    #[cfg(unix)]
    let encoded = permissions.mode();
    #[cfg(windows)]
    let encoded = u32::from(permissions.readonly());

    encoded
}
86
/// Rebuilds filesystem permissions from the integer stored in entry metadata.
///
/// On Unix `mode` is used as the raw mode; on Windows a value of `1` yields
/// read-only permissions and any other value yields writable ones.
fn decode_file_permissions(mode: u32) -> Permissions {
    #[cfg(unix)]
    {
        <Permissions as PermissionsExt>::from_mode(mode)
    }
    #[cfg(windows)]
    {
        // NOTE(review): this relies on an all-zero bit pattern being a valid
        // `Permissions` value on Windows; confirm against the std
        // implementation, which offers no public constructor here.
        let mut permissions = unsafe { std::mem::zeroed::<Permissions>() };
        permissions.set_readonly(mode == 1);

        permissions
    }
}
104
105pub struct FileEntry {
106    pub name: String,
107    pub mode: Permissions,
108
109    file: File,
110    decoder: Option<GzDecoder<File>>,
111    size: u64,
112    offset: u64,
113    consumed: u64,
114    compression: CompressionFormat,
115}
116
117impl Debug for FileEntry {
118    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
119        f.debug_struct("FileEntry")
120            .field("name", &self.name)
121            .field("mode", &self.mode)
122            .field("size", &self.size)
123            .field("offset", &self.offset)
124            .field("compression", &self.compression)
125            .finish()
126    }
127}
128
129impl Read for FileEntry {
130    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
131        if self.consumed >= self.size {
132            return Ok(0);
133        }
134
135        let remaining = self.size - self.consumed;
136
137        match self.compression {
138            CompressionFormat::Gzip => {
139                if self.decoder.is_none() {
140                    self.file
141                        .seek(SeekFrom::Start(self.offset + self.consumed))?;
142                    let decoder = GzDecoder::new(self.file.try_clone()?);
143                    self.decoder = Some(decoder);
144                }
145
146                let decoder = self.decoder.as_mut().unwrap();
147                let bytes_read = decoder.read(buf)?;
148
149                if bytes_read > remaining as usize {
150                    self.consumed += remaining;
151                    return Ok(remaining as usize);
152                }
153
154                self.consumed += bytes_read as u64;
155                Ok(bytes_read)
156            }
157            CompressionFormat::None => {
158                let bytes_read = self.file.read_at(self.offset + self.consumed, buf)?;
159
160                if bytes_read > remaining as usize {
161                    self.consumed += remaining;
162                    return Ok(remaining as usize);
163                }
164
165                self.consumed += bytes_read as u64;
166                Ok(bytes_read)
167            }
168        }
169    }
170}
171
172#[derive(Debug)]
173pub struct DirectoryEntry {
174    pub name: String,
175    pub mode: Permissions,
176    pub entries: Vec<Entry>,
177}
178
/// A symbolic link stored in the archive.
#[derive(Debug)]
pub struct SymlinkEntry {
    /// Name of the link itself, without any leading path.
    pub name: String,
    /// Permissions recorded for the link.
    pub mode: Permissions,
    /// Path the link points to.
    pub target: String,
    /// Whether the link's target is a directory.
    pub target_dir: bool,
}
186
187#[derive(Debug)]
188pub enum Entry {
189    File(Box<FileEntry>),
190    Directory(DirectoryEntry),
191    Symlink(SymlinkEntry),
192}
193
194impl Entry {
195    /// Returns the name of the entry.
196    /// This is the name of the file or directory, not the full path.
197    /// For example, if the entry is under `path/to/file.txt`, this will return `file.txt`.
198    pub fn name(&self) -> &str {
199        match self {
200            Entry::File(entry) => &entry.name,
201            Entry::Directory(entry) => &entry.name,
202            Entry::Symlink(entry) => &entry.name,
203        }
204    }
205
206    /// Returns the mode of the entry.
207    /// This is the file permissions of the entry.
208    pub fn mode(&self) -> Permissions {
209        match self {
210            Entry::File(entry) => entry.mode.clone(),
211            Entry::Directory(entry) => entry.mode.clone(),
212            Entry::Symlink(entry) => entry.mode.clone(),
213        }
214    }
215}
216
217pub struct Archive {
218    file: File,
219
220    entries: Vec<Entry>,
221    entries_offset: u64,
222}
223
224impl Debug for Archive {
225    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
226        f.debug_struct("Archive")
227            .field("entries", &self.entries)
228            .finish()
229    }
230}
231
/// Optional callback invoked with the path of every filesystem entry just
/// before it is added to the archive. Pass `None` to disable reporting.
type ProgressCallback = Option<fn(&std::path::PathBuf)>;
233
234impl Archive {
235    /// Creates a new archive file.
236    /// The file signature is written to the beginning of the file.
237    /// The file is truncated to 0 bytes.
238    pub fn new(mut file: File) -> Self {
239        file.set_len(0).unwrap();
240        file.write_all(FILE_SIGNATURE).unwrap();
241        file.sync_all().unwrap();
242
243        Self {
244            file,
245            entries: Vec::new(),
246            entries_offset: 8,
247        }
248    }
249
250    /// Opens an existing archive file for reading and writing.
251    /// This will not overwrite the file, but append to it.
252    pub fn open(path: &str) -> Result<Self, std::io::Error> {
253        let mut file = File::open(path)?;
254        let len = file.metadata()?.len();
255
256        let mut buffer = [0; 8];
257        file.read_exact(&mut buffer)?;
258        if buffer != FILE_SIGNATURE {
259            return Err(std::io::Error::new(
260                std::io::ErrorKind::InvalidData,
261                "Invalid file signature",
262            ));
263        }
264
265        file.read_exact_at(len - 16, &mut buffer)?;
266        let entries_count = u64::from_le_bytes(buffer);
267        file.read_exact_at(len - 8, &mut buffer)?;
268        let entries_offset = u64::from_le_bytes(buffer);
269
270        println!("Entries count: {}", entries_count);
271        println!("Entries offset: {}", entries_offset);
272
273        let mut entries = Vec::with_capacity(entries_count as usize);
274        file.seek(SeekFrom::Start(entries_offset))?;
275
276        let mut decoder = GzDecoder::new(file.try_clone()?);
277        for _ in 0..entries_count {
278            let file_clone = file.try_clone()?;
279            let entry = Self::decode_entry(&mut decoder, file_clone)?;
280            entries.push(entry);
281        }
282
283        Ok(Self {
284            file,
285            entries,
286            entries_offset,
287        })
288    }
289
290    /// Adds all files in the given directory to the archive. (including subdirectories)
291    /// This will append the directory to the end of the archive, if this directory already exists, it will not be replaced.
292    ///
293    /// After this function is called, the existing header will be trimmed to the end of the archive, then readded upon completion.
294    ///
295    /// # Panics
296    /// This function will panic if any filename is not valid UTF-8 or longer than 255 bytes.
297    pub fn add_directory(
298        &mut self,
299        path: &str,
300        progress: ProgressCallback,
301    ) -> Result<(), std::io::Error> {
302        self.trim_end_header()?;
303
304        for entry in std::fs::read_dir(path)?.flatten() {
305            self.encode_entry(None, entry, progress)?;
306        }
307
308        self.write_end_header()?;
309
310        Ok(())
311    }
312
313    /// Returns the entries in the archive.
314    pub fn entries(&self) -> &[Entry] {
315        &self.entries
316    }
317
318    /// Consumes the archive and returns the entries.
319    pub fn into_entries(self) -> Vec<Entry> {
320        self.entries
321    }
322
323    /// Adds a single file entry to the archive. (including subdirectories)
324    /// This will append the entry to the end of the archive, if this entry already exists, it will not be replaced.
325    ///
326    /// After this function is called, the existing header will be trimmed to the end of the archive, then readded upon completion.
327    ///
328    /// # Panics
329    /// This function will panic if any filename is not valid UTF-8 or longer than 255 bytes.
330    pub fn add_entry(
331        &mut self,
332        entry: DirEntry,
333        progress: ProgressCallback,
334    ) -> Result<(), std::io::Error> {
335        self.trim_end_header()?;
336        self.encode_entry(None, entry, progress)?;
337
338        self.write_end_header()?;
339
340        Ok(())
341    }
342
343    fn trim_end_header(&mut self) -> Result<(), std::io::Error> {
344        if self.entries_offset == 0 {
345            return Ok(());
346        }
347
348        self.file.set_len(self.entries_offset)?;
349
350        Ok(())
351    }
352
353    fn write_end_header(&mut self) -> Result<(), std::io::Error> {
354        let mut encoder = GzEncoder::new(&mut self.file, flate2::Compression::default());
355        for entry in &self.entries {
356            Self::encode_entry_metadata(&mut encoder, entry)?;
357        }
358
359        encoder.finish()?;
360
361        self.file
362            .write_all(&(self.entries.len() as u64).to_le_bytes())?;
363        self.file.write_all(&self.entries_offset.to_le_bytes())?;
364        self.file.sync_all()?;
365
366        Ok(())
367    }
368
369    fn encode_entry_metadata<S: Write>(
370        writer: &mut S,
371        entry: &Entry,
372    ) -> Result<(), std::io::Error> {
373        let name = entry.name();
374        let name_length = name.len() as u8;
375
376        let mut buffer = Vec::with_capacity(1 + name.len() + 4);
377
378        buffer.push(name_length);
379        buffer.extend_from_slice(name.as_bytes());
380
381        let mode = encode_file_permissions(entry.mode());
382        let compression = match entry {
383            Entry::File(file_entry) => file_entry.compression,
384            _ => CompressionFormat::None,
385        };
386        let entry_type = match entry {
387            Entry::File(_) => 0,
388            Entry::Directory(_) => 1,
389            Entry::Symlink(_) => 2,
390        };
391
392        let type_compression_mode =
393            (entry_type << 30) | ((compression.encode() as u32) << 26) | (mode & 0x3FFFFFFF);
394        buffer.extend_from_slice(&type_compression_mode.to_le_bytes()[..4]);
395
396        writer.write_all(&buffer)?;
397
398        match entry {
399            Entry::File(file_entry) => {
400                writer.write_all(VarInt::new(file_entry.size).encode().as_slice())?;
401                writer.write_all(VarInt::new(file_entry.offset).encode().as_slice())?;
402            }
403            Entry::Directory(dir_entry) => {
404                writer.write_all(
405                    VarInt::new(dir_entry.entries.len() as u64)
406                        .encode()
407                        .as_slice(),
408                )?;
409
410                for sub_entry in &dir_entry.entries {
411                    Self::encode_entry_metadata(writer, sub_entry)?;
412                }
413            }
414            Entry::Symlink(link_entry) => {
415                writer.write_all(
416                    VarInt::new(link_entry.target.len() as u64)
417                        .encode()
418                        .as_slice(),
419                )?;
420                writer.write_all(link_entry.target.as_bytes())?;
421                writer.write_all(&[link_entry.target_dir as u8])?;
422            }
423        }
424
425        Ok(())
426    }
427
428    fn encode_entry(
429        &mut self,
430        entries: Option<&mut Vec<Entry>>,
431        fs_entry: DirEntry,
432        progress: ProgressCallback,
433    ) -> Result<(), std::io::Error> {
434        let path = fs_entry.path();
435        if let Some(f) = progress {
436            f(&path)
437        }
438
439        let file_name = path.file_name().unwrap().to_string_lossy();
440        let metadata = path.symlink_metadata()?;
441
442        if metadata.is_file() {
443            let mut file = File::open(&path)?;
444            let mut buffer = vec![0; 1024 * 1024];
445            let mut bytes_read = file.read(&mut buffer)?;
446
447            let compression = if metadata.len() > 16 {
448                CompressionFormat::Gzip
449            } else {
450                CompressionFormat::None
451            };
452            let entry = FileEntry {
453                name: file_name.to_string(),
454                mode: metadata.permissions(),
455                file: self.file.try_clone()?,
456                decoder: None,
457                size: metadata.len(),
458                offset: self.entries_offset,
459                consumed: 0,
460                compression,
461            };
462
463            if let Some(entries) = entries {
464                entries.push(Entry::File(Box::new(entry)));
465            } else {
466                self.entries.push(Entry::File(Box::new(entry)));
467            }
468
469            match compression {
470                CompressionFormat::Gzip => {
471                    let mut encoder =
472                        GzEncoder::new(&mut self.file, flate2::Compression::default());
473                    loop {
474                        encoder.write_all(&buffer[..bytes_read])?;
475
476                        bytes_read = file.read(&mut buffer)?;
477                        if bytes_read == 0 {
478                            break;
479                        }
480                    }
481                    encoder.finish()?;
482
483                    self.entries_offset = self.file.stream_position()?;
484                }
485                CompressionFormat::None => loop {
486                    self.file.write_all(&buffer[..bytes_read])?;
487                    self.entries_offset += bytes_read as u64;
488
489                    bytes_read = file.read(&mut buffer)?;
490                    if bytes_read == 0 {
491                        break;
492                    }
493                },
494            }
495        } else if metadata.is_dir() {
496            let mut dir_entries = Vec::new();
497            for entry in std::fs::read_dir(&path)?.flatten() {
498                self.encode_entry(Some(&mut dir_entries), entry, progress)?;
499            }
500
501            let dir_entry = DirectoryEntry {
502                name: file_name.to_string(),
503                mode: metadata.permissions(),
504                entries: dir_entries,
505            };
506
507            if let Some(entries) = entries {
508                entries.push(Entry::Directory(dir_entry));
509            } else {
510                self.entries.push(Entry::Directory(dir_entry));
511            }
512        } else if metadata.is_symlink() {
513            let target = std::fs::read_link(&path)?;
514            let target = target.to_string_lossy().to_string();
515
516            let link_entry = SymlinkEntry {
517                name: file_name.to_string(),
518                mode: metadata.permissions(),
519                target,
520                target_dir: std::fs::metadata(&path)?.is_dir(),
521            };
522
523            if let Some(entries) = entries {
524                entries.push(Entry::Symlink(link_entry));
525            } else {
526                self.entries.push(Entry::Symlink(link_entry));
527            }
528        }
529
530        Ok(())
531    }
532
533    fn decode_entry<S: Read>(decoder: &mut S, file: File) -> Result<Entry, std::io::Error> {
534        let mut name_length = [0; 1];
535        decoder.read_exact(&mut name_length)?;
536        let name_length = name_length[0] as usize;
537
538        let mut name_bytes = vec![0; name_length];
539        decoder.read_exact(&mut name_bytes)?;
540        let name = String::from_utf8(name_bytes).unwrap();
541
542        let mut type_mode_bytes = [0; 4];
543        decoder.read_exact(&mut type_mode_bytes)?;
544        let type_compression_mode = u32::from_le_bytes(type_mode_bytes);
545
546        let entry_type = (type_compression_mode >> 30) & 0b11;
547        let compression = CompressionFormat::decode(((type_compression_mode >> 26) & 0b1111) as u8);
548        let mode = decode_file_permissions(type_compression_mode & 0x3FFFFFFF);
549        let size = VarInt::decode(decoder)?;
550
551        match entry_type {
552            0 => {
553                let offset = VarInt::decode(decoder)?;
554
555                Ok(Entry::File(Box::new(FileEntry {
556                    name,
557                    mode,
558                    file,
559                    decoder: None,
560                    size,
561                    offset,
562                    consumed: 0,
563                    compression,
564                })))
565            }
566            1 => {
567                let mut entries: Vec<Entry> = Vec::with_capacity(size as usize);
568                for _ in 0..size {
569                    let entry = Self::decode_entry(decoder, file.try_clone()?)?;
570                    entries.push(entry);
571                }
572
573                Ok(Entry::Directory(DirectoryEntry {
574                    name,
575                    mode,
576                    entries,
577                }))
578            }
579            2 => {
580                let mut target_bytes = vec![0; size as usize];
581                decoder.read_exact(&mut target_bytes)?;
582
583                let target = String::from_utf8(target_bytes).unwrap();
584                let target = std::path::PathBuf::from(target);
585
586                let target = target
587                    .canonicalize()
588                    .unwrap_or_else(|_| target.clone())
589                    .to_string_lossy()
590                    .to_string();
591                let metadata = std::fs::metadata(&target)?;
592
593                Ok(Entry::Symlink(SymlinkEntry {
594                    name,
595                    mode,
596                    target,
597                    target_dir: metadata.is_dir(),
598                }))
599            }
600            _ => panic!("Unsupported entry type"),
601        }
602    }
603}