pk2_sync/
fs.rs

1//! File structs representing file entries inside a pk2 archive.
2use std::hash::Hash;
3use std::io::{self, Cursor, Read, Seek, SeekFrom, Write};
4use std::path::{Path, PathBuf};
5use std::time::SystemTime;
6
7use pk2::block_chain::PackBlockChain;
8use pk2::chain_index::ChainIndex;
9use pk2::entry::{NonEmptyEntry, PackEntry};
10use pk2::{ChainOffset, StreamOffset};
11
12use crate::{Lock, LockChoice, Pk2};
13
14/// A readable file entry in a pk2 archive.
15pub struct File<'pk2, Buffer, L: LockChoice> {
16    archive: &'pk2 Pk2<Buffer, L>,
17    /// The chain this file resides in
18    chain: ChainOffset,
19    /// The index of this file in the chain
20    entry_index: usize,
21    /// Relative seek pos to the file entry offset
22    seek_pos: u64,
23}
24
25impl<Buffer, L: LockChoice> Copy for File<'_, Buffer, L> {}
26impl<Buffer, L: LockChoice> Clone for File<'_, Buffer, L> {
27    fn clone(&self) -> Self {
28        *self
29    }
30}
31
32impl<'pk2, Buffer, L: LockChoice> File<'pk2, Buffer, L> {
33    pub(super) fn new(
34        archive: &'pk2 Pk2<Buffer, L>,
35        chain: ChainOffset,
36        entry_index: usize,
37    ) -> Self {
38        File { archive, chain, entry_index, seek_pos: 0 }
39    }
40
41    pub fn modify_time(&self) -> Option<SystemTime> {
42        self.entry().modify_time.into_systime()
43    }
44
45    pub fn access_time(&self) -> Option<SystemTime> {
46        self.entry().access_time.into_systime()
47    }
48
49    pub fn create_time(&self) -> Option<SystemTime> {
50        self.entry().create_time.into_systime()
51    }
52
53    pub fn size(&self) -> u32 {
54        self.entry().file_data().unwrap().1
55    }
56
57    fn pos_data(&self) -> StreamOffset {
58        self.entry().file_data().unwrap().0
59    }
60
61    pub fn name(&self) -> &'pk2 str {
62        self.entry().name()
63    }
64
65    fn entry(&self) -> &'pk2 NonEmptyEntry {
66        self.archive
67            .chain_index
68            .get_entry(self.chain, self.entry_index)
69            .and_then(PackEntry::as_non_empty)
70            .expect("invalid file object")
71    }
72
73    fn remaining_len(&self) -> usize {
74        (self.size() as u64).saturating_sub(self.seek_pos) as usize
75    }
76}
77
78impl<Buffer, L: LockChoice> Seek for File<'_, Buffer, L> {
79    fn seek(&mut self, seek: SeekFrom) -> io::Result<u64> {
80        let size = self.size() as u64;
81        seek_impl(seek, self.seek_pos, size).inspect(|&new_pos| {
82            self.seek_pos = new_pos;
83        })
84    }
85}
86
87impl<Buffer, L> Read for File<'_, Buffer, L>
88where
89    Buffer: Read + Seek,
90    L: LockChoice,
91{
92    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
93        let pos_data = self.pos_data();
94        let rem_len = self.remaining_len();
95        let len = buf.len().min(rem_len);
96        let n = self.archive.stream.with_lock(|stream| {
97            crate::io::read_at(stream, pos_data + self.seek_pos, &mut buf[..len])
98        })?;
99        self.seek(SeekFrom::Current(n as i64))?;
100        Ok(n)
101    }
102
103    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
104        let pos_data = self.pos_data();
105        let rem_len = self.remaining_len();
106        if buf.len() > rem_len {
107            Err(io::Error::new(io::ErrorKind::UnexpectedEof, "failed to fill whole buffer"))
108        } else {
109            self.archive.stream.with_lock(|stream| {
110                crate::io::read_exact_at(stream, pos_data + self.seek_pos, buf)
111            })?;
112            self.seek_pos += buf.len() as u64;
113            Ok(())
114        }
115    }
116
117    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
118        let len = buf.len();
119        let rem_len = self.remaining_len();
120        buf.resize(len + rem_len, 0);
121        self.read_exact(&mut buf[len..]).map(|()| rem_len)
122    }
123}
124
125/// A writable file entry in a pk2 archive.
126pub struct FileMut<'pk2, Buffer, L>
127where
128    Buffer: Write + Read + Seek,
129    L: LockChoice,
130{
131    archive: &'pk2 mut Pk2<Buffer, L>,
132    // the chain this file resides in
133    chain: ChainOffset,
134    // the index of this file in the chain
135    entry_index: usize,
136    data: Cursor<Vec<u8>>,
137    update_modify_time: bool,
138    // if true, don't fetch existing data on read/write - start fresh
139    truncated: bool,
140}
141
142impl<'pk2, Buffer, L> FileMut<'pk2, Buffer, L>
143where
144    Buffer: Read + Write + Seek,
145    L: LockChoice,
146{
147    pub(super) fn new(
148        archive: &'pk2 mut Pk2<Buffer, L>,
149        chain: ChainOffset,
150        entry_index: usize,
151    ) -> Self {
152        FileMut {
153            archive,
154            chain,
155            entry_index,
156            data: Cursor::new(Vec::new()),
157            update_modify_time: true,
158            truncated: false,
159        }
160    }
161
162    /// Creates a new `FileMut` that ignores existing file data.
163    /// Any writes will completely replace the file contents.
164    pub(super) fn new_truncated(
165        archive: &'pk2 mut Pk2<Buffer, L>,
166        chain: ChainOffset,
167        entry_index: usize,
168    ) -> Self {
169        FileMut {
170            archive,
171            chain,
172            entry_index,
173            data: Cursor::new(Vec::new()),
174            update_modify_time: true,
175            truncated: true,
176        }
177    }
178
179    pub fn modify_time(&self) -> Option<SystemTime> {
180        self.entry().modify_time.into_systime()
181    }
182
183    pub fn access_time(&self) -> Option<SystemTime> {
184        self.entry().access_time.into_systime()
185    }
186
187    pub fn create_time(&self) -> Option<SystemTime> {
188        self.entry().create_time.into_systime()
189    }
190
191    pub fn set_modify_time(&mut self, time: SystemTime) {
192        self.update_modify_time = false;
193        self.entry_mut().modify_time = time.into();
194    }
195
196    pub fn set_access_time(&mut self, time: SystemTime) {
197        self.entry_mut().access_time = time.into();
198    }
199
200    pub fn set_create_time(&mut self, time: SystemTime) {
201        self.entry_mut().create_time = time.into();
202    }
203
204    pub fn update_modify_time(&mut self, update_modify_time: bool) {
205        self.update_modify_time = update_modify_time;
206    }
207
208    pub fn copy_file_times<Buffer2, L2: LockChoice>(&mut self, other: &File<'_, Buffer2, L2>) {
209        let this = self.entry_mut();
210        let other = other.entry();
211        this.modify_time = other.modify_time;
212        this.create_time = other.create_time;
213        this.access_time = other.access_time;
214    }
215
216    pub fn size(&self) -> u32 {
217        self.entry().file_data().unwrap().1
218    }
219
220    pub fn name(&self) -> &str {
221        self.entry().name()
222    }
223
224    fn entry(&self) -> &NonEmptyEntry {
225        self.archive
226            .chain_index
227            .get_entry(self.chain, self.entry_index)
228            .and_then(PackEntry::as_non_empty)
229            .expect("invalid file object")
230    }
231
232    fn entry_mut(&mut self) -> &mut NonEmptyEntry {
233        self.archive
234            .chain_index
235            .get_entry_mut(self.chain, self.entry_index)
236            .and_then(PackEntry::as_non_empty_mut)
237            .expect("invalid file object")
238    }
239
240    fn fetch_data(&mut self) -> io::Result<()> {
241        let (pos_data, size) = self.entry().file_data().unwrap();
242        self.data.get_mut().resize(size as usize, 0);
243        self.archive
244            .stream
245            .with_lock(|buffer| crate::io::read_exact_at(buffer, pos_data, self.data.get_mut()))
246    }
247
248    fn try_fetch_data(&mut self) -> io::Result<()> {
249        if !self.truncated && self.data.get_ref().is_empty() && self.size() > 0 {
250            self.fetch_data()
251        } else {
252            Ok(())
253        }
254    }
255}
256
257impl<Buffer, L> Seek for FileMut<'_, Buffer, L>
258where
259    Buffer: Read + Write + Seek,
260    L: LockChoice,
261{
262    fn seek(&mut self, seek: SeekFrom) -> io::Result<u64> {
263        let size = self.data.get_ref().len().max(self.size() as usize) as u64;
264        seek_impl(seek, self.data.position(), size).inspect(|&new_pos| {
265            self.data.set_position(new_pos);
266        })
267    }
268}
269
270impl<Buffer, L> Read for FileMut<'_, Buffer, L>
271where
272    Buffer: Read + Write + Seek,
273    L: LockChoice,
274{
275    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
276        self.try_fetch_data()?;
277        self.data.read(buf)
278    }
279
280    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
281        self.try_fetch_data()?;
282        self.data.read_exact(buf)
283    }
284
285    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
286        let len = buf.len();
287        let size = self.data.get_ref().len().max(self.size() as usize);
288        buf.resize(len + size, 0);
289        self.read_exact(&mut buf[len..]).map(|()| size)
290    }
291}
292
293impl<Buffer, L> Write for FileMut<'_, Buffer, L>
294where
295    Buffer: Read + Write + Seek,
296    L: LockChoice,
297{
298    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
299        self.try_fetch_data()?;
300        let len = self.data.get_ref().len();
301        match len.checked_add(buf.len()).map(|new_len| new_len.checked_sub(u32::MAX as usize)) {
302            // data + buf < u32::MAX
303            Some(None | Some(0)) => self.data.write(buf),
304            // data + buf > u32::MAX, truncate buf
305            Some(Some(slice_overflow)) => self.data.write(&buf[..buf.len() - slice_overflow]),
306            // data + buf overflows usize::MAX
307            None => Ok(0),
308        }
309    }
310
311    fn flush(&mut self) -> io::Result<()> {
312        if self.data.get_ref().is_empty() {
313            return Ok(()); // nothing to write
314        }
315        if self.update_modify_time {
316            self.set_modify_time(SystemTime::now());
317        }
318        let chain = self.archive.chain_index.get_mut(self.chain).expect("invalid chain");
319        let entry_offset = chain.stream_offset_for_entry(self.entry_index).expect("invalid entry");
320
321        let entry = chain.get_mut(self.entry_index).expect("invalid entry");
322
323        let data = &self.data.get_ref()[..];
324        debug_assert!(data.len() <= !0u32 as usize);
325        let data_len = data.len() as u32;
326        self.archive.stream.with_lock(|stream| {
327            let (mut pos_data, mut size) = entry.as_non_empty().unwrap().file_data().unwrap();
328            // new unwritten file/more data than what fits, so use a new block
329            if data_len > size {
330                // Append data at the end of the buffer as it no longer fits
331                // This causes fragmentation
332                pos_data = crate::io::append_data(&mut *stream, data)?;
333            } else {
334                // data fits into the previous buffer space
335                crate::io::write_data_at(&mut *stream, pos_data, data)?;
336            }
337            size = data_len;
338            entry.as_non_empty_mut().unwrap().set_file_data(pos_data, size).unwrap();
339            crate::io::write_entry_at(self.archive.blowfish.as_deref(), stream, entry_offset, entry)
340        })
341    }
342}
343
344impl<Buffer, L> Drop for FileMut<'_, Buffer, L>
345where
346    Buffer: Write + Read + Seek,
347    L: LockChoice,
348{
349    fn drop(&mut self) {
350        let _ = self.flush();
351    }
352}
353
/// Computes the absolute position a seek request resolves to.
///
/// `seek_pos` is the current position and `size` the length used as the base for
/// `SeekFrom::End`. `SeekFrom::Start` is returned unchanged. The result is not clamped to
/// `size`.
///
/// # Errors
///
/// Returns `InvalidInput` when the target would be negative or does not fit into a `u64`.
fn seek_impl(seek: SeekFrom, seek_pos: u64, size: u64) -> io::Result<u64> {
    let (anchor, delta) = match seek {
        SeekFrom::Start(target) => return Ok(target),
        SeekFrom::End(delta) => (size, delta),
        SeekFrom::Current(delta) => (seek_pos, delta),
    };
    anchor.checked_add_signed(delta).ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            "invalid seek to a negative or overflowing position",
        )
    })
}
375
376pub enum DirEntry<'pk2, Buffer, L: LockChoice> {
377    Directory(Directory<'pk2, Buffer, L>),
378    File(File<'pk2, Buffer, L>),
379}
380
381impl<Buffer, L: LockChoice> Copy for DirEntry<'_, Buffer, L> {}
382impl<Buffer, L: LockChoice> Clone for DirEntry<'_, Buffer, L> {
383    fn clone(&self) -> Self {
384        *self
385    }
386}
387
388impl<'pk2, Buffer, L: LockChoice> DirEntry<'pk2, Buffer, L> {
389    fn from(
390        entry: &PackEntry,
391        archive: &'pk2 Pk2<Buffer, L>,
392        chain: ChainOffset,
393        idx: usize,
394    ) -> Option<Self> {
395        let entry = entry.as_non_empty()?;
396        if entry.is_file() {
397            Some(DirEntry::File(File::new(archive, chain, idx)))
398        } else {
399            Some(DirEntry::Directory(Directory::new(archive, Some(chain), idx)))
400        }
401    }
402}
403
404/// A directory entry in a pk2 archive.
405pub struct Directory<'pk2, Buffer, L: LockChoice> {
406    archive: &'pk2 Pk2<Buffer, L>,
407    chain: Option<ChainOffset>,
408    entry_index: usize,
409}
410
411impl<Buffer, L: LockChoice> Copy for Directory<'_, Buffer, L> {}
412impl<Buffer, L: LockChoice> Clone for Directory<'_, Buffer, L> {
413    fn clone(&self) -> Self {
414        *self
415    }
416}
417
418impl<'pk2, Buffer, L: LockChoice> Directory<'pk2, Buffer, L> {
419    pub(super) fn new(
420        archive: &'pk2 Pk2<Buffer, L>,
421        chain: Option<ChainOffset>,
422        entry_index: usize,
423    ) -> Self {
424        Directory { archive, chain, entry_index }
425    }
426
427    fn entry(&self) -> &'pk2 NonEmptyEntry {
428        self.archive
429            .chain_index
430            .get_entry(self.chain.unwrap_or(ChainIndex::PK2_ROOT_CHAIN_OFFSET), self.entry_index)
431            .and_then(PackEntry::as_non_empty)
432            .expect("invalid file object")
433    }
434
435    fn pos_children(&self) -> ChainOffset {
436        self.entry().directory_children_offset().unwrap()
437    }
438    // returns the chain this folder represents
439    fn dir_chain(&self, chain: ChainOffset) -> &'pk2 PackBlockChain {
440        self.archive.chain_index.get(chain).expect("invalid dir object")
441    }
442
443    pub fn is_backlink(&self) -> bool {
444        // TODO: Record backlinks in the graph when building the chain index
445        matches!(self.entry().name(), "." | "..")
446    }
447
448    pub fn name(&self) -> &'pk2 str {
449        self.entry().name()
450    }
451
452    pub fn modify_time(&self) -> Option<SystemTime> {
453        self.entry().modify_time.into_systime()
454    }
455
456    pub fn access_time(&self) -> Option<SystemTime> {
457        self.entry().access_time.into_systime()
458    }
459
460    pub fn create_time(&self) -> Option<SystemTime> {
461        self.entry().create_time.into_systime()
462    }
463
464    pub fn open_file(&self, path: &str) -> io::Result<File<'pk2, Buffer, L>> {
465        let (chain, entry_idx, entry) = self
466            .archive
467            .chain_index
468            .resolve_path_to_entry_and_parent(self.pos_children(), path)
469            .map_err(|e| {
470                io::Error::new(
471                    io::ErrorKind::NotFound,
472                    format!("failed to open path {path:?}: {e}"),
473                )
474            })?;
475        Pk2::<Buffer, L>::is_file(entry).map(|_| File::new(self.archive, chain, entry_idx))
476    }
477
478    pub fn open_directory(&self, path: &str) -> io::Result<Directory<'pk2, Buffer, L>> {
479        let (chain, entry_idx, entry) = self
480            .archive
481            .chain_index
482            .resolve_path_to_entry_and_parent(self.pos_children(), path)
483            .map_err(|e| {
484                io::Error::new(
485                    io::ErrorKind::NotFound,
486                    format!("failed to open path {path:?}: {e}"),
487                )
488            })?;
489
490        if entry.as_non_empty().is_some_and(|it| it.is_directory()) {
491            Ok(Directory::new(self.archive, Some(chain), entry_idx))
492        } else {
493            Err(io::Error::new(io::ErrorKind::NotFound, "not a directory"))
494        }
495    }
496
497    pub fn open(&self, path: &str) -> io::Result<DirEntry<'pk2, Buffer, L>> {
498        let (chain, entry_idx, entry) = self
499            .archive
500            .chain_index
501            .resolve_path_to_entry_and_parent(self.pos_children(), path)
502            .map_err(|e| {
503                io::Error::new(
504                    io::ErrorKind::NotFound,
505                    format!("failed to open path {path:?}: {e}"),
506                )
507            })?;
508        DirEntry::from(entry, self.archive, chain, entry_idx)
509            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "no file or directory found"))
510    }
511
512    /// Invokes cb on every file in this directory and its children
513    /// The callback gets invoked with its relative path to `base` and the file object.
514    // Todo, replace this with a file_paths iterator once generators are stable
515    pub fn for_each_file(
516        &self,
517        mut cb: impl FnMut(&Path, File<'pk2, Buffer, L>) -> io::Result<()>,
518    ) -> io::Result<()> {
519        let mut path = std::path::PathBuf::new();
520
521        #[allow(clippy::type_complexity)]
522        pub fn for_each_file_rec<'pk2, Buffer, L: LockChoice>(
523            path: &mut PathBuf,
524            dir: &Directory<'pk2, Buffer, L>,
525            cb: &mut dyn FnMut(&Path, File<'pk2, Buffer, L>) -> io::Result<()>,
526        ) -> io::Result<()> {
527            for entry in dir.entries() {
528                match entry {
529                    DirEntry::Directory(dir) => {
530                        if dir.is_backlink() {
531                            continue;
532                        }
533                        let name = dir.name();
534                        path.push(name);
535                        for_each_file_rec(path, &dir, cb)?;
536                        path.pop();
537                    }
538                    DirEntry::File(file) => {
539                        path.push(file.name());
540                        cb(path, file)?;
541                        path.pop();
542                    }
543                }
544            }
545            Ok(())
546        }
547
548        for_each_file_rec(&mut path, self, &mut cb)
549    }
550
551    /// Returns an iterator over all files in this directory.
552    pub fn files(&self) -> impl Iterator<Item = File<'pk2, Buffer, L>> + use<'pk2, Buffer, L> {
553        let chain = self.pos_children();
554        let archive = self.archive;
555        self.dir_chain(chain)
556            .entries()
557            .enumerate()
558            .filter(|(_, entry)| entry.is_file())
559            .map(move |(idx, _)| File::new(archive, chain, idx))
560    }
561
562    /// Returns an iterator over all items in this directory including backlinks like `.` and
563    /// `..`.
564    pub fn entries(
565        &self,
566    ) -> impl Iterator<Item = DirEntry<'pk2, Buffer, L>> + use<'pk2, Buffer, L> {
567        let chain = self.pos_children();
568        let archive = self.archive;
569        self.dir_chain(chain)
570            .entries()
571            .enumerate()
572            .filter_map(move |(idx, entry)| DirEntry::from(entry, archive, chain, idx))
573    }
574}
575
576impl<Buffer, L: LockChoice> Hash for Directory<'_, Buffer, L> {
577    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
578        state.write_usize(self.archive as *const _ as usize);
579        self.chain.hash(state);
580        state.write_usize(self.entry_index);
581    }
582}
583
584impl<Buffer, L: LockChoice> Hash for File<'_, Buffer, L> {
585    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
586        state.write_usize(self.archive as *const _ as usize);
587        state.write_u64(self.chain.0.get());
588        state.write_usize(self.entry_index);
589    }
590}
591
592impl<Buffer, L> Hash for FileMut<'_, Buffer, L>
593where
594    Buffer: Read + Write + Seek,
595    L: LockChoice,
596{
597    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
598        state.write_usize(self.archive as *const _ as usize);
599        state.write_u64(self.chain.0.get());
600        state.write_usize(self.entry_index);
601    }
602}