runefs/
index.rs

1use std::{
2    collections::{hash_map, HashMap},
3    fs::{self, File},
4    io::Read,
5    path::Path,
6    slice::Iter,
7};
8
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12use crate::{
13    archive::{ArchiveMetadata, ArchiveRef, ARCHIVE_REF_LEN},
14    error::{ParseError, ReadError},
15    Dat2, REFERENCE_TABLE_ID,
16};
17use itertools::izip;
18use nom::{
19    bytes::complete::take,
20    combinator::cond,
21    multi::{many0, many_m_n},
22    number::complete::{be_i32, be_u16, be_u32, be_u8}, Parser,
23};
24
25use crate::codec::{Buffer, Decoded};
26use crate::parse::be_u32_smart;
27
28pub const IDX_PREFIX: &str = "main_file_cache.idx";
29
30/// A list of valid indices.
31#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
32#[derive(Clone, Debug, Default)]
33pub struct Indices(pub(crate) HashMap<u8, Index>);
34
35impl Indices {
36    /// Allocates an [`Index`] for every `.idx` file in the root directory.
37    ///
38    /// # Errors
39    ///
40    /// Constructing this type is quite error prone, it needs to do quite a bit 
41    /// of book-keeping to get its allocation right. However, if the cache is 
42    /// unchanged _and_ in its proper format it will, most likely, succeed.
43    ///
44    /// The primary errors have to do with I/O, in order to read every index 
45    /// successfully it needs a `Dat2` reference and the metadata index.
46    ///
47    /// If an index is found it needs to load its entire contents and parse it, 
48    /// failure at this point is considered a bug.
49    pub fn new<P: AsRef<Path>>(path: P) -> crate::Result<Self> {
50        let path = path.as_ref();
51
52        let ref_index = Index::from_path(
53            REFERENCE_TABLE_ID,
54            path.join(format!("{}{}", IDX_PREFIX, REFERENCE_TABLE_ID)),
55        )?;
56        let dat2 = Dat2::new(path.join(crate::MAIN_DATA))?;
57        let mut indices = HashMap::with_capacity(255);
58
59        for p in fs::read_dir(path)? {
60            let path = p?.path();
61
62            if let Some(ext) = path.extension().and_then(std::ffi::OsStr::to_str)
63                && let Some(index_id) = ext.strip_prefix("idx") {
64                    let index_id: u8 = index_id.parse().expect("invalid extension format");
65                    if index_id == 255 {
66                        continue;
67                    }
68                    let mut index = Index::from_path(index_id, path)?;
69                    let archive_ref = ref_index.archive_refs.get(&(index_id as u32)).ok_or(
70                        ReadError::ArchiveNotFound {
71                            idx: REFERENCE_TABLE_ID,
72                            arc: index_id as u32,
73                        },
74                    )?;
75                    if archive_ref.length != 0 {
76                        index.metadata = dat2.metadata(archive_ref)?;
77                    }
78                    indices.insert(index_id, index);
79                }
80        }
81
82        indices.insert(REFERENCE_TABLE_ID, ref_index);
83
84        Ok(Self(indices))
85    }
86
87    pub fn get(&self, key: &u8) -> Option<&Index> {
88        self.0.get(key)
89    }
90
91    pub fn count(&self) -> usize {
92        self.0.len()
93    }
94}
95
96/// A virtual file type for every `.idx` in the cache directory.
97#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
98#[derive(Clone, Debug, Default)]
99pub struct Index {
100    pub id: u8,
101    pub archive_refs: HashMap<u32, ArchiveRef>,
102    pub metadata: IndexMetadata,
103}
104
105impl Index {
106    /// Creates an `Index` from a file path.
107    ///
108    /// # Panics
109    ///
110    /// When an index is loaded the given id and its file extension are compared, if these mismatch
111    /// it is considered a bug.
112    ///
113    /// # Errors
114    ///
115    /// The primary errors concern I/O where the file couldn't be opened or read.
116    pub fn from_path<P: AsRef<Path>>(id: u8, path: P) -> crate::Result<Self> {
117        let path = path.as_ref();
118        let index_extension = format!("idx{}", id);
119        let extension = path
120            .extension()
121            .and_then(std::ffi::OsStr::to_str)
122            .unwrap_or("nothing");
123
124        if extension != index_extension {
125            panic!("index extension mismatch: expected {index_extension} but found {extension}");
126        }
127
128        let mut file = File::open(path)?;
129        let mut buffer = Vec::with_capacity(file.metadata()?.len() as usize);
130        file.read_to_end(&mut buffer)?;
131
132        Self::from_buffer(id, &buffer)
133    }
134
135    pub(crate) fn from_buffer(id: u8, buffer: &[u8]) -> crate::Result<Self> {
136        let mut archive_refs = HashMap::new();
137
138        for (archive_id, archive_data) in buffer.chunks_exact(ARCHIVE_REF_LEN).enumerate() {
139            let archive_id = archive_id as u32;
140
141            let archive_ref = match ArchiveRef::from_buffer(archive_id, id, archive_data) {
142                Ok(archive) => archive,
143                Err(_) => return Err(ParseError::Archive(archive_id).into()),
144            };
145            archive_refs.insert(archive_id, archive_ref);
146        }
147
148        Ok(Self {
149            id,
150            archive_refs,
151            metadata: IndexMetadata::default(),
152        })
153    }
154}
155
156impl IntoIterator for Indices {
157    type Item = (u8, Index);
158    type IntoIter = hash_map::IntoIter<u8, Index>;
159
160    #[inline]
161    fn into_iter(self) -> Self::IntoIter {
162        self.0.into_iter()
163    }
164}
165
166impl<'a> IntoIterator for &'a Indices {
167    type Item = (&'a u8, &'a Index);
168    type IntoIter = hash_map::Iter<'a, u8, Index>;
169
170    #[inline]
171    fn into_iter(self) -> Self::IntoIter {
172        self.0.iter()
173    }
174}
175
176/// All of the index metadata fetched through `Dat2` from the metadata table.
177#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
178#[derive(Default, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
179pub struct IndexMetadata(Vec<ArchiveMetadata>);
180
181impl IndexMetadata {
182    /// Takes a specific raw metadata buffer and turns it into a `IndexMetadata`. 
183    /// 
184    /// # Errors
185    /// 
186    /// If, for what ever reason, the buffer does not _exactly_ adhere to the correct
187    /// format parsing will fail.
188    pub fn from_buffer(buffer: Buffer<Decoded>) -> crate::Result<Self> {
189        Self::from_slice(buffer.as_slice())
190    }
191
192    pub(crate) fn from_slice(buffer: &[u8]) -> crate::Result<Self> {
193        let (buffer, protocol) = be_u8(buffer)?;
194        // TODO: should actually parse this and add it to the struct
195        let (buffer, _) = cond(protocol >= 6, be_u32).parse(buffer)?;
196        let (buffer, identified, whirlpool, codec, hash) = parse_identified(buffer)?;
197        let (buffer, archive_count) = parse_archive_count(buffer, protocol)?;
198        let (buffer, ids) = parse_ids(buffer, protocol, archive_count)?;
199        let (buffer, name_hashes) = parse_hashes(buffer, identified, archive_count)?;
200        let (buffer, crcs) = many_m_n(0, archive_count, be_u32).parse(buffer)?;
201        let (buffer, hashes) = parse_hashes(buffer, hash, archive_count)?;
202        let (buffer, whirlpools) = parse_whirlpools(buffer, whirlpool, archive_count)?;
203        // skip for now TODO: should also be saved in the struct
204        //let (buffer, compressed, decompressed) = parse_codec(buffer, codec, archive_count)?;
205        let (buffer, _) = cond(codec, many_m_n(0, archive_count * 8, be_u8)).parse(buffer)?;
206        let (buffer, versions) = many_m_n(0, archive_count, be_u32).parse(buffer)?;
207        let (buffer, entry_counts) = parse_entry_counts(buffer, protocol, archive_count)?;
208        let (_, valid_ids) = parse_valid_ids(buffer, protocol, &entry_counts)?;
209        let mut archives = Vec::with_capacity(archive_count);
210        let mut last_archive_id = 0;
211        let archive_data = izip!(
212            ids,
213            name_hashes,
214            crcs,
215            hashes,
216            whirlpools,
217            versions,
218            entry_counts,
219            valid_ids
220        );
221        for (id, name_hash, crc, hash, whirlpool, version, entry_count, valid_ids) in archive_data {
222            last_archive_id += id as i32;
223
224            archives.push(ArchiveMetadata {
225                id: last_archive_id as u32,
226                name_hash,
227                crc,
228                hash,
229                whirlpool,
230                version,
231                entry_count,
232                valid_ids,
233            });
234        }
235        Ok(Self(archives))
236    }
237
238    #[inline]
239    pub fn iter(&self) -> Iter<'_, ArchiveMetadata> {
240        self.0.iter()
241    }
242}
243
244impl std::ops::Index<usize> for IndexMetadata {
245    type Output = ArchiveMetadata;
246
247    fn index(&self, index: usize) -> &Self::Output {
248        &self.0[index]
249    }
250}
251
252impl IntoIterator for IndexMetadata {
253    type Item = ArchiveMetadata;
254    type IntoIter = std::vec::IntoIter<ArchiveMetadata>;
255
256    #[inline]
257    fn into_iter(self) -> Self::IntoIter {
258        self.0.into_iter()
259    }
260}
261
262impl<'a> IntoIterator for &'a IndexMetadata {
263    type Item = &'a ArchiveMetadata;
264    type IntoIter = Iter<'a, ArchiveMetadata>;
265
266    #[inline]
267    fn into_iter(self) -> Self::IntoIter {
268        self.0.iter()
269    }
270}
271
272fn parse_identified(buffer: &[u8]) -> crate::Result<(&[u8], bool, bool, bool, bool)> {
273    let (buffer, identified) = be_u8(buffer)?;
274
275    let whirlpool = (2 & identified) != 0;
276    let codec = (identified & 4) != 0;
277    let hash = (identified & 8) != 0;
278    let identified = (1 & identified) != 0;
279
280    Ok((buffer, identified, whirlpool, codec, hash))
281}
282
283fn parse_hashes(
284    buffer: &[u8],
285    hash: bool,
286    archive_count: usize,
287) -> crate::Result<(&[u8], Vec<i32>)> {
288    let (buffer, taken) = cond(hash, take(archive_count * 4)).parse(buffer)?;
289    let (_, mut hashes) = many0(be_i32).parse(taken.unwrap_or(&[]))?;
290
291    if hashes.len() != archive_count {
292        hashes = vec![0; archive_count * 4];
293    }
294
295    Ok((buffer, hashes))
296}
297
298fn parse_whirlpools(
299    buffer: &[u8],
300    whirlpool: bool,
301    archive_count: usize,
302) -> crate::Result<(&[u8], Vec<[u8; 64]>)> {
303    let (buffer, taken) = cond(whirlpool, take(archive_count * 64)).parse(buffer)?;
304    let mut whirlpools = vec![[0; 64]; archive_count];
305
306    for (index, chunk) in taken.unwrap_or(&[]).chunks_exact(64).enumerate() {
307        whirlpools[index].copy_from_slice(chunk);
308    }
309    if whirlpools.len() != archive_count {
310        whirlpools = vec![[0; 64]; archive_count];
311    }
312
313    Ok((buffer, whirlpools))
314}
315
316// fn parse_codec(buffer: &[u8], codec: bool, archive_count: usize) -> crate::Result<(&[u8], Vec<u32>, Vec<u32>)> {
317//     todo!()
318// }
319
320fn parse_valid_ids<'a>(
321    mut buffer: &'a [u8],
322    protocol: u8,
323    entry_counts: &[usize],
324) -> crate::Result<(&'a [u8], Vec<Vec<u32>>)> {
325    let mut result = Vec::with_capacity(entry_counts.len());
326
327    for entry_count in entry_counts {
328        let (buf, id_modifiers) = if protocol >= 7 {
329            many_m_n(0, *entry_count, be_u32_smart).parse(buffer)?
330        } else {
331            let (buf, result) = many_m_n(0, *entry_count, be_u16).parse(buffer)?;
332            let result = result.iter().map(|&id_mod| id_mod as u32).collect();
333
334            (buf, result)
335        };
336        buffer = buf;
337
338        let mut ids = Vec::with_capacity(id_modifiers.len());
339        let mut id = 0_u32;
340        for current_id in id_modifiers {
341            id += current_id;
342            ids.push(id);
343        }
344
345        result.push(ids);
346    }
347
348    Ok((buffer, result))
349}
350
351fn parse_archive_count(buffer: &[u8], protocol: u8) -> crate::Result<(&[u8], usize)> {
352    let (buffer, value) = if protocol >= 7 {
353        be_u32_smart(buffer)?
354    } else {
355        let (buf, res) = be_u16(buffer)?;
356        (buf, res as u32)
357    };
358
359    Ok((buffer, value as usize))
360}
361
362fn parse_ids(
363    buffer: &[u8],
364    protocol: u8,
365    archive_count: usize,
366) -> crate::Result<(&[u8], Vec<u32>)> {
367    let (buffer, ids) = if protocol >= 7 {
368        many_m_n(0, archive_count, be_u32_smart).parse(buffer)?
369    } else {
370        let (buf, res) = many_m_n(0, archive_count, be_u16).parse(buffer)?;
371        let res = res.iter().map(|&ec| ec as u32).collect();
372        (buf, res)
373    };
374
375    Ok((buffer, ids))
376}
377
378fn parse_entry_counts(
379    buffer: &[u8],
380    protocol: u8,
381    archive_count: usize,
382) -> crate::Result<(&[u8], Vec<usize>)> {
383    let (buffer, entry_counts) = if protocol >= 7 {
384        many_m_n(0, archive_count, be_u32_smart).parse(buffer)?
385    } else {
386        let (buf, res) = many_m_n(0, archive_count, be_u16).parse(buffer)?;
387        let res = res.iter().map(|&ec| ec as u32).collect();
388
389        (buf, res)
390    };
391
392    let entry_counts: Vec<usize> = entry_counts
393        .iter()
394        .map(|&entry_count| entry_count as usize)
395        .collect();
396
397    Ok((buffer, entry_counts))
398}