assystem/
lib.rs

1use std::{
2    io::{Read, Seek, SeekFrom, Write},
3    rc::Rc,
4};
5/*
6Memory structure: file header, root node, blocks
7
8"File header" is just the name of the file format
9
10Node structure: false branch data pos (nul?), true branch data pos (nul?), content block pos (nul?)
11
12Block structure: prev block pos (nul? only in first block), block length, next block pos (nul?)
13
14The first block must be present and empty
15*/
16
17const FILE_HEADER: [u8; 7] = *b"ASS v1\0";
18mod offsets {
19    pub mod r#static {
20        pub const ROOT_NODE: u64 = super::super::FILE_HEADER.len() as u64;
21        pub const BLOCKS: u64 = ROOT_NODE + super::super::sizes::NODE;
22    }
23
24    pub mod node {
25        pub const FALSE_BRANCH_DATA_POS: u64 = 0;
26        pub const TRUE_BRANCH_DATA_POS: u64 = FALSE_BRANCH_DATA_POS + 8;
27        pub const CONTENT_BLOCK_POS: u64 = TRUE_BRANCH_DATA_POS + 8;
28    }
29
30    pub mod block {
31        pub const PREV_BLOCK_POS: u64 = 0;
32        pub const BLOCK_LENGTH: u64 = PREV_BLOCK_POS + 8;
33        pub const NEXT_BLOCK_POS: u64 = BLOCK_LENGTH + 8;
34    }
35}
36mod sizes {
37    pub const BLOCK: u64 = super::offsets::block::NEXT_BLOCK_POS + 8;
38    pub const NODE: u64 = super::offsets::node::CONTENT_BLOCK_POS + 8;
39}
40
41type DataPosition = u64;
42type BlockPosition = u64;
43
44fn bits<'a>(bytes: &'a [u8]) -> impl Iterator<Item = bool> + 'a {
45    struct BitIter<'a> {
46        bytes: std::slice::Iter<'a, u8>,
47        mask: u8,
48        curbyte: u8,
49    }
50    impl<'a> Iterator for BitIter<'a> {
51        type Item = bool;
52
53        fn next(&mut self) -> Option<Self::Item> {
54            if self.mask == 0b1000_0000 {
55                self.curbyte = *self.bytes.next()?;
56            }
57            let result = self.mask & self.curbyte != 0;
58            self.mask = self.mask.rotate_right(1);
59            Some(result)
60        }
61    }
62    BitIter {
63        bytes: bytes.iter(),
64        mask: 0b1000_0000,
65        curbyte: 0,
66    }
67}
68
69struct TaskSource {
70    ref_: Rc<Task>,
71    is_true_branch: bool,
72}
73struct Task {
74    prev: Option<TaskSource>,
75    node_pos: u64,
76}
77impl Task {
78    fn gather_key(&self) -> Vec<u8> {
79        let mut result = Vec::new();
80
81        let mut curbyte: u8 = 0;
82        let mut mask: u8 = 0b0000_0001;
83
84        let mut cur_prev = &self.prev;
85
86        while let Some(prev) = cur_prev {
87            if prev.is_true_branch {
88                curbyte |= mask;
89            }
90            if mask == 0b1000_0000 {
91                result.push(curbyte);
92                curbyte = 0;
93            }
94            mask = mask.rotate_left(1);
95            cur_prev = &prev.ref_.prev;
96        }
97
98        result.reverse();
99        result
100    }
101}
102
103pub struct Lister<'a, F> {
104    ass: &'a mut ASS<F>,
105    tasks: Vec<Task>,
106}
107impl<'a, F: ASSFile> Iterator for Lister<'a, F> {
108    type Item = (Vec<u8>, Vec<u8>);
109
110    fn next(&mut self) -> Option<Self::Item> {
111        loop {
112            let task = self.tasks.pop()?;
113            self.ass.file.seek(SeekFrom::Start(task.node_pos)).unwrap();
114            let task = Rc::new(task);
115            for is_true_branch in [false, true] {
116                let branch_data_pos = self.ass.read_u64();
117                if branch_data_pos != DATA_DOES_NOT_EXIST_POS {
118                    self.tasks.push(Task {
119                        node_pos: branch_data_pos,
120                        prev: Some(TaskSource {
121                            ref_: task.clone(),
122                            is_true_branch,
123                        }),
124                    });
125                }
126            }
127            let content_block_pos = self.ass.read_u64();
128            if content_block_pos != DATA_DOES_NOT_EXIST_POS {
129                let key = task.gather_key();
130                let value = self.ass.read_block(content_block_pos);
131                return Some((key, value));
132            }
133        }
134    }
135}
136
137pub trait ASSFile: Write + Read + Seek {
138    /// Truncates the file at its current position
139    fn truncate(&mut self) -> std::io::Result<()>;
140}
141impl ASSFile for std::io::Cursor<Vec<u8>> {
142    fn truncate(&mut self) -> std::io::Result<()> {
143        let curpos = self.seek(SeekFrom::Current(0)).unwrap();
144        self.get_mut().truncate(curpos.try_into().unwrap());
145        Ok(())
146    }
147}
148impl ASSFile for std::fs::File {
149    fn truncate(&mut self) -> std::io::Result<()> {
150        let curpos = self.seek(SeekFrom::Current(0)).unwrap();
151        self.set_len(curpos)
152    }
153}
154
155const EMPTY_VALUE_BLOCK_POS: u64 = 1;
156const DATA_DOES_NOT_EXIST_POS: u64 = 0;
157
158pub struct ASS<F> {
159    file: F,
160}
161impl<F: ASSFile> ASS<F> {
162    fn write_u64(&mut self, index: u64) {
163        self.file.write_all(&index.to_be_bytes()).unwrap();
164    }
165    fn read_u64(&mut self) -> u64 {
166        let mut result = [0u8; 8];
167        self.file.read_exact(&mut result).unwrap();
168        u64::from_be_bytes(result)
169    }
170    fn alloc(&mut self, data: &[u8]) -> BlockPosition {
171        if data.len() == 0 {
172            return EMPTY_VALUE_BLOCK_POS;
173        }
174        let data_len: u64 = data.len().try_into().unwrap();
175        self.file
176            .seek(SeekFrom::Start(offsets::r#static::BLOCKS))
177            .unwrap();
178        loop {
179            let _prev_block_pos = self.read_u64();
180            let block_length = self.read_u64();
181            let next_block_pos = self.read_u64();
182            let data_pos = self.tell();
183            if next_block_pos != DATA_DOES_NOT_EXIST_POS {
184                let free_space_length = (next_block_pos - data_pos) - block_length;
185                if free_space_length < data_len + sizes::BLOCK {
186                    self.file.seek(SeekFrom::Start(next_block_pos)).unwrap();
187                    continue;
188                }
189            }
190            let existing_block_pos = data_pos - sizes::BLOCK;
191            self.file
192                .seek_relative(block_length.try_into().unwrap())
193                .unwrap();
194            self.write_u64(existing_block_pos);
195            self.write_u64(data_len);
196            self.write_u64(next_block_pos);
197            self.file.write_all(&data).unwrap();
198            self.file
199                .seek(SeekFrom::Start(
200                    existing_block_pos + offsets::block::NEXT_BLOCK_POS,
201                ))
202                .unwrap();
203            let new_block_pos = data_pos + block_length;
204            self.write_u64(new_block_pos);
205            if next_block_pos != DATA_DOES_NOT_EXIST_POS {
206                self.file
207                    .seek(SeekFrom::Start(
208                        next_block_pos + offsets::block::PREV_BLOCK_POS,
209                    ))
210                    .unwrap();
211                self.write_u64(new_block_pos);
212            }
213            return new_block_pos;
214        }
215    }
216    fn dealloc(&mut self, pos: BlockPosition) {
217        if pos == EMPTY_VALUE_BLOCK_POS {
218            return;
219        }
220        self.file.seek(SeekFrom::Start(pos)).unwrap();
221        let prev_block_pos = self.read_u64();
222        let _block_length = self.read_u64();
223        let next_block_pos = self.read_u64();
224        if next_block_pos == DATA_DOES_NOT_EXIST_POS {
225            self.file
226                .seek(SeekFrom::Start(
227                    prev_block_pos + offsets::block::BLOCK_LENGTH,
228                ))
229                .unwrap();
230            let prev_block_len = self.read_u64();
231            self.file
232                .seek_relative(
233                    i64::try_from(prev_block_len + sizes::BLOCK - offsets::block::NEXT_BLOCK_POS)
234                        .unwrap(),
235                )
236                .unwrap();
237            self.file.truncate().unwrap();
238        } else {
239            self.file
240                .seek(SeekFrom::Start(
241                    next_block_pos + offsets::block::PREV_BLOCK_POS,
242                ))
243                .unwrap();
244            self.write_u64(prev_block_pos);
245        }
246        self.file
247            .seek(SeekFrom::Start(
248                prev_block_pos + offsets::block::NEXT_BLOCK_POS,
249            ))
250            .unwrap();
251        self.write_u64(next_block_pos);
252    }
253    fn read_block(&mut self, pos: BlockPosition) -> Vec<u8> {
254        if pos == EMPTY_VALUE_BLOCK_POS {
255            return Vec::new();
256        }
257        self.file.seek(SeekFrom::Start(pos)).unwrap();
258        let _prev_block_pos = self.read_u64();
259        let block_length = self.read_u64();
260        let _next_block_pos = self.read_u64();
261        let mut result = vec![0u8; block_length.try_into().unwrap()];
262        self.file.read_exact(&mut result).unwrap();
263        result
264    }
265    fn tell(&mut self) -> u64 {
266        self.file.seek(SeekFrom::Current(0)).unwrap()
267    }
268    pub fn get(&mut self, key: &[u8]) -> Option<Vec<u8>> {
269        self.file
270            .seek(SeekFrom::Start(offsets::r#static::ROOT_NODE))
271            .unwrap();
272        for bit in bits(key) {
273            if bit {
274                self.file
275                    .seek_relative(offsets::node::TRUE_BRANCH_DATA_POS as i64)
276                    .unwrap();
277            }
278            let branch_data_position = self.read_u64();
279            if branch_data_position == DATA_DOES_NOT_EXIST_POS {
280                return None;
281            }
282            self.file
283                .seek(SeekFrom::Start(branch_data_position))
284                .unwrap();
285        }
286        self.file
287            .seek_relative(offsets::node::CONTENT_BLOCK_POS as i64)
288            .unwrap();
289        let content_block_pos = self.read_u64();
290        if content_block_pos == DATA_DOES_NOT_EXIST_POS {
291            None
292        } else {
293            Some(self.read_block(content_block_pos))
294        }
295    }
296    pub fn set(&mut self, key: &[u8], value: &[u8]) -> Option<Vec<u8>> {
297        self.file
298            .seek(SeekFrom::Start(offsets::r#static::ROOT_NODE))
299            .unwrap();
300        for bit in bits(key) {
301            if bit {
302                self.file
303                    .seek_relative(offsets::node::TRUE_BRANCH_DATA_POS as i64)
304                    .unwrap();
305            }
306            let mut branch_data_pos = self.read_u64();
307            if branch_data_pos == DATA_DOES_NOT_EXIST_POS {
308                let branch_data_pos_pos = self.tell() - offsets::node::TRUE_BRANCH_DATA_POS;
309                let new_node_data_pos = self.alloc(&[0u8; sizes::NODE as usize]) + sizes::BLOCK;
310                self.file
311                    .seek(SeekFrom::Start(branch_data_pos_pos))
312                    .unwrap();
313                self.write_u64(new_node_data_pos);
314                branch_data_pos = new_node_data_pos;
315            }
316            self.file.seek(SeekFrom::Start(branch_data_pos)).unwrap();
317        }
318        self.file
319            .seek_relative(offsets::node::CONTENT_BLOCK_POS as i64)
320            .unwrap();
321        let content_block_pos_pos = self.tell();
322        let old_content_block_pos = self.read_u64();
323        let previous_value = if old_content_block_pos == DATA_DOES_NOT_EXIST_POS {
324            None
325        } else {
326            let previous_value = self.read_block(old_content_block_pos);
327            self.dealloc(old_content_block_pos);
328            Some(previous_value)
329        };
330        let new_content_block_pos = self.alloc(value);
331        self.file
332            .seek(SeekFrom::Start(content_block_pos_pos))
333            .unwrap();
334        self.write_u64(new_content_block_pos);
335        previous_value
336    }
337    pub fn remove(&mut self, key: &[u8]) -> Option<Vec<u8>> {
338        struct Decision {
339            pos: DataPosition,
340            is_true_branch: bool,
341        }
342        let mut decisions = Vec::new();
343        let mut cur_node_pos: DataPosition = offsets::r#static::ROOT_NODE;
344        self.file.seek(SeekFrom::Start(cur_node_pos)).unwrap();
345        for bit in bits(key) {
346            if bit {
347                self.file
348                    .seek_relative(offsets::node::TRUE_BRANCH_DATA_POS as i64)
349                    .unwrap();
350            }
351            let branch_data_position = self.read_u64();
352            if branch_data_position == DATA_DOES_NOT_EXIST_POS {
353                return None;
354            }
355            self.file
356                .seek(SeekFrom::Start(branch_data_position))
357                .unwrap();
358            decisions.push(Decision {
359                pos: cur_node_pos,
360                is_true_branch: bit,
361            });
362            cur_node_pos = branch_data_position;
363        }
364        self.file
365            .seek_relative(offsets::node::CONTENT_BLOCK_POS as i64)
366            .unwrap();
367        let content_block_pos = self.read_u64();
368        let previous_value = if content_block_pos == DATA_DOES_NOT_EXIST_POS {
369            None
370        } else {
371            let previous_value = self.read_block(content_block_pos);
372            self.dealloc(content_block_pos);
373            Some(previous_value)
374        };
375        self.file
376            .seek(SeekFrom::Start(
377                cur_node_pos + offsets::node::CONTENT_BLOCK_POS,
378            ))
379            .unwrap();
380        self.write_u64(DATA_DOES_NOT_EXIST_POS);
381        while let Some(decision) = decisions.pop() {
382            self.file.seek(SeekFrom::Start(cur_node_pos)).unwrap();
383            let false_branch_data_pos = self.read_u64();
384            let true_branch_data_pos = self.read_u64();
385            let content_block_pos = self.read_u64();
386            if false_branch_data_pos == DATA_DOES_NOT_EXIST_POS
387                && true_branch_data_pos == DATA_DOES_NOT_EXIST_POS
388                && content_block_pos == DATA_DOES_NOT_EXIST_POS
389            {
390                self.dealloc(cur_node_pos - sizes::BLOCK);
391                self.file.seek(SeekFrom::Start(decision.pos)).unwrap();
392                if decision.is_true_branch {
393                    self.file
394                        .seek_relative(offsets::node::TRUE_BRANCH_DATA_POS as i64)
395                        .unwrap();
396                }
397                self.write_u64(DATA_DOES_NOT_EXIST_POS);
398                cur_node_pos = decision.pos;
399            } else {
400                break;
401            }
402        }
403        previous_value
404    }
405    pub fn list(&mut self) -> Lister<F> {
406        Lister {
407            ass: self,
408            tasks: vec![Task {
409                node_pos: offsets::r#static::ROOT_NODE,
410                prev: None,
411            }],
412        }
413    }
414    pub fn open(mut file: F) -> Result<Self, OpeningError> {
415        file.seek(SeekFrom::Start(0)).unwrap();
416        let is_empty = match file.read_exact(&mut [0]) {
417            Ok(()) => {
418                file.seek(SeekFrom::Start(0)).unwrap();
419                false
420            }
421            Err(_) => true,
422        };
423        let mut ass = Self { file };
424        if is_empty {
425            ass.file.write_all(&FILE_HEADER).unwrap();
426            ass.write_u64(DATA_DOES_NOT_EXIST_POS);
427            ass.write_u64(DATA_DOES_NOT_EXIST_POS);
428            ass.write_u64(DATA_DOES_NOT_EXIST_POS);
429            ass.write_u64(DATA_DOES_NOT_EXIST_POS);
430            ass.write_u64(0);
431            ass.write_u64(DATA_DOES_NOT_EXIST_POS);
432        } else {
433            let mut header_buf = [0u8; FILE_HEADER.len()];
434            ass.file
435                .read_exact(&mut header_buf)
436                .map_err(|_| OpeningError::Assless())?;
437            if header_buf != FILE_HEADER {
438                return Err(OpeningError::Assless());
439            }
440        }
441        Ok(ass)
442    }
443}
444
445#[derive(Debug)]
446pub enum OpeningError {
447    /// The file by the given path is not an ASS file of the needed version
448    Assless(),
449    IO(std::io::Error),
450}
451
452#[cfg(test)]
453mod tests {
454    use super::*;
455
456    fn set_get() -> ASS<impl ASSFile> {
457        let mut ass = ASS::open(std::io::Cursor::new(Vec::new())).unwrap();
458        assert_eq!(ass.set(b"Drunk", b"Driving"), None);
459        assert_eq!(ass.set(b"Spongebob", b"Squarewave"), None);
460        assert_eq!(ass.set(b"Drunk", b"Driving"), Some(v(b"Driving")));
461        assert_eq!(ass.get(b"Spongebob"), Some(v(b"Squarewave")));
462        assert_eq!(ass.get(b"Drunk"), Some(v(b"Driving")));
463        assert_eq!(ass.get(b"DISTONN"), None);
464        ass
465    }
466    #[test]
467    fn test_set_get() {
468        set_get();
469    }
470
471    fn len<F: ASSFile>(ass: &mut ASS<F>) -> u64 {
472        ass.file.seek(SeekFrom::End(0)).unwrap()
473    }
474
475    fn v(b: &[u8]) -> Vec<u8> {
476        Vec::from(b)
477    }
478
479    #[test]
480    fn test_replacing() {
481        let mut ass = set_get();
482
483        let len_1 = len(&mut ass);
484
485        assert_eq!(
486            ass.set(b"Spongebob", b"Squarepants"),
487            Some(Vec::from(b"Squarewave"))
488        );
489
490        let len_2 = len(&mut ass);
491
492        assert_eq!(len_1, len_2 - 1);
493
494        assert_eq!(
495            ass.set(b"Spongebob", b"Squarepants"),
496            Some(Vec::from(b"Squarepants"))
497        );
498
499        let len_3 = len(&mut ass);
500
501        assert_eq!(len_2, len_3);
502    }
503
504    #[test]
505    fn test_listing() {
506        let mut ass = set_get();
507
508        assert_eq!(
509            ass.list().collect::<Vec<_>>(),
510            vec![
511                (v(b"Spongebob"), v(b"Squarewave")),
512                (v(b"Drunk"), v(b"Driving"))
513            ]
514        );
515    }
516
517    #[test]
518    fn test_removing() {
519        let mut ass = set_get();
520
521        assert_eq!(ass.remove(b"Spongebob"), Some(v(b"Squarewave")));
522        assert_eq!(ass.remove(b"Spongebob"), None);
523    }
524
525    #[test]
526    fn test_branch_reduction() {
527        let mut ass = set_get();
528
529        let source_len = len(&mut ass);
530
531        assert_eq!(ass.set(b"Spongebob1", b"TEST"), None);
532
533        let len_after_addition = len(&mut ass);
534
535        assert_eq!(source_len, len_after_addition - (24 * 2) * 8 - 24 - 4);
536
537        assert_eq!(ass.remove(b"Spongebob1"), Some(v(b"TEST")));
538        assert_eq!(ass.remove(b"Spongebob1"), None);
539        assert_eq!(ass.get(b"Spongebob"), Some(v(b"Squarewave")));
540
541        let len_after_removal = len(&mut ass);
542
543        assert_eq!(source_len, len_after_removal);
544    }
545}