1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
//! # smol - 5 bit encoding file format
//! what is smol? smol is a file format that compresses text into 5 bits per letter instead of the normal 8 bits.
//! this is achieved by limiting the charset to 32 chars so every letter fits nicely into 5 bits (32 values). 
//! this creates some problems, such as a byte being 8 bits wide, so letters will overlap byte boundaries,
//! but this is all handled by this library for ease of use.  
//!   
//! this is not made for any production applications, only as a hobby.  
//!   
//! ## how does this work?  
//! this is the entire charset  
//! ```text
//! " abcdefghijklmnopqrstuvwxyz.!?12"  
//!  ^ space
//! ```  
//!  
//! first 30 chars are normal (space, alphabet, `.`, `!` and `?`)  
//!   
//! the last two (1 and 2) have special functions, which are:  [CURRENTLY NOT IMPLEMENTED]  
//! (1): enters number mode - all characters read will be interpreted as their number part. [A = 1, B = 2 ... I = 9]  
//! (2): makes the next character uppercase, or triggers a special action in some cases. example: newlines are formatted `"2 "`
//!   
//! see also [`SmolBlob`]

use std::io::{ Cursor, Read };
use anyhow::{ Result, Error };
mod utils;

// format version that `SmolBlob::encode` stamps on every blob it creates;
// `SmolBlob::buffer` writes it into the file header as a LEB128 number.
// CHANGE THIS EACH RELEASE
const VERSION: u64 = 1;

/// a chunk of smol data: the format version together with the packed 5 bit payload
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmolBlob {
    /// format version, written into the file header when the blob is serialized
    version: u64,
    /// the letters, packed at 5 bits each
    data: Vec<u8>
}

/// [`SmolBlob`] is a chunk of smol data
impl SmolBlob {
    /// converts the [`SmolBlob`] data into a valid file buffer
    pub fn buffer(&self) -> Vec<u8> {
        let mut buffer = vec![];
        buffer.extend(b"smol");

        let mut header = vec![];
        leb128::write::unsigned(&mut header, self.version).unwrap();

        leb128::write::unsigned(&mut buffer, header.len() as u64).unwrap();
        buffer.extend(header);
        leb128::write::unsigned(&mut buffer, self.data.len() as u64).unwrap();
        buffer.extend(self.data.clone());

        buffer
    }

    /// returns the inner buffer length **ONLY**, not to be confused by [`SmolBlob::buffer`].len()
    pub fn len(&self) -> usize {
        self.buffer().len()
    }

    /// encodes a [`String`] and returns a [`SmolBlob`]
    /// # example
    ///
    /// ```
    /// let encoded: SmolBlob = SmolBlob::encode(&input);
    /// fs::write("smol.bin", &encoded.buffer()).unwrap();
    /// ```
    pub fn encode(str: &String) -> SmolBlob {
        let mut data = vec![];
    
        let mut current: u16 = 0;
        let mut offset: usize = 0;
        for char in str.to_lowercase().chars().into_iter() {
            let index = utils::char_to_index(char);
            current |= (index as u16) << offset;
            offset += 5;
            if offset >= 8 {
                data.push((current & 0xff) as u8);
                current = (current & 0xff00) >> 8;
                offset -= 8;
            }
        }
    
        if offset > 0 {
            data.push(current as u8);
        }
    
        return SmolBlob { version: VERSION, data };
    }
    /// decodes a [`SmolBlob::buffer`] and returns a [`String`]  
    /// # example
    ///
    /// ```
    /// let decoded: String = SmolBlob::decode(&encoded.buffer()).unwrap();
    /// fs::write("unsmol.bin", &decoded).unwrap();
    /// ```
    pub fn decode(input: &Vec<u8>) -> Result<String, Error> {
        if input.len() < 4 {
            return Err(Error::msg("file is too small, is it truncated?"));
        }
        let mut curs = Cursor::new(input);
        
        let mut magic = [0u8; 4];
        curs.read_exact(&mut magic)?;
    
        if &magic != b"smol" {
            return Err(Error::msg("invalid file magic, this is not an smol file"));
        }
    
        let header_size = leb128::read::unsigned(&mut curs)?;
        let mut header = vec![0u8; header_size as usize];
        curs.read_exact(&mut header)?;
    
        let data_size = leb128::read::unsigned(&mut curs)?;
        let mut data = vec![0u8; data_size as usize];
        curs.read_exact(&mut data)?;
    
        let mut out_string = String::new();
        let mut current_bit: u8 = 0;
        let mut byte_index: usize = 0;
        loop {
            let mut current: u8 = (data[byte_index] >> current_bit) & 0x1F;
    
            if current_bit >= 4 {
                if byte_index == (data.len() - 1) {
                    break;
                } 
    
                current |= (data[byte_index + 1]  << (8 - current_bit)) & 0x1F;
            }
    
            out_string.push(utils::index_to_char(current as usize));
    
            current_bit += 5;
            if current_bit >= 8 {
                byte_index += 1;
                current_bit -= 8;
            }
        }
    
        Ok(out_string)
    }

    /// decodes a [`SmolBlob`] and returns a [`String`]  
    /// internally this is the same as [`SmolBlob::decode`] but with [`SmolBlob`] instead of [`Vec<u8>`] buffer
    /// # example
    ///
    /// ```
    /// let decoded: String = SmolBlob::decode_blob(&encoded).unwrap();
    /// fs::write("unsmol.bin", &decoded).unwrap();
    /// ```
    pub fn decode_blob(input: &SmolBlob) -> Result<String, Error> {
        return SmolBlob::decode(&input.buffer());
    }
}