tejar/
create.rs

/// Size, in bytes, of the scratch buffer handed to each `read` call when a
/// file is streamed: 8 KiB plus one byte.
///
/// NOTE(review): the extra byte presumably makes this buffer larger than the
/// default 8 KiB internal buffer of `std::io::BufReader`, so that reads land
/// directly in this buffer instead of going through the reader's own - confirm.
const CHUNK_SIZE: usize = (8 << 10) + 1;
2
/// An archive held entirely in memory: the list text plus the raw data bytes.
///
/// NOTE(review): no live code in this file constructs this type; only the
/// commented-out draft of `create` at the bottom of the file does.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Tejar {
    /// Text of the list file.
    pub list_content: String,
    /// Content of all files.
    pub data_content: Vec<u8>,
}
7
8pub struct InputFile {
9    pub path: camino::Utf8PathBuf,
10    pub content_type: String,
11    pub gzip: bool,
12}
13
14pub struct List {
15    pub records: Vec<ListRecord>,
16}
17
18impl List {
19    pub fn new<I>(records: I) -> Self
20    where
21        I: Iterator<Item = ListRecord>,
22    {
23        Self {
24            records: records.collect(),
25        }
26    }
27
28    fn list_writer(
29        &self,
30        root: &camino::Utf8Path,
31    ) -> Result<camino::Utf8PathBuf, crate::error::ListWriterError> {
32        use std::io::Write;
33        let file_path = root.join("LIST.tejar-list");
34        let list_file = std::fs::File::create(&file_path)?;
35        let mut list_writer = std::io::BufWriter::new(list_file);
36        for record in self.records.iter() {
37            list_writer.write_all(record.to_string().as_bytes())?;
38        }
39        Ok(file_path)
40    }
41
42    pub(crate) fn parse_list(list_content: &str) -> Result<List, crate::error::ListParseError> {
43        let lines = list_content.trim().split('\n');
44        let iter = lines.enumerate();
45        let mut records = Vec::new();
46        for (index, line) in iter {
47            if line.is_empty() {
48                continue;
49            }
50            let parts: Vec<&str> = line.split('|').collect();
51            if parts.len() == 8 {
52                let record = ListRecord {
53                    data_file_name: parts[0].to_string(),
54                    file_name: parts[1].to_string(),
55                    start: parts[2].parse::<u32>().map_err(|e| {
56                        crate::error::ListParseError::ParseError {
57                            line: index + 1,
58                            message: e.to_string(),
59                        }
60                    })?,
61                    size: parts[3].parse::<u32>().map_err(|e| {
62                        crate::error::ListParseError::ParseError {
63                            line: index + 1,
64                            message: e.to_string(),
65                        }
66                    })?,
67                    content_type: parts[4].to_string(),
68                    compression: parts[5].to_string(),
69                    timestamp: parts[6].parse::<u64>().map_err(|e| {
70                        crate::error::ListParseError::ParseError {
71                            line: index + 1,
72                            message: e.to_string(),
73                        }
74                    })?,
75                    checksum: parts[7].to_string(),
76                };
77                records.push(record);
78            } else {
79                return Err(crate::error::ListParseError::ParseError {
80                    line: index + 1,
81                    message: line.to_string(),
82                });
83            }
84        }
85        Ok(List { records })
86    }
87}
88
/// One entry of the list file, describing where a single archived file lives
/// inside a data file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListRecord {
    /// Name of the data file that holds the bytes of this entry.
    pub data_file_name: String,
    /// Path of the archived file, as given in the input.
    pub file_name: String,
    /// Content type of the archived file.
    pub content_type: String,
    /// Compression label; this file writes `"gzip"` or `"none"`.
    pub compression: String, // TODO: enum Compressor { Only Supported Compression + None }
    /// Byte offset of the entry's content within the data file.
    pub start: u32,
    /// Length of the entry's content in bytes.
    pub size: u32,
    /// Creation time of the record, in seconds since the Unix epoch.
    pub timestamp: u64,
    /// Checksum of the file's content.
    pub checksum: String,
}

/// Formats the record as one list-file line, *including* the trailing newline:
/// `data_file_name|file_name|start|size|content_type|compression|timestamp|checksum\n`.
///
/// Implementing `Display` (rather than `ToString` directly) keeps
/// `record.to_string()` working through the standard blanket impl and also
/// lets the record be used with `format!`/`write!`.
impl std::fmt::Display for ListRecord {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(
            f,
            "{}|{}|{}|{}|{}|{}|{}|{}",
            self.data_file_name,
            self.file_name,
            self.start,
            self.size,
            self.content_type,
            self.compression,
            self.timestamp,
            self.checksum,
        )
    }
}
115
116impl ListRecord {
117    fn new(
118        data_file_name: &str,
119        input_file: &InputFile,
120        checksum: &str,
121        start: u32,
122        size: u32,
123        timestamp: u64,
124    ) -> Self {
125        ListRecord {
126            data_file_name: data_file_name.to_string(),
127            file_name: input_file.path.to_string(),
128            content_type: input_file.content_type.clone(),
129            compression: if input_file.gzip {
130                "gzip".to_string()
131            } else {
132                // TODO:
133                "none".to_string()
134            },
135            start,
136            size,
137            timestamp,
138            checksum: checksum.to_string(),
139        }
140    }
141}
142
143pub fn create(
144    root: &camino::Utf8Path,
145    files: &[InputFile],
146) -> Result<(camino::Utf8PathBuf, camino::Utf8PathBuf), crate::error::CreateError> {
147    let data_file_name = generate_data_file_name();
148    let data_file_path = root.join(data_file_name.as_str());
149    let data_file = std::fs::File::create(&data_file_path)?;
150    let mut writer = std::io::BufWriter::new(data_file);
151    let mut list_records: Vec<(String, ListRecord)> = vec![];
152    let mut start = 0;
153    for input_file in files.iter() {
154        let checksum = calculate_checksum(&root.join(input_file.path.as_path()))?;
155        if let Some((_, record)) = list_records.iter().find(|(c, _)| c.eq(&checksum)) {
156            let record = ListRecord::new(
157                &data_file_name,
158                input_file,
159                &checksum,
160                record.start,
161                record.size,
162                current_time_secs()?,
163            );
164            list_records.push((checksum, record));
165            continue;
166        }
167        let content_length = data_writer(&mut writer, &root.join(&input_file.path))?;
168        let record = ListRecord::new(
169            &data_file_name,
170            input_file,
171            &checksum,
172            start,
173            content_length,
174            current_time_secs()?,
175        );
176        list_records.push((checksum, record));
177        start += content_length;
178    }
179    let list_data = List::new(list_records.into_iter().map(|(_, r)| r));
180    let list_path = list_data.list_writer(root)?;
181    Ok((list_path, data_file_path))
182}
183
184fn calculate_checksum(path: &camino::Utf8Path) -> Result<String, crate::error::CheckSumError> {
185    use sha2::Digest;
186    use std::io::Read;
187    let mut file_reader = std::io::BufReader::new(std::fs::File::open(path)?);
188    let mut buffer = [0u8; CHUNK_SIZE];
189    let mut hasher = sha2::Sha256::new();
190    loop {
191        let bytes_read = file_reader.read(&mut buffer)?;
192        if bytes_read == 0 {
193            break;
194        }
195        hasher.update(&buffer[..bytes_read]);
196    }
197    Ok(hex::encode(hasher.finalize().as_slice()))
198}
199
200fn data_writer<W: std::io::Write>(
201    writer: &mut std::io::BufWriter<W>,
202    path: &camino::Utf8Path, // File path to be written into writer
203) -> Result<u32, crate::error::DataWriterError> {
204    use std::io::Read;
205    use std::io::Write;
206    let mut buffer = [0u8; CHUNK_SIZE];
207    let mut file_reader = std::io::BufReader::new(std::fs::File::open(path)?);
208    let mut content_length = 0;
209    loop {
210        let bytes_read = file_reader.read(&mut buffer)?;
211        if bytes_read == 0 {
212            break;
213        }
214        writer.write_all(&buffer[..bytes_read])?;
215        content_length += bytes_read;
216    }
217    Ok(content_length as u32)
218}
219
/// Produces a data-file name of the form `DATA-<millis>.tejar-data`, where
/// `<millis>` is the current time in milliseconds since the Unix epoch.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn generate_data_file_name() -> String {
    let millis = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("tejar: SystemTime went backward")
        .as_millis();
    format!("DATA-{}.tejar-data", millis)
}
227
/// Returns the current time as whole seconds since the Unix epoch.
///
/// # Errors
///
/// Returns the `SystemTimeError` produced when the system clock is set to a
/// time before the Unix epoch.
fn current_time_secs() -> Result<u64, std::time::SystemTimeError> {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
}
232
233/*
234Rough Work, will be needing it later
235pub fn create(
236    root: &camino::Utf8Path,
237    files: &[InputFile],
238) -> Result<Tejar, crate::error::CreateError> {
239    let mut list_content = "".to_string();
240    let mut data_content = vec![];
241
242    for input_file in files.iter() {
243        let path = root.join(&input_file.path);
244        // todo: use buffer reader +
245        let mut content = std::fs::read(path)?;
246        // TODO: use compression?
247        //
248        list_content.push_str(
249            format!(
250                "{}|{}|{}\n",
251                input_file.path.as_str(),
252                input_file.content_type,
253                content.len()
254            )
255            .as_str(),
256        );
257        data_content.append(&mut content);
258    }
259
260    Ok(Tejar {
261        list_content,
262        data_content,
263    })
264}
265
266 */