/// Size in bytes of the scratch buffer used when reading files in chunks.
///
/// Kept at exactly 8 KiB; the previous value carried a stray `+ 1`.
const CHUNK_SIZE: usize = 8 * 1024;

/// In-memory pair of list text and data bytes.
///
/// NOTE(review): nothing in this file constructs a `Tejar`; the field
/// descriptions below are inferred from the field names — confirm against
/// the code that reads archives.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tejar {
    /// Presumably the text of a list file (one record per line).
    pub list_content: String,
    /// Presumably the raw bytes of the matching data file.
    pub data_content: Vec<u8>,
}
7
8pub struct InputFile {
9 pub path: camino::Utf8PathBuf,
10 pub content_type: String,
11 pub gzip: bool,
12}
13
14pub struct List {
15 pub records: Vec<ListRecord>,
16}
17
18impl List {
19 pub fn new<I>(records: I) -> Self
20 where
21 I: Iterator<Item = ListRecord>,
22 {
23 Self {
24 records: records.collect(),
25 }
26 }
27
28 fn list_writer(
29 &self,
30 root: &camino::Utf8Path,
31 ) -> Result<camino::Utf8PathBuf, crate::error::ListWriterError> {
32 use std::io::Write;
33 let file_path = root.join("LIST.tejar-list");
34 let list_file = std::fs::File::create(&file_path)?;
35 let mut list_writer = std::io::BufWriter::new(list_file);
36 for record in self.records.iter() {
37 list_writer.write_all(record.to_string().as_bytes())?;
38 }
39 Ok(file_path)
40 }
41
42 pub(crate) fn parse_list(list_content: &str) -> Result<List, crate::error::ListParseError> {
43 let lines = list_content.trim().split('\n');
44 let iter = lines.enumerate();
45 let mut records = Vec::new();
46 for (index, line) in iter {
47 if line.is_empty() {
48 continue;
49 }
50 let parts: Vec<&str> = line.split('|').collect();
51 if parts.len() == 8 {
52 let record = ListRecord {
53 data_file_name: parts[0].to_string(),
54 file_name: parts[1].to_string(),
55 start: parts[2].parse::<u32>().map_err(|e| {
56 crate::error::ListParseError::ParseError {
57 line: index + 1,
58 message: e.to_string(),
59 }
60 })?,
61 size: parts[3].parse::<u32>().map_err(|e| {
62 crate::error::ListParseError::ParseError {
63 line: index + 1,
64 message: e.to_string(),
65 }
66 })?,
67 content_type: parts[4].to_string(),
68 compression: parts[5].to_string(),
69 timestamp: parts[6].parse::<u64>().map_err(|e| {
70 crate::error::ListParseError::ParseError {
71 line: index + 1,
72 message: e.to_string(),
73 }
74 })?,
75 checksum: parts[7].to_string(),
76 };
77 records.push(record);
78 } else {
79 return Err(crate::error::ListParseError::ParseError {
80 line: index + 1,
81 message: line.to_string(),
82 });
83 }
84 }
85 Ok(List { records })
86 }
87}
88
/// One entry of a list file: where a file's bytes live inside a data file,
/// plus its metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListRecord {
    /// Name of the data file that holds this entry's bytes.
    pub data_file_name: String,
    /// Name (path) of the archived file.
    pub file_name: String,
    /// Content type recorded for the file.
    pub content_type: String,
    /// Compression label; the constructor in this file writes `"gzip"` or
    /// `"none"`.
    pub compression: String,
    /// Byte offset of the entry inside the data file.
    pub start: u32,
    /// Length of the entry in bytes.
    pub size: u32,
    /// Seconds since the Unix epoch at which the record was created.
    pub timestamp: u64,
    /// Hex-encoded checksum of the file contents.
    pub checksum: String,
}

/// Formats the record as one list-file line, *including* the trailing
/// newline:
/// `data_file_name|file_name|start|size|content_type|compression|timestamp|checksum\n`.
///
/// Implemented as `Display` (rather than a hand-written `ToString`) so the
/// record works with `format!`/`write!`; `to_string()` remains available
/// through the standard blanket implementation and yields the same text.
impl std::fmt::Display for ListRecord {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(
            f,
            "{}|{}|{}|{}|{}|{}|{}|{}",
            self.data_file_name,
            self.file_name,
            self.start,
            self.size,
            self.content_type,
            self.compression,
            self.timestamp,
            self.checksum,
        )
    }
}
115
116impl ListRecord {
117 fn new(
118 data_file_name: &str,
119 input_file: &InputFile,
120 checksum: &str,
121 start: u32,
122 size: u32,
123 timestamp: u64,
124 ) -> Self {
125 ListRecord {
126 data_file_name: data_file_name.to_string(),
127 file_name: input_file.path.to_string(),
128 content_type: input_file.content_type.clone(),
129 compression: if input_file.gzip {
130 "gzip".to_string()
131 } else {
132 "none".to_string()
134 },
135 start,
136 size,
137 timestamp,
138 checksum: checksum.to_string(),
139 }
140 }
141}
142
143pub fn create(
144 root: &camino::Utf8Path,
145 files: &[InputFile],
146) -> Result<(camino::Utf8PathBuf, camino::Utf8PathBuf), crate::error::CreateError> {
147 let data_file_name = generate_data_file_name();
148 let data_file_path = root.join(data_file_name.as_str());
149 let data_file = std::fs::File::create(&data_file_path)?;
150 let mut writer = std::io::BufWriter::new(data_file);
151 let mut list_records: Vec<(String, ListRecord)> = vec![];
152 let mut start = 0;
153 for input_file in files.iter() {
154 let checksum = calculate_checksum(&root.join(input_file.path.as_path()))?;
155 if let Some((_, record)) = list_records.iter().find(|(c, _)| c.eq(&checksum)) {
156 let record = ListRecord::new(
157 &data_file_name,
158 input_file,
159 &checksum,
160 record.start,
161 record.size,
162 current_time_secs()?,
163 );
164 list_records.push((checksum, record));
165 continue;
166 }
167 let content_length = data_writer(&mut writer, &root.join(&input_file.path))?;
168 let record = ListRecord::new(
169 &data_file_name,
170 input_file,
171 &checksum,
172 start,
173 content_length,
174 current_time_secs()?,
175 );
176 list_records.push((checksum, record));
177 start += content_length;
178 }
179 let list_data = List::new(list_records.into_iter().map(|(_, r)| r));
180 let list_path = list_data.list_writer(root)?;
181 Ok((list_path, data_file_path))
182}
183
184fn calculate_checksum(path: &camino::Utf8Path) -> Result<String, crate::error::CheckSumError> {
185 use sha2::Digest;
186 use std::io::Read;
187 let mut file_reader = std::io::BufReader::new(std::fs::File::open(path)?);
188 let mut buffer = [0u8; CHUNK_SIZE];
189 let mut hasher = sha2::Sha256::new();
190 loop {
191 let bytes_read = file_reader.read(&mut buffer)?;
192 if bytes_read == 0 {
193 break;
194 }
195 hasher.update(&buffer[..bytes_read]);
196 }
197 Ok(hex::encode(hasher.finalize().as_slice()))
198}
199
200fn data_writer<W: std::io::Write>(
201 writer: &mut std::io::BufWriter<W>,
202 path: &camino::Utf8Path, ) -> Result<u32, crate::error::DataWriterError> {
204 use std::io::Read;
205 use std::io::Write;
206 let mut buffer = [0u8; CHUNK_SIZE];
207 let mut file_reader = std::io::BufReader::new(std::fs::File::open(path)?);
208 let mut content_length = 0;
209 loop {
210 let bytes_read = file_reader.read(&mut buffer)?;
211 if bytes_read == 0 {
212 break;
213 }
214 writer.write_all(&buffer[..bytes_read])?;
215 content_length += bytes_read;
216 }
217 Ok(content_length as u32)
218}
219
/// Produces a data file name of the form `DATA-<millis>.tejar-data`, where
/// `<millis>` is the current time in milliseconds since the Unix epoch.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
fn generate_data_file_name() -> String {
    let millis_since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::SystemTime::UNIX_EPOCH)
        .expect("tejar: SystemTime went backward")
        .as_millis();
    format!("DATA-{}.tejar-data", millis_since_epoch)
}
227
/// Returns the current time as whole seconds since the Unix epoch.
///
/// # Errors
///
/// Returns a `SystemTimeError` if the system clock is set before the epoch.
fn current_time_secs() -> Result<u64, std::time::SystemTimeError> {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|since_epoch| since_epoch.as_secs())
}
232
233