zff/version2/io/
zffcreator.rs

1// - STD
2use std::io::{Read, Write, Seek, SeekFrom, Cursor};
3use std::path::{PathBuf};
4use std::fs::{File, OpenOptions, remove_file, read_link, read_dir};
5use std::collections::{HashMap, VecDeque};
6
7
8// - internal
9use crate::{
10	Result,
11	HashType,
12	HeaderCoding,
13	ZffError,
14	ZffErrorKind,
15	file_extension_next_value,
16	file_extension_previous_value,
17	DEFAULT_HEADER_VERSION_SEGMENT_HEADER,
18	DEFAULT_FOOTER_VERSION_SEGMENT_FOOTER,
19	DEFAULT_FOOTER_VERSION_MAIN_FOOTER,
20	FILE_EXTENSION_FIRST_VALUE,
21};
22use crate::{
23	header::{ObjectHeader, MainHeader, SegmentHeader, ChunkHeader},
24	footer::{SegmentFooter, MainFooter},
25	version2::{
26		object::{ObjectEncoder, PhysicalObjectEncoder, LogicalObjectEncoder},
27	}
28};
29
30use super::{
31	get_file_header,
32	ObjectEncoderInformation,
33};
34
35#[cfg(target_family = "unix")]
36use super::{
37	add_to_hardlink_map,
38};
39
40// - external
41use ed25519_dalek::{Keypair};
42
43/// struct which contains the metadata of the appropriate creator (e.g. like encryption key, main header, ...).
44pub struct ZffCreatorMetadataParams {
45	encryption_key: Option<Vec<u8>>,
46	signature_key: Option<Keypair>,
47	main_header: MainHeader,
48	header_encryption: bool,
49	description_notes: Option<String>,
50}
51
52impl ZffCreatorMetadataParams {
53	/// constructs a struct with the given metadata.
54	pub fn with_data(
55		encryption_key: Option<Vec<u8>>,
56		signature_key: Option<Keypair>,
57		main_header: MainHeader,
58		header_encryption: bool,
59		description_notes: Option<String>) -> ZffCreatorMetadataParams {
60		Self {
61			encryption_key,
62			signature_key,
63			main_header,
64			header_encryption,
65			description_notes,
66		}
67	}
68}
69
70/// The ZffCreator can be used to create a new zff container by the given files/values.
71pub struct ZffCreator<R: Read> {
72	object_encoder_vec: Vec<ObjectEncoderInformation<R>>,
73	object_encoder: ObjectEncoder<R>, //the current object encoder
74	written_object_header: bool,
75	unaccessable_files: Vec<String>,
76	output_filenpath: String,
77	current_segment_no: u64,
78	last_accepted_segment_filepath: PathBuf,
79	description_notes: Option<String>,
80	object_header_segment_numbers: HashMap<u64, u64>, //<object_number, segment_no>
81	object_footer_segment_numbers: HashMap<u64, u64>, //<object_number, segment_no>
82}
83
84impl<R: Read> ZffCreator<R> {
85	/// Creates a new [ZffCreator] instance for the given values.
86	pub fn new<O: Into<String>>(
87		physical_objects: HashMap<ObjectHeader, R>, // <ObjectHeader, input_data stream>
88		logical_objects: HashMap<ObjectHeader, Vec<PathBuf>>, //<ObjectHeader, input_files>
89		hash_types: Vec<HashType>,
90		output_filenpath: O,
91		params: ZffCreatorMetadataParams) -> Result<ZffCreator<R>>{
92
93		let initial_chunk_number = 1;
94		let signature_key_bytes = params.signature_key.map(|keypair| keypair.to_bytes().to_vec());
95
96		let mut object_encoder_vec = Vec::new();
97		for (object_header, input_data) in physical_objects {
98			let object_encoder = PhysicalObjectEncoder::new(
99				object_header,
100				input_data,
101				hash_types.clone(),
102				params.encryption_key.clone(),
103				signature_key_bytes.clone(),
104				params.main_header.clone(),
105				initial_chunk_number,
106				params.header_encryption)?;
107			object_encoder_vec.push(ObjectEncoderInformation::with_data(ObjectEncoder::Physical(object_encoder), false, Vec::new()));
108		}
109		for (object_header, input_files) in logical_objects {
110			let mut current_file_number = 0;
111			let mut parent_file_number = 0;
112			let mut hardlink_map = HashMap::new();
113			let mut unaccessable_files = Vec::new();
114			let mut directories_to_traversal = VecDeque::new(); // <(path, parent_file_number, current_file_number)>
115			let mut files = Vec::new();
116			let mut symlink_real_paths = HashMap::new();
117			let mut directory_children = HashMap::<u64, Vec<u64>>::new(); //<file number of directory, Vec<filenumber of child>>
118			let mut root_dir_filenumbers = Vec::new();
119
120			//files in virtual root folder
121			for path in input_files {
122				current_file_number += 1;
123				let metadata = match std::fs::symlink_metadata(&path) {
124					Ok(metadata) => metadata,
125					Err(_) => {
126						unaccessable_files.push(path.to_string_lossy().to_string());
127						continue;
128					},
129				};
130				match File::open(&path) {
131					Ok(_) => (),
132					Err(_) => {
133						if !metadata.is_symlink() {
134							unaccessable_files.push(path.to_string_lossy().to_string());
135						};
136						continue;
137					},
138				};
139				root_dir_filenumbers.push(current_file_number);
140				if metadata.file_type().is_dir() {
141					directories_to_traversal.push_back((path, parent_file_number, current_file_number));
142				} else {
143					if metadata.file_type().is_symlink() {
144						match read_link(&path) {
145							Ok(symlink_real) => symlink_real_paths.insert(current_file_number, symlink_real),
146							Err(_) => symlink_real_paths.insert(current_file_number, PathBuf::from("")),
147						};
148					}
149					let file_header = match get_file_header(&metadata, &path, current_file_number, parent_file_number) {
150						Ok(file_header) => file_header,
151						Err(_) => continue,
152					};
153
154					#[cfg(target_family = "unix")]
155					add_to_hardlink_map(&mut hardlink_map, &metadata, current_file_number);
156
157					files.push((path.clone(), file_header));
158				}
159			}
160
161			// - files in subfolders
162			while let Some((current_dir, dir_parent_file_number, dir_current_file_number)) = directories_to_traversal.pop_front() {
163  				let element_iterator = match read_dir(&current_dir) {
164					Ok(iterator) => iterator,
165					Err(_) => {
166						unaccessable_files.push(current_dir.to_string_lossy().to_string());
167						continue;
168					}
169				};
170
171				let metadata = match std::fs::symlink_metadata(&current_dir) {
172					Ok(metadata) => metadata,
173					Err(_) => {
174						unaccessable_files.push(current_dir.to_string_lossy().to_string());
175						continue;
176					},
177				};
178				match File::open(&current_dir) {
179					Ok(_) => (),
180					Err(_) => {
181						unaccessable_files.push(current_dir.to_string_lossy().to_string());
182						continue;
183					},
184				};
185				if let Some(files_vec) = directory_children.get_mut(&dir_parent_file_number) {
186					files_vec.push(dir_current_file_number);
187				} else {
188					directory_children.insert(dir_parent_file_number, Vec::new());
189					directory_children.get_mut(&dir_parent_file_number).unwrap().push(dir_current_file_number);
190				};
191
192				parent_file_number = dir_current_file_number;
193				let file_header = match get_file_header(&metadata, &current_dir, dir_current_file_number, dir_parent_file_number) {
194					Ok(file_header) => file_header,
195					Err(_) => continue,
196				};
197				#[cfg(target_family = "unix")]
198				add_to_hardlink_map(&mut hardlink_map, &metadata, dir_current_file_number);
199				
200				files.push((current_dir.clone(), file_header));
201
202				// files in current folder
203				for inner_element in element_iterator {
204					current_file_number += 1;
205					let inner_element = match inner_element {
206						Ok(element) => element,
207						Err(e) => {
208							unaccessable_files.push(e.to_string());
209							continue;
210						}
211					};
212
213					let metadata = match std::fs::symlink_metadata(&inner_element.path()) {
214						Ok(metadata) => metadata,
215						Err(_) => {
216							unaccessable_files.push(current_dir.to_string_lossy().to_string());
217							continue;
218						},
219					};
220					match File::open(&inner_element.path()) {
221						Ok(_) => (),
222						Err(_) => {
223							unaccessable_files.push(inner_element.path().to_string_lossy().to_string());
224							continue;
225						},
226					};
227					if metadata.file_type().is_dir() {
228						directories_to_traversal.push_back((inner_element.path(), parent_file_number, current_file_number));
229					} else {
230						if let Some(files_vec) = directory_children.get_mut(&parent_file_number) {
231							files_vec.push(current_file_number);
232						} else {
233							directory_children.insert(parent_file_number, Vec::new());
234							directory_children.get_mut(&parent_file_number).unwrap().push(current_file_number);
235						};
236
237						match read_link(inner_element.path()) {
238							Ok(symlink_real) => symlink_real_paths.insert(current_file_number, symlink_real),
239							Err(_) => symlink_real_paths.insert(current_file_number, PathBuf::from("")),
240						};
241						let path = inner_element.path().clone();
242						let file_header = match get_file_header(&metadata, &path, current_file_number, parent_file_number) {
243							Ok(file_header) => file_header,
244							Err(_) => continue,
245						};
246						
247						#[cfg(target_family = "unix")]
248						add_to_hardlink_map(&mut hardlink_map, &metadata, current_file_number);
249
250						files.push((inner_element.path().clone(), file_header));
251					}
252				}
253			}
254
255			let object_encoder = LogicalObjectEncoder::new(
256				object_header,
257				files,
258				root_dir_filenumbers,
259				hash_types.clone(),
260				params.encryption_key.clone(),
261				signature_key_bytes.clone(),
262				params.main_header.clone(),
263				symlink_real_paths,
264				hardlink_map,
265				directory_children,
266				initial_chunk_number,
267				params.header_encryption)?;
268			object_encoder_vec.push(ObjectEncoderInformation::with_data(ObjectEncoder::Logical(Box::new(object_encoder)), false, unaccessable_files));
269		}
270		object_encoder_vec.reverse();
271		let (object_encoder, written_object_header, unaccessable_files) = match object_encoder_vec.pop() {
272			Some(creator_obj_encoder) => (creator_obj_encoder.object_encoder, creator_obj_encoder.written_object_header, creator_obj_encoder.unaccessable_files),
273			None => return Err(ZffError::new(ZffErrorKind::NoObjectsLeft, "")),
274		};
275
276		Ok(Self {
277			object_encoder_vec,
278			object_encoder,
279			written_object_header,
280			unaccessable_files,
281			output_filenpath: output_filenpath.into(),
282			current_segment_no: 1, //initial segment number should always be 1.
283			last_accepted_segment_filepath: PathBuf::new(),
284			description_notes: params.description_notes,
285			object_header_segment_numbers: HashMap::new(),
286			object_footer_segment_numbers: HashMap::new(),
287		})
288	}
289
290	fn write_next_segment<W: Write + Seek>(
291	&mut self,
292	output: &mut W,
293	seek_value: u64, // The seek value is a value of bytes you need to skip (e.g. the main_header, the object_header, ...)
294	) -> Result<u64> {
295		let mut eof = false; //true, if EOF of input stream is reached.
296		output.seek(SeekFrom::Start(seek_value))?;
297		let mut written_bytes: u64 = 0;
298		let target_chunk_size = self.object_encoder.main_header().chunk_size();
299		let target_segment_size = self.object_encoder.main_header().segment_size();
300		
301		//prepare segment header
302		let segment_header = SegmentHeader::new(
303			DEFAULT_HEADER_VERSION_SEGMENT_HEADER,
304			self.object_encoder.main_header().unique_identifier(),
305			self.current_segment_no);
306
307		//check if the segment size is to small
308		if (seek_value as usize +
309			segment_header.encode_directly().len() +
310			self.object_encoder.get_encoded_header().len() +
311			target_chunk_size) > self.object_encoder.main_header().segment_size() as usize {
312	        
313	        return Err(ZffError::new(ZffErrorKind::SegmentSizeToSmall, ""));
314	    };
315
316		//write segment header
317		written_bytes += output.write(&segment_header.encode_directly())? as u64;
318
319		//prepare segment footer
320		let mut segment_footer = SegmentFooter::new_empty(DEFAULT_FOOTER_VERSION_SEGMENT_FOOTER);	
321		
322		//write the object header
323		if !self.written_object_header {
324			self.object_header_segment_numbers.insert(self.object_encoder.obj_number(), self.current_segment_no);
325			segment_footer.add_object_header_offset(self.object_encoder.obj_number(), seek_value + written_bytes);
326			written_bytes += output.write(&self.object_encoder.get_encoded_header())? as u64;
327			self.written_object_header = true;
328		};
329
330		// read chunks and write them into the Writer.
331		let mut segment_footer_len = segment_footer.encode_directly().len() as u64;
332		loop {
333			if (written_bytes +
334				segment_footer_len +
335				target_chunk_size as u64) > target_segment_size-seek_value as u64 {
336				
337				if written_bytes == segment_header.encode_directly().len() as u64 {
338					return Err(ZffError::new(ZffErrorKind::ReadEOF, ""));
339				} else {
340					break;
341				}
342			};
343			let current_offset = seek_value + written_bytes;
344			let current_chunk_number = self.object_encoder.current_chunk_number();
345			let data = match self.object_encoder.get_next_data(current_offset, self.current_segment_no) {
346				Ok(data) => data,
347				Err(e) => match e.get_kind() {
348					ZffErrorKind::ReadEOF => {
349						if written_bytes == segment_header.encode_directly().len() as u64 {
350							return Err(e);
351						} else {
352							//write the appropriate object footer and break the loop
353							self.object_footer_segment_numbers.insert(self.object_encoder.obj_number(), self.current_segment_no);
354							segment_footer.add_object_footer_offset(self.object_encoder.obj_number(), seek_value + written_bytes);
355							written_bytes += output.write(&self.object_encoder.get_encoded_footer())? as u64;
356							eof = true;
357							break;
358						}
359					},
360					ZffErrorKind::InterruptedInputStream => {
361						break;
362					},
363					_ => return Err(e),
364				},
365			};
366			written_bytes += output.write(&data)? as u64;
367			let mut data_cursor = Cursor::new(&data);
368			if ChunkHeader::check_identifier(&mut data_cursor) {
369				segment_footer.add_chunk_offset(current_chunk_number, current_offset);
370				segment_footer_len += 16;
371			};
372		}
373
374		// finish the segment footer and write the encoded footer into the Writer.
375		segment_footer.set_footer_offset(seek_value + written_bytes);
376		if eof {
377			let main_footer = MainFooter::new(DEFAULT_FOOTER_VERSION_MAIN_FOOTER, self.current_segment_no, self.object_header_segment_numbers.clone(), self.object_footer_segment_numbers.clone(), self.description_notes.clone(), 0);
378			segment_footer.set_length_of_segment(seek_value + written_bytes + segment_footer.encode_directly().len() as u64 + main_footer.encode_directly().len() as u64);
379		} else {
380			segment_footer.set_length_of_segment(seek_value + written_bytes + segment_footer.encode_directly().len() as u64);
381		}
382			
383		written_bytes += output.write(&segment_footer.encode_directly())? as u64;
384		Ok(written_bytes)
385	}
386
387	/// generates the appropriate .zXX files.
388	pub fn generate_files(&mut self) -> Result<()> {
389		let mut first_segment_filename = PathBuf::from(&self.output_filenpath);
390	    let mut file_extension = String::from(FILE_EXTENSION_FIRST_VALUE);
391	    first_segment_filename.set_extension(&file_extension);
392	    self.last_accepted_segment_filepath = first_segment_filename.clone();
393	    let mut output_file = File::create(&first_segment_filename)?;
394		let encoded_main_header = self.object_encoder.main_header().encode_directly();
395
396	    output_file.write_all(&encoded_main_header)?;
397	    let mut main_footer_start_offset = self.write_next_segment(&mut output_file, encoded_main_header.len() as u64)? +
398	    								   encoded_main_header.len() as u64;
399
400	    let mut seek_value = 0;
401	    loop {
402	    	self.current_segment_no += 1;
403	    	file_extension = file_extension_next_value(&file_extension)?;
404	    	let mut segment_filename = PathBuf::from(&self.output_filenpath);
405	    	segment_filename.set_extension(&file_extension);
406	    	let mut output_file = File::create(&segment_filename)?;
407	    	main_footer_start_offset = match self.write_next_segment(&mut output_file, seek_value) {
408	    		Ok(written_bytes) => {
409	    			seek_value = 0;
410	    			written_bytes
411	    		},
412	    		Err(e) => match e.get_kind() {
413	    			ZffErrorKind::ReadEOF => {
414	    				remove_file(&segment_filename)?;
415	    				let (object_encoder, written_object_header, unaccessable_files) = match self.object_encoder_vec.pop() {
416	    					Some(creator_obj_encoder) => (creator_obj_encoder.object_encoder, creator_obj_encoder.written_object_header, creator_obj_encoder.unaccessable_files),
417	    					None => break,
418	    				};
419	    				self.object_encoder = object_encoder;
420	    				self.written_object_header = written_object_header;
421	    				self.unaccessable_files = unaccessable_files;
422	    				self.current_segment_no -=1;
423	    				file_extension = file_extension_previous_value(&file_extension)?;
424	    				seek_value = main_footer_start_offset;
425	    				main_footer_start_offset
426	    			},
427	    			_ => return Err(e),
428	    		},
429	    	};
430	    	self.last_accepted_segment_filepath = segment_filename.clone();
431	    }
432
433	    let main_footer = MainFooter::new(DEFAULT_FOOTER_VERSION_MAIN_FOOTER, self.current_segment_no-1, self.object_header_segment_numbers.clone(), self.object_footer_segment_numbers.clone(), self.description_notes.clone(), main_footer_start_offset);
434	    let mut output_file = OpenOptions::new().write(true).append(true).open(&self.last_accepted_segment_filepath)?;
435	    output_file.write_all(&main_footer.encode_directly())?;
436
437	    Ok(())
438	}
439
440	/// Returns a reference of the unaccessable files.
441	pub fn unaccessable_files(&self) -> &Vec<String> {
442		&self.unaccessable_files
443	}
444}