corevm_host/fs/
operations.rs

1use super::*;
2
/// File system node.
///
/// Produced by [`Node::open`], which picks the variant from the [`NodeKind`] recorded in the
/// node's main block.
pub enum Node {
	/// A regular file, represented by its opened [`File`] handle.
	File(File),
	/// A directory, represented by its fully decoded [`Dir`] listing.
	Dir(Dir),
}
8
9impl Node {
10	pub fn open<R: ReadBlock>(block_ref: &BlockRef, block_reader: &mut R) -> Result<Self, Error> {
11		let mut reader = NodeReader::new(block_ref, block_reader)?;
12		let node = match reader.file().main_block().kind() {
13			NodeKind::File => {
14				let file = reader.into_file();
15				Self::File(file)
16			},
17			NodeKind::Dir => {
18				let dir = Dir::decode(&mut reader).map_err(|_| Error::Io)?;
19				Self::Dir(dir)
20			},
21		};
22		Ok(node)
23	}
24}
25
26// TODO @ivan Resolving a path involves reading and decoding a directory in full, although we only
27// need one entry. Ideally we need to be able to check that the entry exists without reading and
28// decoding all entries.
29
/// A directory stored in the block storage.
///
/// Stored as a file with a different [`NodeKind`] in the main block. The file contents are the
/// encoded form of this map, which associates each entry name with the reference to that
/// entry's main block.
#[derive(Encode, Decode, Debug)]
pub struct Dir(pub VecMap<FileName, BlockRef>);
35
36impl Dir {
37	pub fn open<R: ReadBlock>(block_ref: &BlockRef, block_reader: &mut R) -> Result<Self, Error> {
38		let mut reader = NodeReader::new(block_ref, block_reader)?;
39		if reader.file().main_block().kind() != NodeKind::Dir {
40			return Err(Error::Node);
41		}
42		let dir = Self::decode(&mut reader).map_err(|_| Error::Io)?;
43		Ok(dir)
44	}
45}
46
/// A file stored in the block storage.
///
/// Might actually refer to a directory. Check [`MainBlock::kind`] to determine that.
pub struct File {
	/// Decoded main block: node metadata, the embedded first block and the references to the
	/// remaining blocks.
	main_block: MainBlock,
	/// The most recently loaded non-first block together with its index in the main block's
	/// reference list. Lets consecutive reads that hit the same block skip the block reader.
	current_block: Option<(usize, FileBlock)>,
	/// Current read offset in bytes from the start of the file; `seek` keeps it within
	/// `0..=file_size`.
	position: u64,
}
55
56impl File {
57	pub fn main_block(&self) -> &MainBlock {
58		&self.main_block
59	}
60
61	pub fn position(&self) -> u64 {
62		self.position
63	}
64
65	pub fn seek(&mut self, position: u64) -> Result<(), Error> {
66		if position > self.main_block.file_size {
67			return Err(Error::Io);
68		}
69		self.position = position;
70		Ok(())
71	}
72
73	pub fn open<R: ReadBlock>(main_block_ref: &BlockRef, reader: &mut R) -> Result<Self, Error> {
74		let block = reader.read_block(main_block_ref)?;
75		let main_block = MainBlock::decode(block)?;
76		Ok(Self { main_block, current_block: None, position: 0 })
77	}
78
79	pub fn read<R: ReadBlock>(&mut self, buf: &mut [u8], reader: &mut R) -> Result<usize, Error> {
80		let n = (buf.len() as u64).min(self.main_block.file_size - self.position);
81		if n == 0 {
82			return Ok(0);
83		}
84		let next_position = self.position + n;
85		let n = n as usize;
86		let first_block_size = self.main_block.first_block.len() as u64;
87		let mut i = self.get_block_index(self.position);
88		let mut buf_position = 0;
89		if i == usize::MAX {
90			// Copy from the first block.
91			let a = self.position as usize;
92			let b = next_position.min(first_block_size) as usize;
93			let m = b - a;
94			buf[..m].copy_from_slice(&self.main_block.first_block.0[a..b]);
95			self.position += m as u64;
96			buf_position += m;
97			i = 0;
98		}
99		// Copy from the rest of the blocks.
100		while self.position != next_position {
101			let block = match &mut self.current_block {
102				Some((block_index, block)) if *block_index == i => block,
103				block => {
104					let data = reader.read_block(&self.main_block.block_refs[i])?;
105					let new_block = FileBlock::new(data)?;
106					&mut block.insert((i, new_block)).1
107				},
108			};
109			let a = (self.position - first_block_size - i as u64 * MAX_BLOCK_SIZE as u64) as usize;
110			let m = ((next_position - self.position) as usize).min(block.len() - a);
111			buf[buf_position..buf_position + m].copy_from_slice(&block.0[a..a + m]);
112			self.position += m as u64;
113			buf_position += m;
114			i += 1;
115		}
116		Ok(n)
117	}
118
119	pub fn read_exact<R: ReadBlock>(
120		&mut self,
121		buf: &mut [u8],
122		reader: &mut R,
123	) -> Result<(), Error> {
124		let n = self.read(buf, reader)?;
125		if n != buf.len() {
126			return Err(Error::Io);
127		}
128		Ok(())
129	}
130
131	pub fn read_to_end<R: ReadBlock>(
132		&mut self,
133		buf: &mut Vec<u8>,
134		reader: &mut R,
135	) -> Result<usize, Error> {
136		let remaining = (self.main_block.file_size - self.position) as usize;
137		let old_len = buf.len();
138		buf.resize(old_len + remaining, 0_u8);
139		self.read_exact(&mut buf[old_len..], reader)?;
140		Ok(remaining)
141	}
142
143	fn get_block_index(&self, mut position: u64) -> usize {
144		debug_assert!(position < self.main_block.file_size);
145		let first_block_size = self.main_block.first_block.len() as u64;
146		if position < first_block_size {
147			return usize::MAX;
148		}
149		position -= first_block_size;
150		(position / self.main_block.block_size) as usize
151	}
152}
153
/// Reads a node (file or directory) from the block storage.
///
/// Bundles a [`File`] with the block reader it needs, so that callers do not have to pass the
/// reader to every read call.
pub struct NodeReader<R: ReadBlock> {
	/// Block reader used to fetch the main block and the data blocks.
	reader: R,
	/// Opened node together with its read position.
	file: File,
}
159
impl<R: ReadBlock> NodeReader<R> {
	/// Opens the node whose main block is referenced by `main_block_ref`.
	///
	/// The main block is read through `reader` right away; `reader` is then kept for the
	/// subsequent reads.
	pub fn new(main_block_ref: &BlockRef, mut reader: R) -> Result<Self, Error> {
		let file = File::open(main_block_ref, &mut reader)?;
		Ok(Self { reader, file })
	}

	/// Returns the underlying [`File`].
	pub fn file(&self) -> &File {
		&self.file
	}

	/// Consumes the node reader and returns the underlying [`File`], dropping the block reader.
	pub fn into_file(self) -> File {
		self.file
	}

	/// Consumes the node reader and returns the block reader, dropping the file state.
	pub fn into_inner(self) -> R {
		self.reader
	}

	/// Reads up to `buf.len()` bytes at the current position; see [`File::read`].
	pub fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
		self.file.read(buf, &mut self.reader)
	}

	/// Reads exactly `buf.len()` bytes at the current position; see [`File::read_exact`].
	pub fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), Error> {
		self.file.read_exact(buf, &mut self.reader)
	}

	/// Appends the rest of the node contents to `buf`; see [`File::read_to_end`].
	pub fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize, Error> {
		self.file.read_to_end(buf, &mut self.reader)
	}

	/// Moves the read position; see [`File::seek`].
	pub fn seek(&mut self, position: u64) -> Result<(), Error> {
		self.file.seek(position)
	}
}
194
195impl<R: ReadBlock> codec::Input for NodeReader<R> {
196	fn remaining_len(&mut self) -> Result<Option<usize>, codec::Error> {
197		let remaining = self.file.main_block().file_size() - self.file.position();
198		Ok(remaining.try_into().ok())
199	}
200
201	fn read(&mut self, into: &mut [u8]) -> Result<(), codec::Error> {
202		self.read_exact(into).map_err(|_| "I/o error")?;
203		Ok(())
204	}
205}
206
/// Write a node (file or directory) to the block storage.
///
/// The total size is declared up front; data is then streamed in, split into blocks, and the
/// main block is written last by `finish`.
pub struct NodeWriter<W: WriteBlock> {
	/// Destination block storage.
	writer: W,
	/// Bytes collected for the block that is currently being filled.
	buf: Vec<u8>,
	/// Node kind recorded in the main block.
	kind: NodeKind,
	/// Declared total size of the node contents; `finish` requires exactly this many bytes.
	file_size: u64,
	/// Block size recorded in the main block for the blocks that follow the first one.
	block_size: usize,
	/// Number of content bytes embedded in the main block as the first block.
	first_block_size: usize,
	/// Number of content bytes accepted so far.
	position: u64,
	/// References to the blocks written so far, in file order.
	block_refs: Vec<BlockRef>,
	/// Contents of the first block; stays empty until the first block has been filled.
	first_block: Vec<u8>,
	/// Service identifier used for every written block and block reference.
	service_id: ServiceId,
}
220
221impl<W: WriteBlock> NodeWriter<W> {
222	pub fn new(
223		service_id: ServiceId,
224		writer: W,
225		file_size: u64,
226		block_size: usize,
227	) -> Result<Self, Error> {
228		Self::do_new(service_id, writer, NodeKind::File, file_size, block_size)
229	}
230
231	pub fn new_dir(
232		service_id: ServiceId,
233		writer: W,
234		file_size: u64,
235		block_size: usize,
236	) -> Result<Self, Error> {
237		Self::do_new(service_id, writer, NodeKind::Dir, file_size, block_size)
238	}
239
240	fn do_new(
241		service_id: ServiceId,
242		writer: W,
243		kind: NodeKind,
244		file_size: u64,
245		block_size: usize,
246	) -> Result<Self, Error> {
247		let first_block_size = {
248			let mut num_blocks = file_size.div_ceil(block_size as u64) as usize;
249			let mut first_block_size = MAX_BLOCK_SIZE as u64;
250			// Two steps of fixed-point iteration.
251			for _ in 0..2 {
252				let metadata_len =
253					main_block_metadata_encoded_len(file_size, block_size as u64, num_blocks)
254						.ok_or(Error::Block)? as u64;
255				first_block_size = (MAX_BLOCK_SIZE as u64)
256					.checked_sub(metadata_len)
257					.ok_or(Error::Block)?
258					.min(file_size);
259				let new_num_blocks =
260					(file_size - first_block_size).div_ceil(block_size as u64) as usize;
261				if num_blocks == new_num_blocks {
262					break;
263				}
264				num_blocks = new_num_blocks;
265			}
266			first_block_size as usize
267		};
268		let buf_capacity = (block_size as u64).min(file_size) as usize;
269		Ok(Self {
270			writer,
271			kind,
272			file_size,
273			block_size,
274			first_block_size,
275			position: 0,
276			buf: Vec::with_capacity(buf_capacity.max(first_block_size)),
277			block_refs: Vec::new(),
278			first_block: Vec::new(),
279			service_id,
280		})
281	}
282
283	pub fn write_all(&mut self, data: &[u8]) -> Result<(), Error> {
284		let next_position = self.position + data.len() as u64;
285		if next_position > self.file_size {
286			return Err(Error::Io);
287		}
288		let mut slice = data;
289		while !slice.is_empty() {
290			let max_block_size =
291				if self.first_block.is_empty() { self.first_block_size } else { self.block_size };
292			let n = slice.len().min(max_block_size - self.buf.len());
293			let (chunk, rest) = slice.split_at(n);
294			self.buf.extend_from_slice(chunk);
295			if self.buf.len() == max_block_size {
296				self.write_block()?;
297			}
298			slice = rest;
299		}
300		self.position = next_position;
301		Ok(())
302	}
303
304	pub fn read_from<R: HostFileRead>(&mut self, reader: &mut R) -> Result<(), Error> {
305		let mut remaining = reader.remaining_len()?;
306		let next_position = self.position.checked_add(remaining).ok_or(Error::Io)?;
307		if next_position > self.file_size {
308			return Err(Error::Io);
309		}
310		while remaining != 0 {
311			let max_block_size =
312				if self.first_block.is_empty() { self.first_block_size } else { MAX_BLOCK_SIZE };
313			let old_len = self.buf.len();
314			let n = remaining.min(max_block_size as u64 - old_len as u64) as usize;
315			self.buf.resize(old_len + n, 0_u8);
316			reader.read_exact(&mut self.buf[old_len..])?;
317			if self.buf.len() == max_block_size {
318				self.write_block()?;
319			}
320			remaining -= n as u64;
321		}
322		self.position = next_position;
323		Ok(())
324	}
325
326	/// Finish writing the node.
327	///
328	/// Returns main block hash and the underlying block writer.
329	pub fn finish(mut self) -> Result<(BlockRef, W), Error> {
330		if !self.buf.is_empty() {
331			self.write_block()?;
332		}
333		if self.position != self.file_size {
334			return Err(Error::Io);
335		}
336		// Write main block.
337		let main_block = MainBlock {
338			kind: self.kind,
339			file_size: self.file_size,
340			block_size: self.block_size as u64,
341			block_refs: core::mem::take(&mut self.block_refs),
342			first_block: FileBlock(core::mem::take(&mut self.first_block).into()),
343		};
344		debug_assert!(validate_main_block(
345			main_block.file_size,
346			main_block.block_size,
347			&main_block.block_refs,
348			&main_block.first_block
349		)
350		.is_ok());
351		self.buf.clear();
352		main_block.encode_to(&mut self.buf);
353		self.writer.write_block(self.service_id, &self.buf[..])?;
354		let main_block_ref =
355			BlockRef { service_id: self.service_id, hash: Hash::digest(&self.buf[..]) };
356		Ok((main_block_ref, self.writer))
357	}
358
359	fn write_block(&mut self) -> Result<(), Error> {
360		if self.first_block.is_empty() {
361			self.first_block = core::mem::take(&mut self.buf);
362			self.buf = Vec::with_capacity((self.block_size as u64).min(self.file_size) as usize);
363		} else {
364			self.block_refs
365				.push(BlockRef { service_id: self.service_id, hash: Hash::digest(&self.buf[..]) });
366			self.writer.write_block(self.service_id, &self.buf[..])?;
367			self.buf.clear();
368		}
369		Ok(())
370	}
371}
372
// This is a workaround for `codec::Output::write` being infallible.
//
// The first error returned by the node writer is stored here so that the caller can check it
// after encoding has finished.
struct FallibleOutput<'a, W: WriteBlock> {
	// Node writer that receives the encoded bytes.
	writer: &'a mut NodeWriter<W>,
	// First write error, if any; once set, further writes are skipped.
	error: Option<Error>,
}
378
379impl<W: WriteBlock> codec::Output for FallibleOutput<'_, W> {
380	fn write(&mut self, bytes: &[u8]) {
381		if self.error.is_some() {
382			return;
383		}
384		if let Err(e) = self.writer.write_all(bytes) {
385			self.error = Some(e);
386		}
387	}
388}
389
390/// Copy the file from the host file system to the block storage.
391pub fn copy_file_in<R: HostFileRead, W: WriteBlock>(
392	host_file_reader: &mut R,
393	service_id: ServiceId,
394	block_writer: &mut W,
395	block_size: usize,
396) -> Result<BlockRef, Error> {
397	let file_size = host_file_reader.remaining_len()?;
398	let mut writer = NodeWriter::new(service_id, block_writer, file_size, block_size)?;
399	writer.read_from(host_file_reader)?;
400	let (main_block_ref, _writer) = writer.finish()?;
401	Ok(main_block_ref)
402}
403
404/// Copy the file referenced by `main_block_ref` from the block storage to the host file system.
405pub fn copy_file_out<R: ReadBlock, W: HostFileWrite + ?Sized>(
406	main_block_ref: &BlockRef,
407	block_reader: &mut R,
408	host_file_writer: &mut W,
409) -> Result<(), Error> {
410	let mut reader = NodeReader::new(main_block_ref, block_reader)?;
411	if reader.file.main_block.kind != NodeKind::File {
412		// Not a file.
413		return Err(Error::Node);
414	}
415	do_copy_file_out(&mut reader, host_file_writer)?;
416	Ok(())
417}
418
419fn do_copy_file_out<R: ReadBlock, W: HostFileWrite + ?Sized>(
420	reader: &mut NodeReader<R>,
421	writer: &mut W,
422) -> Result<(), Error> {
423	let mut buf = vec![0_u8; MAX_BLOCK_SIZE];
424	loop {
425		let n = reader.read(&mut buf[..])?;
426		if n == 0 {
427			break;
428		}
429		writer.write_all(&buf[..n])?;
430	}
431	Ok(())
432}
433
434/// Create directory in the block storage.
435pub fn create_dir<W: WriteBlock>(
436	dir: &Dir,
437	service_id: ServiceId,
438	block_writer: &mut W,
439	block_size: usize,
440) -> Result<BlockRef, Error> {
441	let file_size = dir.encoded_size() as u64;
442	let mut writer = NodeWriter::new_dir(service_id, block_writer, file_size, block_size)?;
443	let mut output = FallibleOutput { writer: &mut writer, error: None };
444	dir.encode_to(&mut output);
445	if let Some(e) = output.error {
446		return Err(e);
447	}
448	let (main_block_ref, _writer) = writer.finish()?;
449	Ok(main_block_ref)
450}
451
452/// Recurisvely copy the directory from the host file system to the block storage.
453///
454/// Returns the main block hash of the destination directory.
455pub fn copy_dir_in<F: HostFileRead, R: HostDirRead<F>, W: WriteBlock>(
456	host_dir_reader: R,
457	service_id: ServiceId,
458	block_writer: &mut W,
459	block_size: usize,
460) -> Result<BlockRef, Error> {
461	let mut dir_stack = Vec::new();
462	let mut queue = VecDeque::new();
463	let mut last_dir_ref = BlockRef { service_id: 0, hash: Hash::default() };
464	let mut visited_dirs = VecSet::new();
465	queue.push_back((host_dir_reader, FileName(Default::default()), usize::MAX));
466	while let Some((mut host_dir_reader, dir_name, parent_dir_index)) = queue.pop_front() {
467		let mut files = VecMap::new();
468		let mut subdirs = false;
469		while let Some(entry) = host_dir_reader.next_entry() {
470			let entry = entry?;
471			match entry.kind {
472				NodeKind::File => {
473					let mut file = host_dir_reader.open_file(&entry.file_name)?;
474					let block_ref = copy_file_in(&mut file, service_id, block_writer, block_size)?;
475					files.insert(entry.file_name, block_ref);
476				},
477				NodeKind::Dir => {
478					let (another_dir_reader, dir_id) =
479						host_dir_reader.open_dir(&entry.file_name)?;
480					let visited = match dir_id {
481						Some(dir_id) => !visited_dirs.insert(dir_id),
482						None => false,
483					};
484					if visited {
485						return Err(Error::Loop);
486					}
487					queue.push_back((another_dir_reader, entry.file_name, dir_stack.len()));
488					subdirs = true;
489				},
490			}
491		}
492		if subdirs {
493			dir_stack.push((files, dir_name, parent_dir_index));
494			continue;
495		}
496		// Don't use stack for directories that don't contain other directories.
497		let dir = Dir(files);
498		last_dir_ref = create_dir(&dir, service_id, block_writer, block_size)?;
499		if parent_dir_index == usize::MAX {
500			continue;
501		}
502		dir_stack[parent_dir_index].0.insert(dir_name, last_dir_ref);
503	}
504	while let Some((files, dir_name, parent_dir_index)) = dir_stack.pop() {
505		let dir = Dir(files);
506		last_dir_ref = create_dir(&dir, service_id, block_writer, block_size)?;
507		if parent_dir_index == usize::MAX {
508			continue;
509		}
510		dir_stack[parent_dir_index].0.insert(dir_name, last_dir_ref);
511	}
512	Ok(last_dir_ref)
513}
514
515/// Recursively copy the directory referenced by `main_block_ref` from the block storage to the
516/// host file system.
517pub fn copy_dir_out<R: ReadBlock, W: HostDirWrite>(
518	main_block_ref: &BlockRef,
519	block_reader: &mut R,
520	host_dir_writer: W,
521) -> Result<(), Error> {
522	let reader = NodeReader::new(main_block_ref, &mut *block_reader)?;
523	if reader.file.main_block.kind != NodeKind::Dir {
524		// Not a directory.
525		return Err(Error::Node);
526	}
527	do_copy_dir_out(reader, host_dir_writer)
528}
529
530fn do_copy_dir_out<R: ReadBlock, W: HostDirWrite>(
531	mut node_reader: NodeReader<R>,
532	host_dir_writer: W,
533) -> Result<(), Error> {
534	let mut queue = VecDeque::new();
535	let dir = Dir::decode(&mut node_reader).map_err(|_| Error::Io)?;
536	let mut block_reader = node_reader.into_inner();
537	queue.push_back((dir, host_dir_writer));
538	while let Some((dir, mut host_dir_writer)) = queue.pop_front() {
539		for (file_name, hash) in dir.0.iter() {
540			let mut reader = NodeReader::new(hash, &mut block_reader)?;
541			match reader.file.main_block.kind {
542				NodeKind::File => {
543					let mut file = host_dir_writer.create_file(file_name)?;
544					do_copy_file_out(&mut reader, &mut file)?;
545				},
546				NodeKind::Dir => {
547					let another_dir = Dir::decode(&mut reader).map_err(|_| Error::Io)?;
548					let another_dir_writer = host_dir_writer.create_dir(file_name)?;
549					queue.push_back((another_dir, another_dir_writer));
550				},
551			}
552		}
553	}
554	Ok(())
555}
556
/// Recursively traverses file system nodes.
///
/// Yields the starting node first and then, breadth-first, every node reachable from it through
/// directory listings.
pub struct NodeIter<R: ReadBlock> {
	/// References to the nodes that still have to be visited, in visiting order.
	queue: VecDeque<BlockRef>,
	/// Block reader used to open every visited node.
	reader: R,
}
562
563impl<R: ReadBlock> NodeIter<R> {
564	pub fn new(main_block_ref: BlockRef, reader: R) -> Self {
565		let mut queue = VecDeque::new();
566		queue.push_back(main_block_ref);
567		Self { queue, reader }
568	}
569}
570
571impl<R: ReadBlock> Iterator for NodeIter<R> {
572	type Item = Result<(BlockRef, File), Error>;
573
574	fn next(&mut self) -> Option<Self::Item> {
575		macro_rules! check {
576			($body: expr) => {
577				match $body {
578					Ok(ret) => ret,
579					Err(e) => return Some(Err(e)),
580				}
581			};
582		}
583		let hash = self.queue.pop_front()?;
584		let mut reader = check!(NodeReader::new(&hash, &mut self.reader));
585		match reader.file.main_block.kind {
586			NodeKind::File => Some(Ok((hash, reader.into_file()))),
587			NodeKind::Dir => {
588				let dir = check!(Dir::decode(&mut reader).map_err(|_| Error::Io));
589				for (_name, hash) in dir.0.into_iter() {
590					self.queue.push_back(hash);
591				}
592				check!(reader.seek(0));
593				Some(Ok((hash, reader.into_file())))
594			},
595		}
596	}
597}
598
599/// Copy the file or directory (recursively) referenced by `main_block_ref` from the block storage
600/// to the host file system.
601pub fn copy_out<R: ReadBlock, W: HostWrite>(
602	main_block_ref: &BlockRef,
603	block_reader: &mut R,
604	host_writer: W,
605) -> Result<(), Error> {
606	let mut reader = NodeReader::new(main_block_ref, block_reader)?;
607	match reader.file.main_block.kind {
608		NodeKind::File => {
609			let mut file_writer = host_writer.into_file_writer()?;
610			do_copy_file_out(&mut reader, &mut file_writer)?
611		},
612		NodeKind::Dir => do_copy_dir_out(reader, host_writer.into_dir_writer()?)?,
613	}
614	Ok(())
615}
616
617/// Fully read the file referenced by `main_block_ref`.
618pub fn read<R: ReadBlock>(
619	main_block_ref: &BlockRef,
620	block_reader: &mut R,
621) -> Result<Vec<u8>, Error> {
622	let mut reader = NodeReader::new(main_block_ref, block_reader)?;
623	let mut buf = Vec::with_capacity(reader.file().main_block().file_size() as usize);
624	reader.read_to_end(&mut buf)?;
625	Ok(buf)
626}
627
628/// Resolve `path` into main block reference.
629///
630/// `root_dir_ref` refers to the root directory, `current_dir` is the current working directory
631/// path. Panics if `current_dir` is not absolute.
632pub fn resolve_path<R: ReadBlock>(
633	root_dir_ref: BlockRef,
634	current_dir: &CStr,
635	path: &CStr,
636	block_reader: &mut R,
637) -> Result<BlockRef, Error> {
638	assert!(
639		current_dir.to_bytes().is_empty() || matches!(current_dir.to_bytes(), [b'/', ..]),
640		"Current directory path must be absolute: {current_dir:?}"
641	);
642	let mut dir_stack = Vec::new();
643	let mut pending_ref = root_dir_ref;
644	let cwd_components = match path.to_bytes() {
645		[b'/', ..] => c"",
646		_ => current_dir,
647	}
648	.to_bytes()
649	.split(|b| *b == b'/');
650	let path_components = path.to_bytes().split(|b| *b == b'/');
651	for component in cwd_components.chain(path_components) {
652		match component {
653			b"." | b"" => {},
654			b".." =>
655				if let Some(dir_ref) = dir_stack.pop() {
656					pending_ref = dir_ref;
657				},
658			name => {
659				let dir_ref = pending_ref;
660				let dir = Dir::open(&dir_ref, block_reader)?;
661				pending_ref = dir.0.get(name).cloned().ok_or(Error::Path)?;
662				dir_stack.push(dir_ref);
663			},
664		}
665	}
666	Ok(pending_ref)
667}