corevm_host/
fs.rs

1//! CoreVM FS that uses preimage store as a backend.
2//!
3//! Files are stored as a series of blocks. A file begins with a main block that stores metadata
4//! and the actual first block. A directory is stored as a file with [`NodeKind::Dir`] specified in
5//! the main block. Maximum file size is around 504 GiB.
6
7mod error;
8mod operations;
9#[cfg(test)]
10mod tests;
11
12#[cfg(any(feature = "std", test))]
13mod fs_std;
14
15#[cfg(any(feature = "std", test))]
16pub use self::fs_std::*;
17pub use self::{error::*, operations::*};
18
19use alloc::{borrow::Borrow, collections::VecDeque, ffi::CString, vec, vec::Vec};
20use bytes::{Buf, Bytes};
21use codec::{Compact, CompactLen, ConstEncodedLen, Decode, Encode, MaxEncodedLen};
22use core::{ffi::CStr, ops::Deref};
23use jam_types::{ServiceId, VecMap, VecSet, MAX_PREIMAGE_BLOB_LEN};
24
/// Minimum file block size in bytes (64 KiB).
///
/// Only the last file block can be smaller than that number.
///
/// Having min. size helps read metadata at the start of the file with a single lookup into the
/// preimage store.
pub const MIN_BLOCK_SIZE: usize = 64 * 1024;

/// Maximum file block size in bytes.
///
/// A block is capped at the maximum preimage blob length ([`MAX_PREIMAGE_BLOB_LEN`]).
pub const MAX_BLOCK_SIZE: usize = MAX_PREIMAGE_BLOB_LEN;

/// Maximum file name length in bytes.
///
/// Uses the same value as Linux `PATH_MAX` for compatibility. Includes the NUL byte, i.e. the
/// name itself can hold at most `MAX_FILE_NAME_LEN - 1` bytes.
pub const MAX_FILE_NAME_LEN: usize = 4096;
40
41/// Single file block.
42///
43/// Up to [`MAX_BLOCK_SIZE`] bytes long.
44#[derive(Clone, Debug)]
45pub struct FileBlock(Bytes);
46
47impl FileBlock {
48	/// Create new file block from the provided data.
49	///
50	/// Fails if the data is larger than [`MAX_BLOCK_SIZE`].
51	pub fn new(data: Bytes) -> Result<Self, InvalidBlock> {
52		if data.len() > MAX_BLOCK_SIZE {
53			return Err(InvalidBlock);
54		}
55		Ok(Self(data))
56	}
57
58	/// Convert into underlying `Bytes`.
59	pub fn into_inner(self) -> Bytes {
60		self.0
61	}
62}
63
64impl Deref for FileBlock {
65	type Target = [u8];
66	fn deref(&self) -> &Self::Target {
67		&self.0[..]
68	}
69}
70
71impl AsRef<[u8]> for FileBlock {
72	fn as_ref(&self) -> &[u8] {
73		&self.0[..]
74	}
75}
76
/// File system node type.
///
/// Stored in the main block of every node to tell files and directories apart.
#[derive(
	Encode, Decode, MaxEncodedLen, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Debug,
)]
pub enum NodeKind {
	/// Regular file.
	File,
	/// Directory.
	Dir,
}

// Marker impl: declares that the encoded length of `NodeKind` is constant.
impl ConstEncodedLen for NodeKind {}
89
90/// The main block that describes a file system node (either a file or a directory).
91///
92/// Contains the first block, the list of all other file blocks and the metadata.
93#[derive(Debug)]
94pub struct MainBlock {
95	kind: NodeKind,
96	file_size: u64,
97	/// The exact size of every block except the first and the last one that can be smaller.
98	block_size: u64,
99	/// References of all file blocks except the first one.
100	///
101	/// The first block is a part of the superblock.
102	block_refs: Vec<BlockRef>,
103	/// The first file block.
104	first_block: FileBlock,
105}
106
107impl MainBlock {
108	/// Decode main block from the provided `Bytes`.
109	pub fn decode(mut input: Bytes) -> Result<Self, InvalidBlock> {
110		let mut slice = &input[..];
111		let kind = NodeKind::decode(&mut slice).map_err(|_| InvalidBlock)?;
112		let file_size = Compact::<u64>::decode(&mut slice).map_err(|_| InvalidBlock)?.0;
113		let block_size = Compact::<u64>::decode(&mut slice).map_err(|_| InvalidBlock)?.0;
114		let block_refs = Vec::<BlockRef>::decode(&mut slice).map_err(|_| InvalidBlock)?;
115		let remaining_len = slice.len();
116		input.advance(input.len() - remaining_len);
117		let first_block = FileBlock::new(input)?;
118		validate_main_block(file_size, block_size, &block_refs, &first_block)?;
119		Ok(Self { kind, file_size, block_size, block_refs, first_block })
120	}
121
122	/// Encode main block into the provided `Vec`.
123	pub fn encode_to(&self, output: &mut Vec<u8>) {
124		self.kind.encode_to(output);
125		Compact(self.file_size).encode_to(output);
126		Compact(self.block_size).encode_to(output);
127		self.block_refs.encode_to(output);
128		output.extend_from_slice(self.first_block.as_ref());
129	}
130
131	/// Get node type.
132	pub const fn kind(&self) -> NodeKind {
133		self.kind
134	}
135
136	/// Get file size.
137	pub const fn file_size(&self) -> u64 {
138		self.file_size
139	}
140
141	/// Get block size.
142	pub const fn block_size(&self) -> u64 {
143		self.block_size
144	}
145
146	/// Get the first block.
147	pub fn first_block(&self) -> &FileBlock {
148		&self.first_block
149	}
150
151	/// Get the references to all blocks except the first one.
152	pub fn block_refs(&self) -> &[BlockRef] {
153		&self.block_refs
154	}
155}
156
157fn validate_main_block(
158	file_size: u64,
159	block_size: u64,
160	block_refs: &[BlockRef],
161	first_block: &FileBlock,
162) -> Result<(), InvalidBlock> {
163	if !(MIN_BLOCK_SIZE as u64..=MAX_BLOCK_SIZE as u64).contains(&block_size) {
164		log::trace!("Invalid block size: {block_size}");
165		return Err(InvalidBlock);
166	}
167	let first_block_size = first_block.len();
168	if first_block_size < MIN_BLOCK_SIZE && !block_refs.is_empty() {
169		log::trace!(
170			"Invalid first block size: first block size = {first_block_size}, \
171            min. block size = {MIN_BLOCK_SIZE}, no. of hashes = {}",
172			block_refs.len()
173		);
174		return Err(InvalidBlock);
175	}
176	let max = first_block_size as u64 + block_refs.len() as u64 * block_size;
177	let min = if block_refs.is_empty() { first_block_size as u64 } else { max - block_size + 1 };
178	if !(min..=max).contains(&file_size) {
179		log::trace!("Invalid file size: {file_size} not in {min}..={max}");
180		return Err(InvalidBlock);
181	}
182	let encoded_len = main_block_metadata_encoded_len(file_size, block_size, block_refs.len())
183		.ok_or(InvalidBlock)? +
184		first_block.len();
185	if encoded_len > MAX_BLOCK_SIZE {
186		log::trace!(
187			"Encoded len is greater than the block size: {encoded_len} vs. {MAX_BLOCK_SIZE}"
188		);
189		return Err(InvalidBlock);
190	}
191	Ok(())
192}
193
194/// Includes everything except the first block.
195fn main_block_metadata_encoded_len(
196	file_size: u64,
197	block_size: u64,
198	num_blocks: usize,
199) -> Option<usize> {
200	(NodeKind::max_encoded_len() +
201		Compact::<u64>::compact_len(&file_size) +
202		Compact::<u64>::compact_len(&block_size))
203	.checked_add(vec_encoded_len::<BlockRef>(num_blocks))
204}
205
206/// File name.
207///
208/// Internally this is a C-string to be compatible with UNIX system calls.
209#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
210pub struct FileName(CString);
211
212impl FileName {
213	/// Create new file name from the provide C-string.
214	pub fn new(name: CString) -> Result<Self, InvalidPath> {
215		validate_file_name(&name)?;
216		Ok(Self(name))
217	}
218
219	/// Convert into underlying C string.
220	pub fn into_inner(self) -> CString {
221		self.0
222	}
223}
224
225impl Encode for FileName {
226	fn encode_to<O: codec::Output + ?Sized>(&self, output: &mut O) {
227		self.0.to_bytes().encode_to(output)
228	}
229}
230
231impl Decode for FileName {
232	fn decode<I: codec::Input>(input: &mut I) -> Result<Self, codec::Error> {
233		let mut bytes: Vec<u8> = Decode::decode(input)?;
234		bytes.push(0_u8);
235		let name = CString::from_vec_with_nul(bytes).map_err(|_| "Invalid C-string")?;
236		validate_file_name(&name).map_err(|_| "Invalid file name")?;
237		Ok(Self(name))
238	}
239}
240
241impl MaxEncodedLen for FileName {
242	fn max_encoded_len() -> usize {
243		// -1 because we don't encode the NUL byte.
244		MAX_FILE_NAME_LEN - 1 + Compact::<u32>(MAX_FILE_NAME_LEN as u32).encoded_size()
245	}
246}
247
248impl AsRef<CStr> for FileName {
249	fn as_ref(&self) -> &CStr {
250		self.0.as_c_str()
251	}
252}
253
254impl Deref for FileName {
255	type Target = CStr;
256
257	fn deref(&self) -> &Self::Target {
258		self.0.as_c_str()
259	}
260}
261
262impl Borrow<CStr> for FileName {
263	fn borrow(&self) -> &CStr {
264		self.0.as_c_str()
265	}
266}
267
268impl Borrow<[u8]> for FileName {
269	fn borrow(&self) -> &[u8] {
270		// We borrow without the NUL byte to be able to resolve paths without unnecessary cloning.
271		self.0.to_bytes()
272	}
273}
274
275fn validate_file_name(name: &CStr) -> Result<(), InvalidPath> {
276	let bytes = name.to_bytes_with_nul();
277	if !(2..=MAX_FILE_NAME_LEN).contains(&bytes.len()) || bytes.contains(&b'/') {
278		return Err(InvalidPath);
279	}
280	Ok(())
281}
282
/// Unique file block identifier in the file system.
///
/// Combines the service that stores the block with the hash of the block.
#[derive(
	Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Encode, Decode, MaxEncodedLen, Debug,
)]
pub struct BlockRef {
	/// Id of the service where all the file blocks are stored.
	pub service_id: ServiceId,
	/// Hash of the first file block.
	///
	/// NOTE(review): for references to non-first blocks this is presumably the hash of the
	/// referenced block itself — confirm and reword.
	pub hash: Hash,
}

// Marker impl: declares that the encoded length of `BlockRef` is constant.
impl ConstEncodedLen for BlockRef {}
295
296impl core::fmt::Display for BlockRef {
297	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
298		write!(f, "{:x}:", self.service_id)?;
299		for byte in self.hash.0.iter() {
300			write!(f, "{byte:02x}")?;
301		}
302		Ok(())
303	}
304}
305
306/// File block hash.
307///
308/// This is a type-safe wrapper around [`Hash`](jam_types::Hash) that is prefixed with
309/// [`Hash::PREFIX`] when printed. Never equals the hash of the whole file (even if it fits into a
310/// file block).
311#[derive(
312	Encode, Decode, MaxEncodedLen, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Debug, Default,
313)]
314pub struct Hash(pub jam_types::Hash);
315
316impl Hash {
317	/// Hash prefix when printed as string.
318	pub const PREFIX: &str = "fs-";
319
320	/// Computes the hash of the provided data.
321	pub fn digest(data: &[u8]) -> Self {
322		let h = blake2b_simd::Params::new().hash_length(32).hash(data);
323		Self(h.as_bytes().try_into().expect("Hash length set to 32"))
324	}
325
326	/// Returns `true` if the hash is zero.
327	pub fn is_zero(&self) -> bool {
328		self.0.iter().all(|b| *b == 0)
329	}
330}
331
332impl ConstEncodedLen for Hash {}
333
334impl core::fmt::Display for Hash {
335	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
336		f.write_str(Hash::PREFIX)?;
337		for byte in self.0.iter() {
338			write!(f, "{byte:02x}")?;
339		}
340		Ok(())
341	}
342}
343
344// TODO @ivan `jamt` and `corevm-builder` need async versions of these traits
345
346/// File block reader.
347pub trait ReadBlock {
348	/// Read file block referenced by `block_ref` from the content-addressable storage.
349	fn read_block(&mut self, block_ref: &BlockRef) -> Result<Bytes, IoError>;
350}
351
352impl<R: ReadBlock + ?Sized> ReadBlock for &mut R {
353	fn read_block(&mut self, block_ref: &BlockRef) -> Result<Bytes, IoError> {
354		ReadBlock::read_block(*self, block_ref)
355	}
356}
357
358impl ReadBlock for VecMap<BlockRef, Bytes> {
359	fn read_block(&mut self, block_ref: &BlockRef) -> Result<Bytes, IoError> {
360		Ok(self.get(block_ref).ok_or(IoError)?.clone())
361	}
362}
363
364/// File block writer.
365pub trait WriteBlock {
366	/// Write file block stored in `buf` to the content-addressable storage.
367	fn write_block(&mut self, service_id: ServiceId, buf: &[u8]) -> Result<(), IoError>;
368}
369
370impl<W: WriteBlock + ?Sized> WriteBlock for &mut W {
371	fn write_block(&mut self, service_id: ServiceId, buf: &[u8]) -> Result<(), IoError> {
372		WriteBlock::write_block(*self, service_id, buf)
373	}
374}
375
376impl WriteBlock for VecMap<BlockRef, Bytes> {
377	fn write_block(&mut self, service_id: ServiceId, buf: &[u8]) -> Result<(), IoError> {
378		let block_ref = BlockRef { service_id, hash: Hash::digest(buf) };
379		self.insert(block_ref, buf.to_vec().into());
380		Ok(())
381	}
382}
383
/// Host file reader.
///
/// Abstraction over a file of the host file system that is read sequentially.
pub trait HostFileRead {
	/// Returns the remaining number of bytes in the input stream.
	fn remaining_len(&mut self) -> Result<u64, IoError>;

	/// Reads `buf.len()` bytes from the input stream into the provided buffer.
	///
	/// NOTE(review): presumably fails with [`IoError`] when fewer than `buf.len()` bytes are
	/// left, like `std::io::Read::read_exact` — confirm against the implementors.
	fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), IoError>;
}

/// Host file writer.
///
/// Abstraction over a file of the host file system that is written sequentially.
pub trait HostFileWrite {
	/// Fully write the provided buffer to the output stream.
	fn write_all(&mut self, buf: &[u8]) -> Result<(), IoError>;
}
398
/// Host directory reader.
///
/// `F` is the type of the file readers that this directory reader opens.
pub trait HostDirRead<F: HostFileRead> {
	/// Unique identifier of the file/directory.
	///
	/// On UNIX this is `(device id, inode)`. Used to handle file system loops.
	type Id: core::hash::Hash + core::cmp::Eq + core::cmp::Ord;

	/// Returns the next entry of the directory.
	///
	/// NOTE(review): `None` presumably means that all entries have been visited — confirm
	/// against the implementors.
	fn next_entry(&mut self) -> Option<Result<HostDirEntry, IoError>>;

	/// Opens the file called `name` for reading.
	///
	/// NOTE(review): `name` is presumably resolved relative to this directory — confirm.
	fn open_file(&mut self, name: &FileName) -> Result<F, IoError>;

	/// Opens the directory called `name` for reading.
	///
	/// Returns the reader together with an optional [`Self::Id`].
	///
	/// NOTE(review): the id presumably identifies the opened directory and is `None` when the
	/// host can't provide one — confirm against the implementors.
	fn open_dir(&mut self, name: &FileName) -> Result<(Self, Option<Self::Id>), IoError>
	where
		Self: Sized;
}
412
/// Host directory entry.
///
/// Produced by [`HostDirRead::next_entry`].
#[derive(Debug)]
pub struct HostDirEntry {
	/// Whether the entry is a file or a directory.
	pub kind: NodeKind,
	/// Name of the entry.
	pub file_name: FileName,
}
419
/// Host directory writer.
pub trait HostDirWrite {
	/// Writer type returned by [`HostDirWrite::create_file`].
	type FileWrite: HostFileWrite;

	/// Creates the file called `name` and returns a writer for its contents.
	///
	/// NOTE(review): `name` is presumably resolved relative to this directory — confirm.
	fn create_file(&mut self, name: &FileName) -> Result<Self::FileWrite, IoError>;

	/// Creates the directory called `name` and returns a writer for it.
	///
	/// NOTE(review): `name` is presumably resolved relative to this directory — confirm.
	fn create_dir(&mut self, name: &FileName) -> Result<Self, IoError>
	where
		Self: Sized;
}
429
/// Host writer.
///
/// Can be turned into either file or directory writer.
pub trait HostWrite {
	/// Writer obtained from [`HostWrite::into_file_writer`].
	type FileWrite: HostFileWrite;
	/// Writer obtained from [`HostWrite::into_dir_writer`].
	type DirWrite: HostDirWrite;

	/// Consumes the writer and turns it into a file writer.
	fn into_file_writer(self) -> Result<Self::FileWrite, IoError>;

	/// Consumes the writer and turns it into a directory writer.
	fn into_dir_writer(self) -> Result<Self::DirWrite, IoError>;
}
440
441fn vec_encoded_len<T: ConstEncodedLen>(len: usize) -> usize {
442	T::max_encoded_len() * len + Compact::<u64>::compact_len(&(len as u64))
443}