1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
//! This contains thin wrappers around `memfd_create` and their file sealing API.
//!
//! The [`MemFile`] struct represents a file created by the `memfd_create` syscall.
//! Such files are memory backed and fully anonymous, meaning no other process can see them (well... except by looking in `/proc` on Linux).
//!
//! After creation, the file descriptors can be shared with child processes, or even sent to other processes over a Unix socket.
//! The files can then be memory mapped to be used as shared memory.
//!
//! This is all quite similar to `shmopen`, except that files created by `shmopen` are not anoynmous.
//! Depending on your application, the anonymous nature of `memfd` may be a nice property.
//! Additionally, files created by `shmopen` do not support file sealing.
//!
//! # File sealing
//! You can enable file sealing for [`MemFile`] by creating them with [`CreateOptions::allow_sealing(true)`][CreateOptions::allow_sealing].
//! This allows you to use [`MemFile::add_seals`] to add seals to the file.
//! You can also get the list of seals with [`MemFile::get_seals`].
//!
//! Once a seal is added to a file, it can not be removed.
//! Each seal prevents certain types of actions on the file.
//! For example: the [`Seal::Write`] seal prevents writing to the file, both through syscalls and memory mappings,
//! and the [`Seal::Shrink`] and [`Seal::Grow`] seals prevent the file from being resized.
//!
//! This is quite interesting for Rust, as it is the only guaranteed safe way to map memory:
//! when a file is sealed with [`Seal::Write`] and [`Seal::Shrink`], it is file contents can not change, and the file can not be shrinked.
//! The latter is also important, because trying to read from a memory mapping a of file that was shrinked will raise a `SIGBUS` signal
//! and likely crash your application.
//!
//! Another interesting option is to first create a shared, writable memory mapping for your [`MemFile`],
//! and then add the [`Seal::FutureWrite`] and [`Seal::Shrink`] seals.
//! The existing memory mapping can still be used to change the contents of the file, but no new shared, writable memory mappings can be created.
//! When sharing the file with other processes, it prevents those processes from shrinking or writing to the file,
//! while the original process can still change the file contents.
//!
//! # Example
//! ```
//! # fn main() -> std::io::Result<()> {
//! use memfile::{MemFile, CreateOptions, Seal};
//! use std::io::Write;
//!
//! let mut file = MemFile::create("foo", CreateOptions::new().allow_sealing(true))?;
//! file.write_all(b"Hello world!")?;
//! file.add_seals(Seal::Write | Seal::Shrink | Seal::Grow)?;
//! // From now on, all writes or attempts to created shared, writable memory mappings will fail.
//! # Ok(())
//! # }
//! ```

use std::fs::File;
use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};

mod sys;
mod seal;

pub use seal::{Seal, Seals};

/// A memory backed file that can have seals applied to it.
///
/// The struct implements [`AsRawFd`], [`IntoRawFd`] and [`FromRawFd`].
/// When using [`FromRawFd::from_raw_fd`], you must ensure that the file descriptor is a valid `memfd`.
#[derive(Debug)]
pub struct MemFile {
	file: File,
}

impl MemFile {
	/// Create a new `memfd` with the given options.
	///
	/// The `name` argument is purely for debugging purposes.
	/// On Linux it shows up in `/proc`, but it serves no other purpose.
	/// In particular, multiple files can be created with the same name.
	///
	/// The close-on-exec flag is set on the created file descriptor.
	/// If you want to pass it to a child process, you should use [`libc::dup2`] or something similar *after forking*.
	/// Disabling the close-on-exec flag before forking causes a race condition with other threads.
	pub fn create(name: &str, options: &CreateOptions) -> std::io::Result<Self> {
		let file = sys::memfd_create(name, options.as_flags())?;
		Ok(Self { file })
	}

	/// Try to create a new [`MemFile`] Createinstance that shares the same underlying file handle as the existing [`MemFile`] instance.
	///
	/// Reads, writes, and seeks will affect both [`MemFile`] instances simultaneously.
	pub fn try_clone(&self) -> std::io::Result<Self> {
		let file = self.file.try_clone()?;
		Ok(Self { file })
	}

	/// Wrap an already-open file as [`MemFile`].
	///
	/// This function returns an error if the file was not created by `memfd_create`.
	///
	/// If the function succeeds, the passed in file object is consumed and the returned [`MemFile`] takes ownership of the file descriptor.
	/// If the function fails, the original file object is included in the returned error.
	pub fn from_file<T: AsRawFd + IntoRawFd>(file: T) -> Result<Self, FromFdError<T>> {
		match sys::memfd_get_seals(file.as_raw_fd()) {
			Ok(_) => {
				let file = unsafe { File::from_raw_fd(file.into_raw_fd()) };
				Ok(Self { file })
			},
			Err(error) => Err(FromFdError { error, file }),
		}
	}

	/// Truncate or extend the underlying file, updating the size of this file to become size.
	///
	/// If the size is less than the current file's size, then the file will be shrunk.
	/// If it is greater than the current file's size, then the file will be extended to size and have all of the intermediate data filled in with 0s.
	/// The file's cursor isn't changed.
	/// In particular, if the cursor was at the end and the file is shrunk using this operation, the cursor will now be past the end.
	pub fn set_len(&self, size: u64) -> std::io::Result<()> {
		self.file.set_len(size)
	}

	/// Get the active seals of the file.
	pub fn get_seals(&self) -> std::io::Result<Seals> {
		let seals = sys::memfd_get_seals(self.as_raw_fd())?;
		Ok(Seals::from_bits_truncate(seals as u32))
	}

	/// Add a single seal to the file.
	///
	/// If you want to add multiple seals, you should prefer [`Self::add_seals`] to reduce the number of syscalls.
	///
	/// This function will fail if the file was not created with sealing support,
	/// if the file has already been sealed with [`Seal::Seal`],
	/// or if you try to add [`Seal::Write`] while a shared, writable memory mapping exists for the file.
	///
	/// Adding a seal that is already active is a no-op.
	pub fn add_seal(&self, seal: Seal) -> std::io::Result<()> {
		self.add_seals(seal.into())
	}

	/// Add multiple seals to the file.
	///
	/// This function will fail if the file was not created with sealing support,
	/// if the file has already been sealed with [`Seal::Seal`],
	/// or if you try to add [`Seal::Write`] while a shared, writable memory mapping exists for the file.
	///
	/// Adding seals that are already active is a no-op.
	pub fn add_seals(&self, seals: Seals) -> std::io::Result<()> {
		sys::memfd_add_seals(self.as_raw_fd(), seals.bits() as std::os::raw::c_int)
	}
}

impl FromRawFd for MemFile {
	unsafe fn from_raw_fd(fd: RawFd) -> Self {
		let file = File::from_raw_fd(fd);
		Self { file }
	}
}

impl AsRawFd for MemFile {
	fn as_raw_fd(&self) -> RawFd {
		self.file.as_raw_fd()
	}
}

impl IntoRawFd for MemFile {
	fn into_raw_fd(self) -> RawFd {
		self.file.into_raw_fd()
	}
}

impl std::os::unix::fs::FileExt for MemFile {
	fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result<usize> {
		self.file.read_at(buf, offset)
	}

	fn write_at(&self, buf: &[u8], offset: u64) -> std::io::Result<usize> {
		self.file.write_at(buf, offset)
	}
}

impl std::io::Write for MemFile {
	fn flush(&mut self) -> std::io::Result<()> {
		self.file.flush()
	}

	fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
		self.file.write(buf)
	}
}

impl std::io::Read for MemFile {
	fn read(&mut self, buf: &mut[u8]) -> std::io::Result<usize> {
		self.file.read(buf)
	}
}

impl std::io::Seek for MemFile {
	fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
		self.file.seek(pos)
	}
}

impl From<MemFile> for std::process::Stdio {
	fn from(other: MemFile) -> Self {
		other.file.into()
	}
}

/// Options for creating a [`MemFile`].
///
/// Support for options depend on platform and OS details.
/// Refer to your kernel documentation of `memfd_create` for details.
#[derive(Copy, Clone, Debug, Default)]
pub struct CreateOptions {
	allow_sealing: bool,
	huge_table: Option<HugeTlb>,
}

impl CreateOptions {
	/// Get the default creation options for a [`MemFile`].
	///
	/// Initially, file sealing is not enabled no no huge TLB page size is configured.
	///
	/// Note that the close-on-exec flag will always be set on the created file descriptor.
	/// If you want to pass it to a child process, you should use [`libc::dup2`] or something similar *after forking*.
	/// Disabling the close-on-exec flag before forking causes a race condition with other threads.
	pub fn new() -> Self {
		Self::default()
	}

	/// Allow sealing operations on the created [`MemFile`].
	pub fn allow_sealing(&mut self, value: bool) -> &mut Self {
		self.allow_sealing = value;
		self
	}

	/// Create the file in a `hugetlbfs` filesystem using huge pages for the translation look-aside buffer.
	///
	/// Support for this feature and specific sizes depend on the CPU and kernel configuration.
	/// See also: <https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html>
	pub fn huge_tlb(&mut self, value: Option<HugeTlb>) -> &mut Self {
		self.huge_table = value;
		self
	}

	/// Get the options as raw flags for `libc::memfd_create`.
	fn as_flags(&self) -> std::os::raw::c_int {
		let mut flags = sys::flags::MFD_CLOEXEC;
		if self.allow_sealing {
			flags |= sys::flags::MFD_ALLOW_SEALING;
		}
		#[cfg(target_os = "linux")]
		if let Some(size) = self.huge_table {
			flags |= sys::flags::MFD_HUGETLB | size as u32 as std::os::raw::c_int;
		}
		flags
	}
}

/// Page size for the translation look-aside buffer.
///
/// Support for specific sizes depends on the CPU and kernel configuration.
/// See also: <https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html>
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[repr(u32)]
#[non_exhaustive]
pub enum HugeTlb {
	Huge64KB = sys::flags::MFD_HUGE_64KB as u32,
	Huge512KB = sys::flags::MFD_HUGE_512KB as u32,
	Huge1MB = sys::flags::MFD_HUGE_1MB as u32,
	Huge2MB = sys::flags::MFD_HUGE_2MB as u32,
	Huge8MB = sys::flags::MFD_HUGE_8MB as u32,
	Huge16MB = sys::flags::MFD_HUGE_16MB as u32,
	Huge32MB = sys::flags::MFD_HUGE_32MB as u32,
	Huge256MB = sys::flags::MFD_HUGE_256MB as u32,
	Huge512MB = sys::flags::MFD_HUGE_512MB as u32,
	Huge1GB = sys::flags::MFD_HUGE_1GB as u32,
	Huge2GB = sys::flags::MFD_HUGE_2GB as u32,
	Huge16GB = sys::flags::MFD_HUGE_16GB as u32,
}

/// Error returned when the file passed to [`MemFile::from_file`] is not a `memfd`.
///
/// This struct contains the [`std::io::Error`] that occurred and the original value passed to `from_file`.
/// It is also directly convertible to [`std::io::Error`], so you can pass it up using the `?` operator
/// from a function that returns an [`std::io::Result`].
pub struct FromFdError<T> {
	error: std::io::Error,
	file: T,
}

impl<T> FromFdError<T> {
	/// Get a reference to the I/O error.
	pub fn error(&self) -> &std::io::Error {
		&self.error
	}

	/// Get a reference to the original file object.
	pub fn file(&self) -> &T {
		&self.file
	}

	/// Consume the struct and return the I/O error and the original file object as tuple.
	pub fn into_parts(self) -> (std::io::Error, T) {
		(self.error, self.file)
	}

	/// Consume the struct and return the I/O error.
	pub fn into_error(self) -> std::io::Error {
		self.error
	}

	/// Consume the struct and return the original file object.
	pub fn into_file(self) -> T {
		self.file
	}
}

impl<T> From<FromFdError<T>> for std::io::Error {
	fn from(other: FromFdError<T>) -> Self {
		other.into_error()
	}
}