1use config::{CachePolicy, Config};
2use file_handle::{FileHandle, OpenableFileHandle};
3
4use inode_store::{InodeId, InodeStore};
5use rfuse3::{Errno, raw::reply::ReplyEntry};
6use uuid::Uuid;
7
8use crate::util::convert_stat64_to_file_attr;
9use mount_fd::MountFds;
10use statx::StatExt;
11use std::io::Result;
12use std::ops::DerefMut;
13use std::sync::atomic::{AtomicBool, AtomicU32};
14use std::{
15 collections::{BTreeMap, btree_map},
16 ffi::{CStr, CString, OsString},
17 fs::File,
18 io::{self, Error},
19 marker::PhantomData,
20 os::{
21 fd::{AsFd, AsRawFd, BorrowedFd, RawFd},
22 unix::ffi::OsStringExt,
23 },
24 path::PathBuf,
25 sync::Arc,
26 sync::atomic::{AtomicU64, Ordering},
27 time::Duration,
28};
29use util::{
30 UniqueInodeGenerator, ebadf, is_dir, openat, reopen_fd_through_proc, stat_fd,
31 validate_path_component,
32};
33use vm_memory::bitmap::BitmapSlice;
34
35mod async_io;
36mod config;
37mod file_handle;
38mod inode_store;
39mod mount_fd;
40pub mod newlogfs;
41mod os_compat;
42mod statx;
43mod util;
44
/// NUL-terminated byte string for the current-directory entry (`"."`).
pub const CURRENT_DIR_CSTR: &[u8] = b".\0";
/// NUL-terminated byte string for the parent-directory entry (`".."`).
pub const PARENT_DIR_CSTR: &[u8] = b"..\0";
/// Largest inode number `do_lookup` accepts for a newly allocated inode
/// (2^56 - 1); larger values are rejected with an error.
pub const VFS_MAX_INO: u64 = 0xff_ffff_ffff_ffff;
/// Path of the mount table of the current process.
// NOTE(review): not referenced in the visible part of this file; presumably used when
// resolving mount points -- confirm against the callers.
const MOUNT_INFO_FILE: &str = "/proc/self/mountinfo";
/// Empty NUL-terminated byte string.
pub const EMPTY_CSTR: &[u8] = b"\0";
/// NUL-terminated path of the per-process fd directory, opened by `PassthroughFs::new`.
pub const PROC_SELF_FD_CSTR: &[u8] = b"/proc/self/fd\0";
/// Inode number under which `import` registers the root of the exported tree.
pub const ROOT_ID: u64 = 1;
54use tokio::sync::{Mutex, MutexGuard, RwLock};
55
56pub async fn new_passthroughfs_layer(rootdir: &str) -> Result<PassthroughFs> {
57 let config = Config {
58 root_dir: String::from(rootdir),
59 xattr: true,
61 do_import: true,
62 inode_file_handles: true,
63 ..Default::default()
64 };
65
66 let fs = PassthroughFs::<()>::new(config)?;
67
68 fs.import().await?;
69 Ok(fs)
70}
71
/// Inode number as used by this file system (key of the inode map).
type Inode = u64;
/// Identifier of an open file handle (key of the handle map).
type Handle = u64;

/// Threshold on host inode numbers (2^47 - 1). When `use_host_ino` is enabled, host
/// inode numbers above this value are first looked up among the existing mappings
/// before a new unique inode number is requested from the allocator.
const MAX_HOST_INO: u64 = 0x7fff_ffff_ffff;
77
/// A file backing an inode, either owned by this value or borrowed from elsewhere.
#[derive(Debug)]
enum InodeFile<'a> {
    /// A file that is closed when this value is dropped.
    Owned(File),
    /// A file borrowed from its owner for the lifetime `'a`.
    Ref(&'a File),
}

impl InodeFile<'_> {
    /// Returns the wrapped file regardless of how it is held.
    fn backing_file(&self) -> &File {
        match self {
            Self::Owned(file) => file,
            Self::Ref(file_ref) => file_ref,
        }
    }
}

impl AsRawFd for InodeFile<'_> {
    /// Returns the raw descriptor of the wrapped file.
    fn as_raw_fd(&self) -> RawFd {
        self.backing_file().as_raw_fd()
    }
}

impl AsFd for InodeFile<'_> {
    /// Borrows the descriptor of the wrapped file.
    fn as_fd(&self) -> BorrowedFd<'_> {
        self.backing_file().as_fd()
    }
}
110
111#[derive(Debug)]
112enum InodeHandle {
113 File(File),
114 Handle(Arc<OpenableFileHandle>),
115}
116
117impl InodeHandle {
118 fn file_handle(&self) -> Option<&FileHandle> {
119 match self {
120 InodeHandle::File(_) => None,
121 InodeHandle::Handle(h) => Some(h.file_handle()),
122 }
123 }
124
125 fn get_file(&self) -> Result<InodeFile<'_>> {
126 match self {
127 InodeHandle::File(f) => Ok(InodeFile::Ref(f)),
128 InodeHandle::Handle(h) => {
129 let f = h.open(libc::O_PATH)?;
130 Ok(InodeFile::Owned(f))
131 }
132 }
133 }
134
135 fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> Result<File> {
136 match self {
137 InodeHandle::File(f) => reopen_fd_through_proc(f, flags, proc_self_fd),
138 InodeHandle::Handle(h) => h.open(flags),
139 }
140 }
141
142 fn stat(&self) -> Result<libc::stat64> {
143 match self {
144 InodeHandle::File(f) => stat_fd(f, None),
145 InodeHandle::Handle(_h) => {
146 let file = self.get_file()?;
147 stat_fd(&file, None)
148 }
149 }
150 }
151}
152
153#[derive(Debug)]
155pub struct InodeData {
156 inode: Inode,
157 handle: InodeHandle,
159 id: InodeId,
160 refcount: AtomicU64,
161 mode: u32,
163}
164
165impl InodeData {
166 fn new(inode: Inode, f: InodeHandle, refcount: u64, id: InodeId, mode: u32) -> Self {
167 InodeData {
168 inode,
169 handle: f,
170 id,
171 refcount: AtomicU64::new(refcount),
172 mode,
173 }
174 }
175
176 fn get_file(&self) -> Result<InodeFile<'_>> {
177 self.handle.get_file()
178 }
179
180 fn open_file(&self, flags: libc::c_int, proc_self_fd: &File) -> Result<File> {
181 self.handle.open_file(flags, proc_self_fd)
182 }
183}
184
185struct InodeMap {
187 pub inodes: RwLock<InodeStore>,
188}
189
190impl InodeMap {
191 fn new() -> Self {
192 InodeMap {
193 inodes: RwLock::new(Default::default()),
194 }
195 }
196
197 async fn clear(&self) {
198 self.inodes.write().await.clear();
200 }
201
202 async fn get(&self, inode: Inode) -> Result<Arc<InodeData>> {
203 self.inodes
205 .read()
206 .await
207 .get(&inode)
208 .cloned()
209 .ok_or_else(ebadf)
210 }
211
212 fn get_inode_locked(
213 inodes: &InodeStore,
214 id: &InodeId,
215 handle: Option<&FileHandle>,
216 ) -> Option<Inode> {
217 match handle {
218 Some(h) => inodes.inode_by_handle(h).copied(),
219 None => inodes.inode_by_id(id).copied(),
220 }
221 }
222
223 async fn get_alt(&self, id: &InodeId, handle: Option<&FileHandle>) -> Option<Arc<InodeData>> {
224 let inodes = self.inodes.read().await;
226
227 Self::get_alt_locked(&inodes, id, handle)
228 }
229
230 fn get_alt_locked(
231 inodes: &InodeStore,
232 id: &InodeId,
233 handle: Option<&FileHandle>,
234 ) -> Option<Arc<InodeData>> {
235 handle
236 .and_then(|h| inodes.get_by_handle(h))
237 .or_else(|| {
238 inodes.get_by_id(id).filter(|data| {
239 handle.is_none() || data.handle.file_handle().is_none()
248 })
249 })
250 .cloned()
251 }
252
253 async fn insert(&self, data: Arc<InodeData>) {
254 let mut inodes = self.inodes.write().await;
255
256 Self::insert_locked(&mut inodes, data)
257 }
258
259 fn insert_locked(inodes: &mut InodeStore, data: Arc<InodeData>) {
260 inodes.insert(data);
261 }
262}
263
264struct HandleData {
265 inode: Inode,
266 file: File,
267 lock: Mutex<()>,
268 open_flags: AtomicU32,
269}
270
271impl HandleData {
272 fn new(inode: Inode, file: File, flags: u32) -> Self {
273 HandleData {
274 inode,
275 file,
276 lock: Mutex::new(()),
277 open_flags: AtomicU32::new(flags),
278 }
279 }
280
281 fn get_file(&self) -> &File {
282 &self.file
283 }
284
285 async fn get_file_mut(&self) -> (MutexGuard<()>, &File) {
286 (self.lock.lock().await, &self.file)
287 }
288
289 fn borrow_fd(&self) -> BorrowedFd {
290 self.file.as_fd()
291 }
292
293 async fn get_flags(&self) -> u32 {
294 self.open_flags.load(Ordering::Relaxed)
295 }
296
297 async fn set_flags(&self, flags: u32) {
298 self.open_flags.store(flags, Ordering::Relaxed);
299 }
300}
301
302struct HandleMap {
303 handles: RwLock<BTreeMap<Handle, Arc<HandleData>>>,
304}
305
306impl HandleMap {
307 fn new() -> Self {
308 HandleMap {
309 handles: RwLock::new(BTreeMap::new()),
310 }
311 }
312
313 async fn clear(&self) {
314 self.handles.write().await.clear();
316 }
317
318 async fn insert(&self, handle: Handle, data: HandleData) {
319 self.handles.write().await.insert(handle, Arc::new(data));
321 }
322
323 async fn release(&self, handle: Handle, inode: Inode) -> Result<()> {
324 let mut handles = self.handles.write().await;
326
327 if let btree_map::Entry::Occupied(e) = handles.entry(handle) {
328 if e.get().inode == inode {
329 e.remove();
332
333 return Ok(());
334 }
335 }
336
337 Err(ebadf())
338 }
339
340 async fn get(&self, handle: Handle, inode: Inode) -> Result<Arc<HandleData>> {
341 self.handles
343 .read()
344 .await
345 .get(&handle)
346 .filter(|hd| hd.inode == inode)
347 .cloned()
348 .ok_or_else(ebadf)
349 }
350}
351
/// A file system that passes requests through to a directory tree on the host.
///
/// The type parameter `S` is only carried through `PhantomData`; no value of it is stored.
pub struct PassthroughFs<S: BitmapSlice + Send + Sync = ()> {
    /// All inodes currently known to the file system.
    inode_map: InodeMap,
    /// Next inode number to hand out when host inode numbers are not used;
    /// starts at `ROOT_ID + 1`.
    next_inode: AtomicU64,

    /// All currently open file handles.
    handle_map: HandleMap,
    /// Counter for handle numbers; starts at 1.
    // NOTE(review): the code that advances this counter is outside this view.
    next_handle: AtomicU64,

    /// Allocator for unique inode numbers, consulted when `cfg.use_host_ino` is set.
    ino_allocator: UniqueInodeGenerator,
    /// Mount descriptors passed along when a file handle is made openable.
    mount_fds: MountFds,

    /// `O_PATH` descriptor of `/proc/self/fd`, used to reopen files and resolve their paths.
    proc_self_fd: File,

    /// Whether writeback caching is active; read by `get_writeback_open_flags`.
    /// Initialised to `false` by `new`.
    writeback: AtomicBool,

    /// Initialised to `false` by `new`.
    // NOTE(review): presumably toggled during protocol negotiation -- confirm.
    no_open: AtomicBool,

    /// Initialised to `false` by `new`.
    // NOTE(review): presumably toggled during protocol negotiation -- confirm.
    no_opendir: AtomicBool,

    /// Initialised from `cfg.no_readdir`.
    no_readdir: AtomicBool,

    /// Initialised from `cfg.seal_size`.
    seal_size: AtomicBool,

    /// Entry timeout for directories; falls back to `cfg.entry_timeout` when not configured.
    dir_entry_timeout: Duration,
    /// Attribute timeout for directories; falls back to `cfg.attr_timeout` when not configured.
    dir_attr_timeout: Duration,

    /// The configuration, after the consistency adjustments made by `new`.
    cfg: Config,

    /// Random (v4) identifier generated for this instance.
    _uuid: Uuid,

    /// Marker tying the otherwise unused type parameter `S` to the struct.
    phantom: PhantomData<S>,
}
414
415impl<S: BitmapSlice + Send + Sync> PassthroughFs<S> {
416 pub fn new(mut cfg: Config) -> Result<PassthroughFs<S>> {
418 if cfg.no_open && cfg.cache_policy != CachePolicy::Always {
419 warn!("passthroughfs: no_open only work with cache=always, reset to open mode");
420 cfg.no_open = false;
421 }
422 if cfg.writeback && cfg.cache_policy == CachePolicy::Never {
423 warn!(
424 "passthroughfs: writeback cache conflicts with cache=none, reset to no_writeback"
425 );
426 cfg.writeback = false;
427 }
428
429 let proc_self_fd_cstr = unsafe { CStr::from_bytes_with_nul_unchecked(PROC_SELF_FD_CSTR) };
431 let proc_self_fd = Self::open_file(
432 &libc::AT_FDCWD,
433 proc_self_fd_cstr,
434 libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC,
435 0,
436 )?;
437
438 let (dir_entry_timeout, dir_attr_timeout) =
439 match (cfg.dir_entry_timeout, cfg.dir_attr_timeout) {
440 (Some(e), Some(a)) => (e, a),
441 (Some(e), None) => (e, cfg.attr_timeout),
442 (None, Some(a)) => (cfg.entry_timeout, a),
443 (None, None) => (cfg.entry_timeout, cfg.attr_timeout),
444 };
445
446 let mount_fds = MountFds::new(None)?;
447
448 Ok(PassthroughFs {
449 inode_map: InodeMap::new(),
450 next_inode: AtomicU64::new(ROOT_ID + 1),
451 ino_allocator: UniqueInodeGenerator::new(),
452
453 handle_map: HandleMap::new(),
454 next_handle: AtomicU64::new(1),
455
456 mount_fds,
457 proc_self_fd,
458
459 writeback: AtomicBool::new(false),
460 no_open: AtomicBool::new(false),
461 no_opendir: AtomicBool::new(false),
462 no_readdir: AtomicBool::new(cfg.no_readdir),
464 seal_size: AtomicBool::new(cfg.seal_size),
465 dir_entry_timeout,
467 dir_attr_timeout,
468 cfg,
469
470 _uuid: Uuid::new_v4(),
471
472 phantom: PhantomData,
473 })
474 }
475
476 pub async fn import(&self) -> Result<()> {
478 let root = CString::new(self.cfg.root_dir.as_str()).expect("CString::new failed");
479
480 let (path_fd, handle_opt, st) = Self::open_file_and_handle(self, &libc::AT_FDCWD, &root)
481 .map_err(|e| {
482 error!("fuse: import: failed to get file or handle: {e:?}");
483 e
484 })?;
485 let id = InodeId::from_stat(&st);
486 let handle = if let Some(h) = handle_opt {
487 InodeHandle::Handle(self.to_openable_handle(h)?)
488 } else {
489 InodeHandle::File(path_fd)
490 };
491
492 unsafe { libc::umask(0o000) };
496
497 self.inode_map
499 .insert(Arc::new(InodeData::new(
500 ROOT_ID,
501 handle,
502 2,
503 id,
504 st.st.st_mode,
505 )))
506 .await;
507
508 Ok(())
509 }
510
511 pub fn keep_fds(&self) -> Vec<RawFd> {
513 vec![self.proc_self_fd.as_raw_fd()]
514 }
515
516 fn readlinkat(dfd: i32, pathname: &CStr) -> Result<PathBuf> {
517 let mut buf = Vec::with_capacity(libc::PATH_MAX as usize);
518
519 let buf_read = unsafe {
521 libc::readlinkat(
522 dfd,
523 pathname.as_ptr(),
524 buf.as_mut_ptr() as *mut libc::c_char,
525 buf.capacity(),
526 )
527 };
528 if buf_read < 0 {
529 error!("fuse: readlinkat error");
530 return Err(Error::last_os_error());
531 }
532
533 unsafe { buf.set_len(buf_read as usize) };
535 buf.shrink_to_fit();
536
537 Ok(PathBuf::from(OsString::from_vec(buf)))
541 }
542
543 pub async fn readlinkat_proc_file(&self, inode: Inode) -> Result<PathBuf> {
546 let data = self.inode_map.get(inode).await?;
547 let file = data.get_file()?;
548 let pathname = CString::new(format!("{}", file.as_raw_fd()))
549 .map_err(|e| Error::new(io::ErrorKind::InvalidData, e))?;
550
551 Self::readlinkat(self.proc_self_fd.as_raw_fd(), &pathname)
552 }
553
554 fn create_file_excl(
555 dir: &impl AsRawFd,
556 pathname: &CStr,
557 flags: i32,
558 mode: u32,
559 ) -> io::Result<Option<File>> {
560 match openat(dir, pathname, flags | libc::O_CREAT | libc::O_EXCL, mode) {
561 Ok(file) => Ok(Some(file)),
562 Err(err) => {
563 if err.kind() == io::ErrorKind::AlreadyExists {
565 if (flags & libc::O_EXCL) != 0 {
566 return Err(err);
567 }
568 return Ok(None);
569 }
570 Err(err)
571 }
572 }
573 }
574
575 fn open_file(dfd: &impl AsRawFd, pathname: &CStr, flags: i32, mode: u32) -> io::Result<File> {
576 openat(dfd, pathname, flags, mode)
577 }
578
579 fn open_file_restricted(
580 &self,
581 dir: &impl AsRawFd,
582 pathname: &CStr,
583 flags: i32,
584 mode: u32,
585 ) -> io::Result<File> {
586 let flags = libc::O_NOFOLLOW | libc::O_CLOEXEC | flags;
587
588 openat(dir, pathname, flags, mode)
593 }
595
596 fn open_file_and_handle(
598 &self,
599 dir: &impl AsRawFd,
600 name: &CStr,
601 ) -> io::Result<(File, Option<FileHandle>, StatExt)> {
602 let path_file = self.open_file_restricted(dir, name, libc::O_PATH, 0)?;
603 let st = statx::statx(&path_file, None)?;
604 let handle = if self.cfg.inode_file_handles {
605 FileHandle::from_fd(&path_file)?
606 } else {
607 None
608 };
609
610 Ok((path_file, handle, st))
611 }
612
613 fn to_openable_handle(&self, fh: FileHandle) -> io::Result<Arc<OpenableFileHandle>> {
614 fh.into_openable(&self.mount_fds, |fd, flags, _mode| {
615 reopen_fd_through_proc(&fd, flags, &self.proc_self_fd)
616 })
617 .map(Arc::new)
618 .map_err(|e| {
619 if !e.silent() {
620 error!("{e}");
621 }
622 e.into_inner()
623 })
624 }
625
626 async fn allocate_inode(
627 &self,
628 inodes: &InodeStore,
629 id: &InodeId,
630 handle_opt: Option<&FileHandle>,
631 ) -> io::Result<Inode> {
632 if !self.cfg.use_host_ino {
633 match InodeMap::get_inode_locked(inodes, id, handle_opt) {
636 Some(a) => Ok(a),
637 None => Ok(self.next_inode.fetch_add(1, Ordering::Relaxed)),
638 }
639 } else {
640 let inode = if id.ino > MAX_HOST_INO {
641 match InodeMap::get_inode_locked(inodes, id, handle_opt) {
643 Some(ino) => ino,
644 None => self.ino_allocator.get_unique_inode(id)?,
645 }
646 } else {
647 self.ino_allocator.get_unique_inode(id)?
648 };
649 Ok(inode)
651 }
652 }
653
654 async fn do_lookup(
655 &self,
656 parent: Inode,
657 name: &CStr,
658 ) -> std::result::Result<ReplyEntry, Errno> {
659 let name = if parent == ROOT_ID && name.to_bytes_with_nul().starts_with(PARENT_DIR_CSTR) {
660 CStr::from_bytes_with_nul(CURRENT_DIR_CSTR).unwrap()
662 } else {
663 name
664 };
665
666 let dir = self.inode_map.get(parent).await?;
667 let dir_file = dir.get_file()?;
668 let (path_fd, handle_opt, st) = Self::open_file_and_handle(self, &dir_file, name)?;
669 let id = InodeId::from_stat(&st);
670 let mut found = None;
674 'search: loop {
675 match self.inode_map.get_alt(&id, handle_opt.as_ref()).await {
676 None => break 'search,
678 Some(data) => {
679 let curr = data.refcount.load(Ordering::Acquire);
680 if curr == 0 {
682 continue 'search;
683 }
684
685 let new = curr.saturating_add(1);
687
688 if data
690 .refcount
691 .compare_exchange(curr, new, Ordering::AcqRel, Ordering::Acquire)
692 .is_ok()
693 {
694 found = Some(data.inode);
695 break;
696 }
697 }
698 }
699 }
700
701 let inode = if let Some(v) = found {
702 v
703 } else {
704 let handle = if let Some(h) = handle_opt.clone() {
705 InodeHandle::Handle(self.to_openable_handle(h)?)
706 } else {
707 InodeHandle::File(path_fd)
708 };
709
710 let mut inodes = self.inode_map.inodes.write().await;
712
713 match InodeMap::get_alt_locked(&inodes, &id, handle_opt.as_ref()) {
719 Some(data) => {
720 data.refcount.fetch_add(1, Ordering::Relaxed);
724 data.inode
725 }
726 None => {
727 let inode = self
728 .allocate_inode(&inodes, &id, handle_opt.as_ref())
729 .await?;
730 if inode > VFS_MAX_INO {
733 error!("fuse: max inode number reached: {VFS_MAX_INO}");
734 return Err(io::Error::other(format!(
735 "max inode number reached: {VFS_MAX_INO}"
736 ))
737 .into());
738 }
739
740 InodeMap::insert_locked(
741 inodes.deref_mut(),
742 Arc::new(InodeData::new(inode, handle, 1, id, st.st.st_mode)),
743 );
744
745 inode
746 }
747 }
748 };
749
750 let (entry_timeout, _) = if is_dir(st.st.st_mode) {
751 (self.dir_entry_timeout, self.dir_attr_timeout)
752 } else {
753 (self.cfg.entry_timeout, self.cfg.attr_timeout)
754 };
755
756 let mut attr_temp = convert_stat64_to_file_attr(st.st);
768 attr_temp.ino = inode;
769 Ok(ReplyEntry {
770 ttl: entry_timeout,
771 attr: attr_temp,
772 generation: 0,
773 })
774 }
775
776 fn forget_one(&self, inodes: &mut InodeStore, inode: Inode, count: u64) {
777 if inode == ROOT_ID {
779 return;
780 }
781
782 if let Some(data) = inodes.get(&inode) {
783 loop {
788 let curr = data.refcount.load(Ordering::Acquire);
789
790 let new = curr.saturating_sub(count);
793
794 if data
796 .refcount
797 .compare_exchange(curr, new, Ordering::AcqRel, Ordering::Acquire)
798 .is_ok()
799 {
800 if new == 0 {
801 let keep_mapping = !self.cfg.use_host_ino || data.id.ino > MAX_HOST_INO;
805 inodes.remove(&inode, keep_mapping);
806 }
807 break;
808 }
809 }
810 }
811 }
812
813 async fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> {
814 self.handle_map.release(handle, inode).await
815 }
816
817 fn validate_path_component(&self, name: &CStr) -> io::Result<()> {
820 if !self.cfg.do_import {
822 return Ok(());
823 }
824 validate_path_component(name)
825 }
826
827 async fn get_writeback_open_flags(&self, flags: i32) -> i32 {
879 let mut new_flags = flags;
880 let writeback = self.writeback.load(Ordering::Relaxed);
881
882 if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY {
886 new_flags &= !libc::O_ACCMODE;
887 new_flags |= libc::O_RDWR;
888 }
889
890 if writeback && flags & libc::O_APPEND != 0 {
897 new_flags &= !libc::O_APPEND;
898 }
899
900 new_flags
901 }
902}
903
#[cfg(test)]
mod tests {
    use std::ffi::OsString;

    use rfuse3::{MountOptions, raw::Session};
    use tokio::signal;

    use crate::passthrough::newlogfs::LoggingFileSystem;

    /// Directory exported when `PASSTHROUGH_TEST_ROOT` is not set.
    const DEFAULT_ROOT_DIR: &str = "/home/luxian/github/buck2-rust-third-party";
    /// Mount point used when `PASSTHROUGH_TEST_MOUNT` is not set.
    const DEFAULT_MOUNT_POINT: &str = "/home/luxian/pass";

    /// Manual smoke test: mounts the passthrough file system and serves it until the
    /// session ends or Ctrl-C is pressed.
    ///
    /// It needs FUSE and never finishes on its own, so it is ignored by default; run
    /// it with `cargo test -- --ignored`. The exported directory and the mount point
    /// can be overridden through `PASSTHROUGH_TEST_ROOT` and `PASSTHROUGH_TEST_MOUNT`.
    #[tokio::test]
    #[ignore = "interactive: mounts a FUSE file system and blocks until Ctrl-C"]
    async fn test_passthrough() {
        let root_dir = std::env::var("PASSTHROUGH_TEST_ROOT")
            .unwrap_or_else(|_| DEFAULT_ROOT_DIR.to_owned());
        let fs = super::new_passthroughfs_layer(&root_dir).await.unwrap();
        let logfs = LoggingFileSystem::new(fs);

        let mount_path = std::env::var_os("PASSTHROUGH_TEST_MOUNT")
            .unwrap_or_else(|| OsString::from(DEFAULT_MOUNT_POINT));

        // SAFETY: `getuid` and `getgid` have no preconditions and cannot fail.
        let uid = unsafe { libc::getuid() };
        let gid = unsafe { libc::getgid() };

        // `true` selects the regular (privileged) mount below.
        let not_unprivileged = true;

        let mut mount_options = MountOptions::default();
        mount_options.force_readdir_plus(true).uid(uid).gid(gid);

        let mut mount_handle: rfuse3::raw::MountHandle = if !not_unprivileged {
            Session::new(mount_options)
                .mount_with_unprivileged(logfs, mount_path)
                .await
                .unwrap()
        } else {
            Session::new(mount_options)
                .mount(logfs, mount_path)
                .await
                .unwrap()
        };

        let handle = &mut mount_handle;

        tokio::select! {
            res = handle => res.unwrap(),
            _ = signal::ctrl_c() => {
                mount_handle.unmount().await.unwrap()
            }
        }
    }
}
952}