Skip to main content

nydus_storage/
utils.rs

1// Copyright 2020 Ant Group. All rights reserved.
2// Copyright 2024 Nydus Developers. All rights reserved.
3//
4// SPDX-License-Identifier: Apache-2.0
5
6//! Utility helpers to support the storage subsystem.
7use fuse_backend_rs::abi::fuse_abi::off64_t;
8use fuse_backend_rs::file_buf::FileVolatileSlice;
9#[cfg(target_os = "macos")]
10use libc::{fcntl, radvisory};
11use nix::sys::uio::preadv;
12use nydus_utils::{
13    crc32,
14    digest::{self, RafsDigest},
15    round_down_4k,
16};
17use std::alloc::{alloc, handle_alloc_error, Layout};
18use std::cmp::{self, min};
19use std::io::{ErrorKind, IoSliceMut, Result};
20use std::os::fd::{AsFd, AsRawFd};
21use std::os::unix::io::RawFd;
22#[cfg(target_os = "linux")]
23use std::path::PathBuf;
24use std::slice::from_raw_parts_mut;
25#[cfg(target_os = "macos")]
26use std::{ffi::CStr, mem, os::raw::c_char};
27use vm_memory::bytes::Bytes;
28
29use crate::{StorageError, StorageResult};
30
31/// Just a simple wrapper for posix `preadv`. Provide a slice of `IoVec` as input.
32pub fn readv(fd: RawFd, iovec: &mut [IoSliceMut], offset: u64) -> Result<usize> {
33    loop {
34        match preadv(fd, iovec, offset as off64_t).map_err(|_| last_error!()) {
35            Ok(ret) => return Ok(ret),
36            // Retry if the IO is interrupted by signal.
37            Err(err) if err.kind() != ErrorKind::Interrupted => return Err(err),
38            _ => continue,
39        }
40    }
41}
42
43/// Copy from buffer slice to another buffer slice.
44///
45/// `offset` is where to start copy in the first buffer of source slice.
46/// Up to bytes of `length` is wanted in `src`.
47/// `dst_index` and `dst_slice_offset` indicate from where to start write destination.
48/// Return (Total copied bytes, (Final written destination index, Final written destination offset))
49pub fn copyv<S: AsRef<[u8]>>(
50    src: &[S],
51    dst: &[FileVolatileSlice],
52    offset: usize,
53    length: usize,
54    mut dst_index: usize,
55    mut dst_offset: usize,
56) -> StorageResult<(usize, (usize, usize))> {
57    // Validate input parameters first to protect following loop block.
58    if src.is_empty() || length == 0 {
59        return Ok((0, (dst_index, dst_offset)));
60    } else if offset > src[0].as_ref().len()
61        || dst_index >= dst.len()
62        || dst_offset > dst[dst_index].len()
63    {
64        return Err(StorageError::MemOverflow);
65    }
66
67    let mut copied = 0;
68    let mut src_offset = offset;
69    'next_source: for s in src {
70        let s = s.as_ref();
71        let mut buffer_len = min(s.len() - src_offset, length - copied);
72
73        loop {
74            if dst_index >= dst.len() {
75                return Err(StorageError::MemOverflow);
76            }
77
78            let dst_slice = &dst[dst_index];
79            let buffer = &s[src_offset..src_offset + buffer_len];
80            let written = dst_slice
81                .write(buffer, dst_offset)
82                .map_err(StorageError::VolatileSlice)?;
83
84            copied += written;
85            if dst_slice.len() - dst_offset == written {
86                dst_index += 1;
87                dst_offset = 0;
88            } else {
89                dst_offset += written;
90            }
91
92            // Move to next source buffer if the current source buffer has been exhausted.
93            if written == buffer_len {
94                src_offset = 0;
95                continue 'next_source;
96            } else {
97                src_offset += written;
98                buffer_len -= written;
99            }
100        }
101    }
102
103    Ok((copied, (dst_index, dst_offset)))
104}
105
106/// The copy_file_range system call performs an in-kernel copy between file descriptors src and dst
107/// without the additional cost of transferring data from the kernel to user space and back again.
108///
109/// There may be additional optimizations for specific file systems. It copies up to len bytes of
110/// data from file descriptor fd_in to file descriptor fd_out, overwriting any data that exists
111/// within the requested range of the target file.
112#[cfg(target_os = "linux")]
113pub fn copy_file_range(
114    src: impl AsFd,
115    src_off: u64,
116    dst: impl AsFd,
117    dst_off: u64,
118    mut len: usize,
119) -> Result<()> {
120    let mut src_off = src_off as i64;
121    let mut dst_off = dst_off as i64;
122
123    while len > 0 {
124        let ret = nix::fcntl::copy_file_range(
125            src.as_fd().as_raw_fd(),
126            Some(&mut src_off),
127            dst.as_fd().as_raw_fd(),
128            Some(&mut dst_off),
129            len,
130        )?;
131        if ret == 0 {
132            return Err(eio!("reach end of file when copy file range"));
133        }
134        len -= ret;
135    }
136
137    Ok(())
138}
139
/// Portable fallback for `copy_file_range` on platforms without the Linux system call.
///
/// Copies `len` bytes from `src` at `src_off` to `dst` at `dst_off` through a 4KB user
/// space bounce buffer using `pread`/`pwrite`. Existing data in the target range of `dst`
/// is overwritten.
///
/// # Errors
///
/// Returns the underlying `pread`/`pwrite` error on failure, or an EIO error if the source
/// reaches end of file, or the destination accepts no more data, before `len` bytes have
/// been copied.
#[cfg(not(target_os = "linux"))]
pub fn copy_file_range(
    src: impl AsFd,
    mut src_off: u64,
    dst: impl AsFd,
    mut dst_off: u64,
    mut len: usize,
) -> Result<()> {
    const BUF_SIZE: usize = 4096;
    let mut buf = vec![0u8; BUF_SIZE];

    while len > 0 {
        let bytes_to_read = BUF_SIZE.min(len);
        let read_bytes = nix::sys::uio::pread(
            src.as_fd().as_raw_fd(),
            &mut buf[..bytes_to_read],
            src_off as libc::off_t,
        )?;
        if read_bytes == 0 {
            return Err(eio!("reach end of file when read in copy_file_range"));
        }

        // `pwrite` may write fewer bytes than requested, so keep writing until the whole
        // chunk that was read has reached the destination. Advancing by `read_bytes` after
        // a single short write would silently drop the unwritten tail.
        let mut written = 0;
        while written < read_bytes {
            let write_bytes = nix::sys::uio::pwrite(
                dst.as_fd().as_raw_fd(),
                &buf[written..read_bytes],
                (dst_off + written as u64) as libc::off_t,
            )?;
            if write_bytes == 0 {
                return Err(eio!("reach end of file when write in copy_file_range"));
            }
            written += write_bytes;
        }

        src_off += read_bytes as u64;
        dst_off += read_bytes as u64;
        len -= read_bytes;
    }

    Ok(())
}
179
180#[cfg(target_os = "linux")]
181pub fn get_path_from_file(file: &impl AsRawFd) -> Option<String> {
182    let path = PathBuf::from("/proc/self/fd").join(file.as_raw_fd().to_string());
183    match std::fs::read_link(path) {
184        Ok(v) => Some(v.display().to_string()),
185        Err(e) => {
186            warn!("Failed to get path from file descriptor: {}", e);
187            None
188        }
189    }
190}
191
/// Resolve the path behind an open file descriptor using `fcntl(F_GETPATH)`.
///
/// Returns `None` when the `fcntl` call fails (a warning is logged) or when the returned
/// path is not valid UTF-8.
#[cfg(target_os = "macos")]
pub fn get_path_from_file(file: &impl AsRawFd) -> Option<String> {
    // Start from an all-zero buffer so that it holds a NUL-terminated string no matter how
    // much of it the kernel fills in.
    let mut path_buf: [c_char; 1024] = unsafe { mem::zeroed() };

    let rc = unsafe { fcntl(file.as_raw_fd(), libc::F_GETPATH, path_buf.as_mut_ptr()) };
    if rc == -1 {
        warn!("Failed to get path from file descriptor");
        None
    } else {
        // `path_buf` outlives the borrowed `CStr`, which is converted to an owned `String`
        // before this function returns.
        unsafe { CStr::from_ptr(path_buf.as_ptr()) }
            .to_str()
            .ok()
            .map(str::to_owned)
    }
}
207
208/// An memory cursor to access an `FileVolatileSlice` array.
209pub struct MemSliceCursor<'a> {
210    pub mem_slice: &'a [FileVolatileSlice<'a>],
211    pub index: usize,
212    pub offset: usize,
213}
214
215impl<'a> MemSliceCursor<'a> {
216    /// Create a new `MemSliceCursor` object.
217    pub fn new<'b: 'a>(slice: &'b [FileVolatileSlice]) -> Self {
218        Self {
219            mem_slice: slice,
220            index: 0,
221            offset: 0,
222        }
223    }
224
225    /// Move cursor forward by `size`.
226    pub fn move_cursor(&mut self, mut size: usize) {
227        while size > 0 && self.index < self.mem_slice.len() {
228            let slice = self.mem_slice[self.index];
229            let this_left = slice.len() - self.offset;
230
231            match this_left.cmp(&size) {
232                cmp::Ordering::Equal => {
233                    self.index += 1;
234                    self.offset = 0;
235                    return;
236                }
237                cmp::Ordering::Greater => {
238                    self.offset += size;
239                    return;
240                }
241                cmp::Ordering::Less => {
242                    self.index += 1;
243                    self.offset = 0;
244                    size -= this_left;
245                    continue;
246                }
247            }
248        }
249    }
250
251    /// Consume `size` bytes of memory content from the cursor.
252    pub fn consume(&mut self, mut size: usize) -> Vec<IoSliceMut<'_>> {
253        let mut vectors: Vec<IoSliceMut> = Vec::with_capacity(8);
254
255        while size > 0 && self.index < self.mem_slice.len() {
256            let slice = self.mem_slice[self.index];
257            let this_left = slice.len() - self.offset;
258
259            match this_left.cmp(&size) {
260                cmp::Ordering::Greater => {
261                    // Safe because self.offset is valid and we have checked `size`.
262                    let p = unsafe { slice.as_ptr().add(self.offset) };
263                    let s = unsafe { from_raw_parts_mut(p, size) };
264                    vectors.push(IoSliceMut::new(s));
265                    self.offset += size;
266                    break;
267                }
268                cmp::Ordering::Equal => {
269                    // Safe because self.offset is valid and we have checked `size`.
270                    let p = unsafe { slice.as_ptr().add(self.offset) };
271                    let s = unsafe { from_raw_parts_mut(p, size) };
272                    vectors.push(IoSliceMut::new(s));
273                    self.index += 1;
274                    self.offset = 0;
275                    break;
276                }
277                cmp::Ordering::Less => {
278                    let p = unsafe { slice.as_ptr().add(self.offset) };
279                    let s = unsafe { from_raw_parts_mut(p, this_left) };
280                    vectors.push(IoSliceMut::new(s));
281                    self.index += 1;
282                    self.offset = 0;
283                    size -= this_left;
284                }
285            }
286        }
287
288        vectors
289    }
290
291    /// Get the inner `FileVolatileSlice` array.
292    pub fn inner_slice(&self) -> &[FileVolatileSlice<'_>] {
293        self.mem_slice
294    }
295}
296
297/// A customized readahead function to ask kernel to fault in all pages from offset to end.
298///
299/// Call libc::readahead on every 128KB range because otherwise readahead stops at kernel bdi
300/// readahead size which is 128KB by default.
301#[cfg(target_os = "linux")]
302pub fn readahead(fd: libc::c_int, mut offset: u64, end: u64) {
303    offset = round_down_4k(offset);
304    while offset < end {
305        // Kernel default 128KB readahead size
306        let count = std::cmp::min(128 << 10, end - offset);
307        unsafe { libc::readahead(fd, offset as i64, count as usize) };
308        offset += count;
309    }
310}
311
/// A customized readahead function to ask the kernel to read in all pages in the range
/// from `offset` (rounded down by `round_down_4k`) to `end`.
///
/// One `fcntl(F_RDADVISE)` request is issued per 128KB chunk. The result of each call is
/// ignored, so failures are silently skipped.
#[cfg(target_os = "macos")]
pub fn readahead(fd: libc::c_int, mut offset: u64, end: u64) {
    offset = round_down_4k(offset);
    while offset < end {
        // Kernel default 128KB readahead size
        let count = std::cmp::min(128 << 10, end - offset);
        let advice = radvisory {
            ra_offset: offset as i64,
            ra_count: count as i32,
        };
        // `F_RDADVISE` takes a pointer to a `radvisory` structure as its argument, so pass
        // the address of `advice` rather than the structure itself. `advice` stays alive
        // for the duration of the call.
        unsafe {
            fcntl(fd, libc::F_RDADVISE, &advice as *const radvisory);
        }
        offset += count;
    }
}
331
/// A customized buf allocator that avoids zeroing.
///
/// Returns a `Vec<u8>` of length `size` whose backing memory is allocated with 4KB
/// alignment and whose capacity is `size` rounded up to a multiple of 4KB. The contents
/// are uninitialized, so they should be written before being read. A `size` of zero
/// yields an empty vector without allocating.
///
/// # Panics
///
/// Panics if `size` is not below `isize::MAX` or if no valid layout exists for it.
/// Allocation failure is reported through `handle_alloc_error`.
pub fn alloc_buf(size: usize) -> Vec<u8> {
    const ALIGNMENT: usize = 0x1000;

    assert!(size < isize::MAX as usize);
    match size {
        0 => Vec::new(),
        _ => {
            let padded = Layout::from_size_align(size, ALIGNMENT)
                .unwrap()
                .pad_to_align();
            // `padded` has a non-zero size here because `size > 0`.
            let raw = unsafe { alloc(padded) };
            if raw.is_null() {
                handle_alloc_error(padded);
            }
            // NOTE(review): the memory is allocated with 4KB alignment while `Vec<u8>`
            // expects the alignment of `u8` for its allocation - confirm that releasing
            // the vector with a different layout is acceptable for the allocator in use.
            unsafe { Vec::from_raw_parts(raw, size, padded.size()) }
        }
    }
}
347
348/// Check hash of data matches provided one
349pub fn check_hash(data: &[u8], digest: &RafsDigest, digester: digest::Algorithm) -> bool {
350    digest == &RafsDigest::from_buf(data, digester)
351}
352
353/// Check CRC of data matches provided one
354pub fn check_crc(data: &[u8], crc_digest: u32) -> bool {
355    crc_digest == crc32::Crc32::new(crc32::Algorithm::Crc32Iscsi).from_buf(data)
356}
357
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use vmm_sys_util::tempfile::TempFile;

    /// Exercise `copyv` with empty input, invalid start positions and copies that span
    /// several source buffers and destination slices.
    #[test]
    fn test_copyv() {
        // Two 4-byte destination buffers, exposed to `copyv` through raw-pointer slices.
        let mut dst_buf1 = vec![0x0u8; 4];
        let mut dst_buf2 = vec![0x0u8; 4];
        let dst_bufs = [
            unsafe { FileVolatileSlice::from_raw_ptr(dst_buf1.as_mut_ptr(), dst_buf1.len()) },
            unsafe { FileVolatileSlice::from_raw_ptr(dst_buf2.as_mut_ptr(), dst_buf2.len()) },
        ];

        // Two 3-byte source buffers holding the bytes 1..=6.
        let src_buf_1 = vec![1u8, 2u8, 3u8];
        let src_buf_2 = vec![4u8, 5u8, 6u8];
        let src_bufs = vec![src_buf_1.as_slice(), src_buf_2.as_slice()];

        // No source buffers: nothing is copied and the destination position is unchanged.
        let no_src: [Vec<u8>; 0] = [];
        assert_eq!(
            copyv(&no_src, &dst_bufs, 0, 1, 1, 1).unwrap(),
            (0, (1, 1))
        );
        // Zero length: same outcome.
        assert_eq!(
            copyv(&src_bufs, &dst_bufs, 0, 0, 1, 1).unwrap(),
            (0, (1, 1))
        );

        // Arguments are (offset, length, dst_index, dst_offset).
        // - source offset beyond the first source buffer,
        // - destination index beyond the destination array,
        // - the destination is full after the first source buffer, so visiting the second
        //   source buffer reports an overflow.
        let failing: [(usize, usize, usize, usize); 3] = [(5, 1, 1, 1), (0, 1, 2, 0), (0, 1, 1, 3)];
        for &(offset, length, dst_index, dst_offset) in failing.iter() {
            assert!(copyv(&src_bufs, &dst_bufs, offset, length, dst_index, dst_offset).is_err());
        }

        // Five bytes starting at the second source byte, written from the very beginning.
        assert_eq!(
            copyv(&src_bufs, &dst_bufs, 1, 5, 0, 0).unwrap(),
            (5, (1, 1))
        );
        assert_eq!(dst_buf1[..], [2u8, 3, 4, 5]);
        assert_eq!(dst_buf2[0], 6);

        // Three bytes into the start of the second destination slice.
        assert_eq!(
            copyv(&src_bufs, &dst_bufs, 1, 3, 1, 0).unwrap(),
            (3, (1, 3))
        );
        assert_eq!(dst_buf2[..3], [2u8, 3, 4]);

        // Three bytes that exactly fill the tail of the second destination slice.
        assert_eq!(
            copyv(&src_bufs, &dst_bufs, 1, 3, 1, 1).unwrap(),
            (3, (2, 0))
        );
        assert_eq!(dst_buf2[1..], [2u8, 3, 4]);

        // More bytes requested than the sources hold: only the available five are copied.
        assert_eq!(
            copyv(&src_bufs, &dst_bufs, 1, 6, 0, 3).unwrap(),
            (5, (2, 0))
        );
        assert_eq!(dst_buf1[3], 2);
        assert_eq!(dst_buf2[..], [3u8, 4, 5, 6]);
    }

    /// A zero-sized request yields an empty vector without any capacity.
    #[test]
    fn test_alloc_buf_zero_size() {
        let buf = alloc_buf(0);
        assert!(buf.is_empty());
        assert_eq!(buf.capacity(), 0);
    }

    /// Walk a cursor over two 2-byte slices with `move_cursor`.
    #[test]
    fn test_mem_slice_cursor_move() {
        let mut buf1 = vec![0x0u8; 2];
        let mut buf2 = vec![0x0u8; 2];
        let vs = [
            unsafe { FileVolatileSlice::from_raw_ptr(buf1.as_mut_ptr(), buf1.len()) },
            unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) },
        ];

        let mut cursor = MemSliceCursor::new(&vs);
        assert_eq!(cursor.index, 0);
        assert_eq!(cursor.offset, 0);

        // (bytes to move, expected index, expected offset), applied in sequence. The last
        // step shows that moving past the end leaves the cursor where it is.
        let steps: [(usize, usize, usize); 6] = [
            (0, 0, 0),
            (1, 0, 1),
            (1, 1, 0),
            (1, 1, 1),
            (2, 2, 0),
            (1, 2, 0),
        ];
        for &(size, index, offset) in steps.iter() {
            cursor.move_cursor(size);
            assert_eq!(cursor.index, index);
            assert_eq!(cursor.offset, offset);
        }
    }

    /// Walk a cursor over two 2-byte slices with `consume`.
    #[test]
    fn test_mem_slice_cursor_consume() {
        let mut buf1 = vec![0x0u8; 2];
        let mut buf2 = vec![0x0u8; 2];
        let vs = [
            unsafe { FileVolatileSlice::from_raw_ptr(buf1.as_mut_ptr(), buf1.len()) },
            unsafe { FileVolatileSlice::from_raw_ptr(buf2.as_mut_ptr(), buf2.len()) },
        ];

        let mut cursor = MemSliceCursor::new(&vs);
        assert_eq!(cursor.index, 0);
        assert_eq!(cursor.offset, 0);

        // (bytes to consume, expected number of io vectors, expected index, expected
        // offset), applied in sequence.
        let steps: [(usize, usize, usize, usize); 5] = [
            (0, 0, 0, 0),
            (1, 1, 0, 1),
            (2, 2, 1, 1),
            (2, 1, 2, 0),
            (2, 0, 2, 0),
        ];
        for &(size, vectors, index, offset) in steps.iter() {
            assert_eq!(cursor.consume(size).len(), vectors);
            assert_eq!(cursor.index, index);
            assert_eq!(cursor.offset, offset);
        }
    }

    /// Copy between temporary files, including sources that are too short.
    #[test]
    fn test_copy_file_range() {
        let mut src = TempFile::new().unwrap().into_file();
        let dst = TempFile::new().unwrap();

        // A full 4KB copy to offset 4096 extends the destination to 8KB.
        let payload = vec![8u8; 4096];
        src.write_all(&payload).unwrap();
        copy_file_range(&src, 0, dst.as_file(), 4096, 4096).unwrap();
        assert_eq!(dst.as_file().metadata().unwrap().len(), 8192);

        // The source holds only half of the requested bytes.
        let small_payload = vec![8u8; 2048];
        let mut small_src = TempFile::new().unwrap().into_file();
        small_src.write_all(&small_payload).unwrap();
        assert!(copy_file_range(&small_src, 0, dst.as_file(), 4096, 4096).is_err());

        // The source holds nothing at all.
        let empty_src = TempFile::new().unwrap().into_file();
        assert!(copy_file_range(&empty_src, 0, dst.as_file(), 4096, 4096).is_err());
    }

    /// A valid descriptor resolves to the temporary file's path, an invalid one to `None`.
    #[test]
    fn test_get_path_from_file() {
        let temp_file = TempFile::new().unwrap();
        let resolved = get_path_from_file(temp_file.as_file()).unwrap();
        assert_eq!(resolved, temp_file.as_path().display().to_string());

        let invalid_fd: RawFd = -1;
        assert!(get_path_from_file(&invalid_fd).is_none());
    }
}