Skip to main content

rustpython_common/
fileutils.rs

1// Python/fileutils.c in CPython
2#![allow(non_snake_case)]
3
4#[cfg(not(windows))]
5pub use libc::stat as StatStruct;
6
7#[cfg(windows)]
8pub use windows::{StatStruct, fstat};
9
10#[cfg(not(windows))]
11pub fn fstat(fd: crate::crt_fd::Borrowed<'_>) -> std::io::Result<StatStruct> {
12    let mut stat = core::mem::MaybeUninit::uninit();
13    unsafe {
14        let ret = libc::fstat(fd.as_raw(), stat.as_mut_ptr());
15        if ret == -1 {
16            Err(crate::os::errno_io_error())
17        } else {
18            Ok(stat.assume_init())
19        }
20    }
21}
22
23#[cfg(windows)]
24pub mod windows {
25    use crate::crt_fd;
26    use crate::windows::ToWideString;
27    use alloc::ffi::CString;
28    use libc::{S_IFCHR, S_IFDIR, S_IFMT};
29    use std::ffi::{OsStr, OsString};
30    use std::os::windows::io::AsRawHandle;
31    use std::sync::OnceLock;
32    use windows_sys::Win32::Foundation::{
33        ERROR_INVALID_HANDLE, ERROR_NOT_SUPPORTED, FILETIME, FreeLibrary, SetLastError,
34    };
35    use windows_sys::Win32::Storage::FileSystem::{
36        BY_HANDLE_FILE_INFORMATION, FILE_ATTRIBUTE_DIRECTORY, FILE_ATTRIBUTE_READONLY,
37        FILE_ATTRIBUTE_REPARSE_POINT, FILE_BASIC_INFO, FILE_ID_INFO, FILE_TYPE_CHAR,
38        FILE_TYPE_DISK, FILE_TYPE_PIPE, FILE_TYPE_UNKNOWN, FileBasicInfo, FileIdInfo,
39        GetFileInformationByHandle, GetFileInformationByHandleEx, GetFileType,
40    };
41    use windows_sys::Win32::System::LibraryLoader::{GetProcAddress, LoadLibraryW};
42    use windows_sys::Win32::System::SystemServices::IO_REPARSE_TAG_SYMLINK;
43    use windows_sys::core::PCWSTR;
44
45    pub const S_IFIFO: libc::c_int = 0o010000;
46    pub const S_IFLNK: libc::c_int = 0o120000;
47
48    pub const SECS_BETWEEN_EPOCHS: i64 = 11644473600; // Seconds between 1.1.1601 and 1.1.1970
49
    /// Windows stand-in for the C `stat` structure.
    ///
    /// Values are produced by the conversion helpers in this module
    /// (`fstat`, `attribute_data_to_stat`, `stat_basic_info_to_stat`).
    #[derive(Clone, Copy, Default)]
    pub struct StatStruct {
        /// Volume serial number, truncated to `c_ulong` where the source is wider.
        pub st_dev: libc::c_ulong,
        /// First 64-bit word of the file identifier (see `st_ino_high` for the second).
        pub st_ino: u64,
        /// File type and permission bits (`S_IF*` combined with `0o444`/`0o666`/`0o111`).
        pub st_mode: libc::c_ushort,
        /// Number of links to the file.
        pub st_nlink: i32,
        /// Always set to 0 by the constructors in this module.
        pub st_uid: i32,
        /// Always set to 0 by the constructors in this module.
        pub st_gid: i32,
        /// Always set to 0 by the constructors in this module.
        pub st_rdev: libc::c_ulong,
        /// File size in bytes.
        pub st_size: u64,
        /// Last access time: whole seconds relative to the Unix epoch.
        pub st_atime: libc::time_t,
        /// Last access time: sub-second part in nanoseconds.
        pub st_atime_nsec: i32,
        /// Last write time: whole seconds relative to the Unix epoch.
        pub st_mtime: libc::time_t,
        /// Last write time: sub-second part in nanoseconds.
        pub st_mtime_nsec: i32,
        /// Change time: whole seconds relative to the Unix epoch
        /// (0 when only `BY_HANDLE_FILE_INFORMATION` is available).
        pub st_ctime: libc::time_t,
        /// Change time: sub-second part in nanoseconds.
        pub st_ctime_nsec: i32,
        /// Creation time: whole seconds relative to the Unix epoch.
        pub st_birthtime: libc::time_t,
        /// Creation time: sub-second part in nanoseconds.
        pub st_birthtime_nsec: i32,
        /// Raw Windows file attribute flags (`FILE_ATTRIBUTE_*`).
        pub st_file_attributes: libc::c_ulong,
        /// Reparse tag associated with the file, or 0 when none was supplied.
        pub st_reparse_tag: u32,
        /// Second 64-bit word of the 128-bit file identifier (0 when only a
        /// 64-bit file index is available).
        pub st_ino_high: u64,
    }
72
73    impl StatStruct {
74        // update_st_mode_from_path in cpython
75        pub fn update_st_mode_from_path(&mut self, path: &OsStr, attr: u32) {
76            if attr & FILE_ATTRIBUTE_DIRECTORY == 0 {
77                let file_extension = path
78                    .to_wide()
79                    .split(|&c| c == '.' as u16)
80                    .next_back()
81                    .and_then(|s| String::from_utf16(s).ok());
82
83                if let Some(file_extension) = file_extension
84                    && (file_extension.eq_ignore_ascii_case("exe")
85                        || file_extension.eq_ignore_ascii_case("bat")
86                        || file_extension.eq_ignore_ascii_case("cmd")
87                        || file_extension.eq_ignore_ascii_case("com"))
88                {
89                    self.st_mode |= 0o111;
90                }
91            }
92        }
93    }
94
95    // _Py_fstat_noraise in cpython
96    pub fn fstat(fd: crt_fd::Borrowed<'_>) -> std::io::Result<StatStruct> {
97        let h = crt_fd::as_handle(fd);
98        if h.is_err() {
99            unsafe { SetLastError(ERROR_INVALID_HANDLE) };
100        }
101        let h = h?;
102        let h = h.as_raw_handle();
103        // reset stat?
104
105        let file_type = unsafe { GetFileType(h as _) };
106        if file_type == FILE_TYPE_UNKNOWN {
107            return Err(std::io::Error::last_os_error());
108        }
109        if file_type != FILE_TYPE_DISK {
110            let st_mode = if file_type == FILE_TYPE_CHAR {
111                S_IFCHR
112            } else if file_type == FILE_TYPE_PIPE {
113                S_IFIFO
114            } else {
115                0
116            } as u16;
117            return Ok(StatStruct {
118                st_mode,
119                ..Default::default()
120            });
121        }
122
123        let mut info = unsafe { core::mem::zeroed() };
124        let mut basic_info: FILE_BASIC_INFO = unsafe { core::mem::zeroed() };
125        let mut id_info: FILE_ID_INFO = unsafe { core::mem::zeroed() };
126
127        if unsafe { GetFileInformationByHandle(h as _, &mut info) } == 0
128            || unsafe {
129                GetFileInformationByHandleEx(
130                    h as _,
131                    FileBasicInfo,
132                    &mut basic_info as *mut _ as *mut _,
133                    core::mem::size_of_val(&basic_info) as u32,
134                )
135            } == 0
136        {
137            return Err(std::io::Error::last_os_error());
138        }
139
140        let p_id_info = if unsafe {
141            GetFileInformationByHandleEx(
142                h as _,
143                FileIdInfo,
144                &mut id_info as *mut _ as *mut _,
145                core::mem::size_of_val(&id_info) as u32,
146            )
147        } == 0
148        {
149            None
150        } else {
151            Some(&id_info)
152        };
153
154        Ok(attribute_data_to_stat(
155            &info,
156            0,
157            Some(&basic_info),
158            p_id_info,
159        ))
160    }
161
162    fn large_integer_to_time_t_nsec(input: i64) -> (libc::time_t, libc::c_int) {
163        let nsec_out = (input % 10_000_000) * 100; // FILETIME is in units of 100 nsec.
164        let time_out = ((input / 10_000_000) - SECS_BETWEEN_EPOCHS) as libc::time_t;
165        (time_out, nsec_out as _)
166    }
167
168    fn file_time_to_time_t_nsec(in_ptr: &FILETIME) -> (libc::time_t, libc::c_int) {
169        let in_val: i64 = unsafe { core::mem::transmute_copy(in_ptr) };
170        let nsec_out = (in_val % 10_000_000) * 100; // FILETIME is in units of 100 nsec.
171        let time_out = (in_val / 10_000_000) - SECS_BETWEEN_EPOCHS;
172        (time_out, nsec_out as _)
173    }
174
175    fn attribute_data_to_stat(
176        info: &BY_HANDLE_FILE_INFORMATION,
177        reparse_tag: u32,
178        basic_info: Option<&FILE_BASIC_INFO>,
179        id_info: Option<&FILE_ID_INFO>,
180    ) -> StatStruct {
181        let mut st_mode = attributes_to_mode(info.dwFileAttributes);
182        let st_size = ((info.nFileSizeHigh as u64) << 32) + info.nFileSizeLow as u64;
183        let st_dev: libc::c_ulong = if let Some(id_info) = id_info {
184            id_info.VolumeSerialNumber as _
185        } else {
186            info.dwVolumeSerialNumber
187        };
188        let st_rdev = 0;
189
190        let (st_birthtime, st_ctime, st_mtime, st_atime) = if let Some(basic_info) = basic_info {
191            (
192                large_integer_to_time_t_nsec(basic_info.CreationTime),
193                large_integer_to_time_t_nsec(basic_info.ChangeTime),
194                large_integer_to_time_t_nsec(basic_info.LastWriteTime),
195                large_integer_to_time_t_nsec(basic_info.LastAccessTime),
196            )
197        } else {
198            (
199                file_time_to_time_t_nsec(&info.ftCreationTime),
200                (0, 0),
201                file_time_to_time_t_nsec(&info.ftLastWriteTime),
202                file_time_to_time_t_nsec(&info.ftLastAccessTime),
203            )
204        };
205        let st_nlink = info.nNumberOfLinks as i32;
206
207        let st_ino = if let Some(id_info) = id_info {
208            let file_id: [u64; 2] = unsafe { core::mem::transmute_copy(&id_info.FileId) };
209            file_id
210        } else {
211            let ino = ((info.nFileIndexHigh as u64) << 32) + info.nFileIndexLow as u64;
212            [ino, 0]
213        };
214
215        if info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT != 0
216            && reparse_tag == IO_REPARSE_TAG_SYMLINK
217        {
218            st_mode = (st_mode & !(S_IFMT as u16)) | (S_IFLNK as u16);
219        }
220        let st_file_attributes = info.dwFileAttributes;
221
222        StatStruct {
223            st_dev,
224            st_ino: st_ino[0],
225            st_mode,
226            st_nlink,
227            st_uid: 0,
228            st_gid: 0,
229            st_rdev,
230            st_size,
231            st_atime: st_atime.0,
232            st_atime_nsec: st_atime.1,
233            st_mtime: st_mtime.0,
234            st_mtime_nsec: st_mtime.1,
235            st_ctime: st_ctime.0,
236            st_ctime_nsec: st_ctime.1,
237            st_birthtime: st_birthtime.0,
238            st_birthtime_nsec: st_birthtime.1,
239            st_file_attributes,
240            st_reparse_tag: reparse_tag,
241            st_ino_high: st_ino[1],
242        }
243    }
244
245    const fn attributes_to_mode(attr: u32) -> u16 {
246        let mut m = 0;
247        if attr & FILE_ATTRIBUTE_DIRECTORY != 0 {
248            m |= libc::S_IFDIR | 0o111; // IFEXEC for user,group,other
249        } else {
250            m |= libc::S_IFREG;
251        }
252        if attr & FILE_ATTRIBUTE_READONLY != 0 {
253            m |= 0o444;
254        } else {
255            m |= 0o666;
256        }
257        m as _
258    }
259
    /// Output buffer layout used with `GetFileInformationByName` (see
    /// `get_file_information_by_name`).
    ///
    /// `#[repr(C)]` keeps the field order and layout fixed because the struct is
    /// written to directly by the operating system; do not reorder fields.
    /// Timestamps are in 100 ns ticks (they are fed to
    /// `large_integer_to_time_t_nsec` by `stat_basic_info_to_stat`).
    #[derive(Clone, Copy)]
    #[repr(C)]
    pub struct FILE_STAT_BASIC_INFORMATION {
        pub FileId: i64,
        pub CreationTime: i64,
        pub LastAccessTime: i64,
        pub LastWriteTime: i64,
        pub ChangeTime: i64,
        pub AllocationSize: i64,
        // Used as the file size (`st_size`) by `stat_basic_info_to_stat`.
        pub EndOfFile: i64,
        pub FileAttributes: u32,
        pub ReparseTag: u32,
        pub NumberOfLinks: u32,
        // Compared against `FILE_DEVICE_*` constants to pick the file type.
        pub DeviceType: u32,
        pub DeviceCharacteristics: u32,
        pub Reserved: u32,
        pub VolumeSerialNumber: i64,
        // 128-bit file ID as two 64-bit words; mapped to `st_ino` / `st_ino_high`.
        pub FileId128: [u64; 2],
    }
279
    /// Selector passed to `GetFileInformationByName` to choose which kind of
    /// information is requested.
    ///
    /// `#[repr(C)]` with default discriminants means the variants are passed
    /// across the FFI boundary as 0, 1, 2, ... in declaration order, so the order
    /// must not change.
    // No variant is constructed inside this file, hence the dead_code allowance.
    #[allow(dead_code)]
    #[derive(Clone, Copy)]
    #[repr(C)]
    pub enum FILE_INFO_BY_NAME_CLASS {
        FileStatByNameInfo,
        FileStatLxByNameInfo,
        FileCaseSensitiveByNameInfo,
        FileStatBasicByNameInfo,
        MaximumFileInfoByNameClass,
    }
290
291    // _Py_GetFileInformationByName in cpython
292    pub fn get_file_information_by_name(
293        file_name: &OsStr,
294        file_information_class: FILE_INFO_BY_NAME_CLASS,
295    ) -> std::io::Result<FILE_STAT_BASIC_INFORMATION> {
296        static GET_FILE_INFORMATION_BY_NAME: OnceLock<
297            Option<
298                unsafe extern "system" fn(
299                    PCWSTR,
300                    FILE_INFO_BY_NAME_CLASS,
301                    *mut libc::c_void,
302                    u32,
303                ) -> i32,
304            >,
305        > = OnceLock::new();
306
307        let GetFileInformationByName = GET_FILE_INFORMATION_BY_NAME
308            .get_or_init(|| {
309                let library_name =
310                    OsString::from("api-ms-win-core-file-l2-1-4.dll").to_wide_with_nul();
311                let module = unsafe { LoadLibraryW(library_name.as_ptr()) };
312                if module.is_null() {
313                    return None;
314                }
315                let name = CString::new("GetFileInformationByName").unwrap();
316                if let Some(proc) =
317                    unsafe { GetProcAddress(module, name.as_bytes_with_nul().as_ptr()) }
318                {
319                    Some(unsafe {
320                        core::mem::transmute::<
321                            unsafe extern "system" fn() -> isize,
322                            unsafe extern "system" fn(
323                                *const u16,
324                                FILE_INFO_BY_NAME_CLASS,
325                                *mut libc::c_void,
326                                u32,
327                            ) -> i32,
328                        >(proc)
329                    })
330                } else {
331                    unsafe { FreeLibrary(module) };
332                    None
333                }
334            })
335            .ok_or_else(|| std::io::Error::from_raw_os_error(ERROR_NOT_SUPPORTED as _))?;
336
337        let file_name = file_name.to_wide_with_nul();
338        let file_info_buffer_size = core::mem::size_of::<FILE_STAT_BASIC_INFORMATION>() as u32;
339        let mut file_info_buffer = core::mem::MaybeUninit::<FILE_STAT_BASIC_INFORMATION>::uninit();
340        unsafe {
341            if GetFileInformationByName(
342                file_name.as_ptr(),
343                file_information_class as _,
344                file_info_buffer.as_mut_ptr() as _,
345                file_info_buffer_size,
346            ) == 0
347            {
348                Err(std::io::Error::last_os_error())
349            } else {
350                Ok(file_info_buffer.assume_init())
351            }
352        }
353    }
354
355    pub fn stat_basic_info_to_stat(info: &FILE_STAT_BASIC_INFORMATION) -> StatStruct {
356        use windows_sys::Win32::Storage::FileSystem;
357        use windows_sys::Win32::System::Ioctl;
358
359        const S_IFMT: u16 = self::S_IFMT as _;
360        const S_IFDIR: u16 = self::S_IFDIR as _;
361        const S_IFCHR: u16 = self::S_IFCHR as _;
362        const S_IFIFO: u16 = self::S_IFIFO as _;
363        const S_IFLNK: u16 = self::S_IFLNK as _;
364
365        let mut st_mode = attributes_to_mode(info.FileAttributes);
366        let st_size = info.EndOfFile as u64;
367        let st_birthtime = large_integer_to_time_t_nsec(info.CreationTime);
368        let st_ctime = large_integer_to_time_t_nsec(info.ChangeTime);
369        let st_mtime = large_integer_to_time_t_nsec(info.LastWriteTime);
370        let st_atime = large_integer_to_time_t_nsec(info.LastAccessTime);
371        let st_nlink = info.NumberOfLinks as _;
372        let st_dev = info.VolumeSerialNumber as u32;
373        // File systems with less than 128-bits zero pad into this field
374        let st_ino = info.FileId128;
375        // bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
376        // open other name surrogate reparse points without traversing them. To
377        // detect/handle these, check st_file_attributes and st_reparse_tag.
378        let st_reparse_tag = info.ReparseTag;
379        if info.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT != 0
380            && info.ReparseTag == IO_REPARSE_TAG_SYMLINK
381        {
382            // set the bits that make this a symlink
383            st_mode = (st_mode & !S_IFMT) | S_IFLNK;
384        }
385        let st_file_attributes = info.FileAttributes;
386        match info.DeviceType {
387            FileSystem::FILE_DEVICE_DISK
388            | Ioctl::FILE_DEVICE_VIRTUAL_DISK
389            | Ioctl::FILE_DEVICE_DFS
390            | FileSystem::FILE_DEVICE_CD_ROM
391            | Ioctl::FILE_DEVICE_CONTROLLER
392            | Ioctl::FILE_DEVICE_DATALINK => {}
393            Ioctl::FILE_DEVICE_DISK_FILE_SYSTEM
394            | Ioctl::FILE_DEVICE_CD_ROM_FILE_SYSTEM
395            | Ioctl::FILE_DEVICE_NETWORK_FILE_SYSTEM => {
396                st_mode = (st_mode & !S_IFMT) | 0x6000; // _S_IFBLK
397            }
398            Ioctl::FILE_DEVICE_CONSOLE
399            | Ioctl::FILE_DEVICE_NULL
400            | Ioctl::FILE_DEVICE_KEYBOARD
401            | Ioctl::FILE_DEVICE_MODEM
402            | Ioctl::FILE_DEVICE_MOUSE
403            | Ioctl::FILE_DEVICE_PARALLEL_PORT
404            | Ioctl::FILE_DEVICE_PRINTER
405            | Ioctl::FILE_DEVICE_SCREEN
406            | Ioctl::FILE_DEVICE_SERIAL_PORT
407            | Ioctl::FILE_DEVICE_SOUND => {
408                st_mode = (st_mode & !S_IFMT) | S_IFCHR;
409            }
410            Ioctl::FILE_DEVICE_NAMED_PIPE => {
411                st_mode = (st_mode & !S_IFMT) | S_IFIFO;
412            }
413            _ => {
414                if info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY != 0 {
415                    st_mode = (st_mode & !S_IFMT) | S_IFDIR;
416                }
417            }
418        }
419
420        StatStruct {
421            st_dev,
422            st_ino: st_ino[0],
423            st_mode,
424            st_nlink,
425            st_uid: 0,
426            st_gid: 0,
427            st_rdev: 0,
428            st_size,
429            st_atime: st_atime.0,
430            st_atime_nsec: st_atime.1,
431            st_mtime: st_mtime.0,
432            st_mtime_nsec: st_mtime.1,
433            st_ctime: st_ctime.0,
434            st_ctime_nsec: st_ctime.1,
435            st_birthtime: st_birthtime.0,
436            st_birthtime_nsec: st_birthtime.1,
437            st_file_attributes,
438            st_reparse_tag,
439            st_ino_high: st_ino[1],
440        }
441    }
442}
443
444// _Py_fopen_obj in cpython (Python/fileutils.c:1757-1835)
445// Open a file using std::fs::File and convert to FILE*
446// Automatically handles path encoding and EINTR retries
447pub fn fopen(path: &std::path::Path, mode: &str) -> std::io::Result<*mut libc::FILE> {
448    use alloc::ffi::CString;
449    use std::fs::File;
450
451    // Currently only supports read mode
452    // Can be extended to support "wb", "w+b", etc. if needed
453    if mode != "rb" {
454        return Err(std::io::Error::new(
455            std::io::ErrorKind::InvalidInput,
456            format!("unsupported mode: {}", mode),
457        ));
458    }
459
460    // Open file using std::fs::File (handles path encoding and EINTR automatically)
461    let file = File::open(path)?;
462
463    #[cfg(windows)]
464    {
465        use std::os::windows::io::IntoRawHandle;
466
467        // Convert File handle to CRT file descriptor
468        let handle = file.into_raw_handle();
469        let fd = unsafe { libc::open_osfhandle(handle as isize, libc::O_RDONLY) };
470        if fd == -1 {
471            return Err(std::io::Error::last_os_error());
472        }
473
474        // Convert fd to FILE*
475        let mode_cstr = CString::new(mode).unwrap();
476        let fp = unsafe { libc::fdopen(fd, mode_cstr.as_ptr()) };
477        if fp.is_null() {
478            unsafe { libc::close(fd) };
479            return Err(std::io::Error::last_os_error());
480        }
481
482        // Set non-inheritable (Windows needs this explicitly)
483        if let Err(e) = set_inheritable(fd, false) {
484            unsafe { libc::fclose(fp) };
485            return Err(e);
486        }
487
488        Ok(fp)
489    }
490
491    #[cfg(not(windows))]
492    {
493        use std::os::fd::IntoRawFd;
494
495        // Convert File to raw fd
496        let fd = file.into_raw_fd();
497
498        // Convert fd to FILE*
499        let mode_cstr = CString::new(mode).unwrap();
500        let fp = unsafe { libc::fdopen(fd, mode_cstr.as_ptr()) };
501        if fp.is_null() {
502            unsafe { libc::close(fd) };
503            return Err(std::io::Error::last_os_error());
504        }
505
506        // Unix: O_CLOEXEC is already set by File::open, so non-inheritable is automatic
507        Ok(fp)
508    }
509}
510
// set_inheritable in cpython (Python/fileutils.c:1443-1570)
// Set the inheritable flag of the specified file descriptor
// Only used on Windows; Unix automatically sets O_CLOEXEC
/// Sets or clears the inherit flag on the OS handle behind the CRT descriptor `fd`.
///
/// # Errors
///
/// Returns the last OS error when `fd` has no valid OS handle or when
/// `SetHandleInformation` fails.
#[cfg(windows)]
fn set_inheritable(fd: libc::c_int, inheritable: bool) -> std::io::Result<()> {
    use windows_sys::Win32::Foundation::{
        HANDLE, HANDLE_FLAG_INHERIT, INVALID_HANDLE_VALUE, SetHandleInformation,
    };

    let os_handle = unsafe { libc::get_osfhandle(fd) };
    if os_handle == INVALID_HANDLE_VALUE as isize {
        return Err(std::io::Error::last_os_error());
    }

    // Only the inherit bit is touched: it is the mask, and either set or cleared.
    let inherit_bits = match inheritable {
        true => HANDLE_FLAG_INHERIT,
        false => 0,
    };
    match unsafe { SetHandleInformation(os_handle as HANDLE, HANDLE_FLAG_INHERIT, inherit_bits) } {
        0 => Err(std::io::Error::last_os_error()),
        _ => Ok(()),
    }
}