edit/sys/
unix.rs

1// Copyright (c) Microsoft Corporation.
2// Licensed under the MIT License.
3
4//! Unix-specific platform code.
5//!
6//! Read the `windows` module for reference.
7//! TODO: This reminds me that the sys API should probably be a trait.
8
9use std::ffi::{CStr, c_int, c_void};
10use std::fs::{self, File};
11use std::mem::{self, ManuallyDrop, MaybeUninit};
12use std::os::fd::{AsRawFd as _, FromRawFd as _};
13use std::path::Path;
14use std::ptr::{self, NonNull, null_mut};
15use std::{thread, time};
16
17use crate::arena::{Arena, ArenaString, scratch_arena};
18use crate::helpers::*;
19use crate::{apperr, arena_format};
20
/// Computes the protection argument used for the reserving `mmap` call in
/// `virtual_reserve` (NetBSD variant).
///
/// `flags` are the protection flags that will later be requested via `mprotect`.
#[cfg(target_os = "netbsd")]
const fn desired_mprotect(flags: c_int) -> c_int {
    // NetBSD allows an mmap(2) caller to specify what protection flags they
    // will use later via mprotect. It does not allow a caller to move from
    // PROT_NONE to PROT_READ | PROT_WRITE.
    //
    // see PROT_MPROTECT in man 2 mmap
    flags << 3
}

/// Computes the protection argument used for the reserving `mmap` call in
/// `virtual_reserve` (non-NetBSD variant).
///
/// The requested flags are ignored and the mapping starts out as `PROT_NONE`.
#[cfg(not(target_os = "netbsd"))]
const fn desired_mprotect(_: c_int) -> c_int {
    libc::PROT_NONE
}
35
/// Process-wide terminal state shared by the functions in this module.
struct State {
    /// File descriptor that input is read from. `init` replaces it with a
    /// handle to `/dev/tty` when the process' stdin is not a TTY.
    stdin: libc::c_int,
    /// Cached `F_GETFL` flags of `stdin`; `set_tty_nonblocking` toggles
    /// `O_NONBLOCK` in here and writes the result back with `F_SETFL`.
    stdin_flags: libc::c_int,
    /// File descriptor that output is written to.
    stdout: libc::c_int,
    /// Terminal attributes captured by `switch_modes`.
    /// Dropping `Deinit` takes them out and restores them.
    stdout_initial_termios: Option<libc::termios>,
    /// Set by the SIGWINCH handler and by `inject_window_size_into_stdin`;
    /// consumed (and reset) by `read_stdin`.
    inject_resize: bool,
    // Buffer for incomplete UTF-8 sequences (max 4 bytes needed)
    utf8_buf: [u8; 4],
    /// Number of valid bytes at the start of `utf8_buf`.
    utf8_len: usize,
}
46
/// The single, global instance of [`State`].
///
/// It starts out pointing at the standard stdin/stdout descriptors with no
/// saved terminal attributes, no pending resize and no cached UTF-8 bytes.
// NOTE(review): this is a `static mut` that is also written from the signal
// handler; all accesses are unsynchronized. Presumably only the main thread
// touches it otherwise — confirm against the callers.
static mut STATE: State = State {
    stdin: libc::STDIN_FILENO,
    stdin_flags: 0,
    stdout: libc::STDOUT_FILENO,
    stdout_initial_termios: None,
    inject_resize: false,
    utf8_buf: [0; 4],
    utf8_len: 0,
};
56
/// Signal handler that `switch_modes` registers for `SIGWINCH`.
///
/// It only raises `STATE.inject_resize`; the window size itself is queried
/// later, inside `read_stdin`.
extern "C" fn sigwinch_handler(_: libc::c_int) {
    unsafe {
        STATE.inject_resize = true;
    }
}
62
63pub fn init() -> apperr::Result<Deinit> {
64    unsafe {
65        // Reopen stdin if it's redirected (= piped input).
66        if libc::isatty(STATE.stdin) == 0 {
67            STATE.stdin = check_int_return(libc::open(c"/dev/tty".as_ptr(), libc::O_RDONLY))?;
68        }
69
70        // Store the stdin flags so we can more easily toggle `O_NONBLOCK` later on.
71        STATE.stdin_flags = check_int_return(libc::fcntl(STATE.stdin, libc::F_GETFL))?;
72
73        Ok(Deinit)
74    }
75}
76
77pub struct Deinit;
78
79impl Drop for Deinit {
80    fn drop(&mut self) {
81        unsafe {
82            #[allow(static_mut_refs)]
83            if let Some(termios) = STATE.stdout_initial_termios.take() {
84                // Restore the original terminal modes.
85                libc::tcsetattr(STATE.stdout, libc::TCSANOW, &termios);
86            }
87        }
88    }
89}
90
91pub fn switch_modes() -> apperr::Result<()> {
92    unsafe {
93        // Set STATE.inject_resize to true whenever we get a SIGWINCH.
94        let mut sigwinch_action: libc::sigaction = mem::zeroed();
95        sigwinch_action.sa_sigaction = sigwinch_handler as libc::sighandler_t;
96        check_int_return(libc::sigaction(libc::SIGWINCH, &sigwinch_action, null_mut()))?;
97
98        // Get the original terminal modes so we can disable raw mode on exit.
99        let mut termios = MaybeUninit::<libc::termios>::uninit();
100        check_int_return(libc::tcgetattr(STATE.stdin, termios.as_mut_ptr()))?;
101        let mut termios = termios.assume_init();
102        STATE.stdout_initial_termios = Some(termios);
103
104        termios.c_iflag &= !(
105            // When neither IGNBRK...
106            libc::IGNBRK
107            // ...nor BRKINT are set, a BREAK reads as a null byte ('\0'), ...
108            | libc::BRKINT
109            // ...except when PARMRK is set, in which case it reads as the sequence \377 \0 \0.
110            | libc::PARMRK
111            // Disable input parity checking.
112            | libc::INPCK
113            // Disable stripping of eighth bit.
114            | libc::ISTRIP
115            // Disable mapping of NL to CR on input.
116            | libc::INLCR
117            // Disable ignoring CR on input.
118            | libc::IGNCR
119            // Disable mapping of CR to NL on input.
120            | libc::ICRNL
121            // Disable software flow control.
122            | libc::IXON
123        );
124        // Disable output processing.
125        termios.c_oflag &= !libc::OPOST;
126        termios.c_cflag &= !(
127            // Reset character size mask.
128            libc::CSIZE
129            // Disable parity generation.
130            | libc::PARENB
131        );
132        // Set character size back to 8 bits.
133        termios.c_cflag |= libc::CS8;
134        termios.c_lflag &= !(
135            // Disable signal generation (SIGINT, SIGTSTP, SIGQUIT).
136            libc::ISIG
137            // Disable canonical mode (line buffering).
138            | libc::ICANON
139            // Disable echoing of input characters.
140            | libc::ECHO
141            // Disable echoing of NL.
142            | libc::ECHONL
143            // Disable extended input processing (e.g. Ctrl-V).
144            | libc::IEXTEN
145        );
146
147        // Set the terminal to raw mode.
148        termios.c_lflag &= !(libc::ICANON | libc::ECHO);
149        check_int_return(libc::tcsetattr(STATE.stdin, libc::TCSANOW, &termios))?;
150
151        Ok(())
152    }
153}
154
/// Requests that the next `read_stdin` call reports the current window size.
///
/// This raises the same `STATE.inject_resize` flag that the `SIGWINCH`
/// handler sets, without a signal having been delivered.
pub fn inject_window_size_into_stdin() {
    unsafe {
        STATE.inject_resize = true;
    }
}
160
161fn get_window_size() -> (u16, u16) {
162    let mut winsz: libc::winsize = unsafe { mem::zeroed() };
163
164    for attempt in 1.. {
165        let ret = unsafe { libc::ioctl(STATE.stdout, libc::TIOCGWINSZ, &raw mut winsz) };
166        if ret == -1 || (winsz.ws_col != 0 && winsz.ws_row != 0) {
167            break;
168        }
169
170        if attempt == 10 {
171            winsz.ws_col = 80;
172            winsz.ws_row = 24;
173            break;
174        }
175
176        // Some terminals are bad emulators and don't report TIOCGWINSZ immediately.
177        thread::sleep(time::Duration::from_millis(10 * attempt));
178    }
179
180    (winsz.ws_col, winsz.ws_row)
181}
182
183/// Reads from stdin.
184///
185/// Returns `None` if there was an error reading from stdin.
186/// Returns `Some("")` if the given timeout was reached.
187/// Otherwise, it returns the read, non-empty string.
188pub fn read_stdin(arena: &Arena, mut timeout: time::Duration) -> Option<ArenaString<'_>> {
189    unsafe {
190        if STATE.inject_resize {
191            timeout = time::Duration::ZERO;
192        }
193
194        let read_poll = timeout != time::Duration::MAX;
195        let mut buf = Vec::new_in(arena);
196
197        // We don't know if the input is valid UTF8, so we first use a Vec and then
198        // later turn it into UTF8 using `from_utf8_lossy_owned`.
199        // It is important that we allocate the buffer with an explicit capacity,
200        // because we later use `spare_capacity_mut` to access it.
201        buf.reserve(4 * KIBI);
202
203        // We got some leftover broken UTF8 from a previous read? Prepend it.
204        if STATE.utf8_len != 0 {
205            STATE.utf8_len = 0;
206            buf.extend_from_slice(&STATE.utf8_buf[..STATE.utf8_len]);
207        }
208
209        loop {
210            if timeout != time::Duration::MAX {
211                let beg = time::Instant::now();
212
213                let mut pollfd = libc::pollfd { fd: STATE.stdin, events: libc::POLLIN, revents: 0 };
214                let ret;
215                #[cfg(target_os = "linux")]
216                {
217                    let ts = libc::timespec {
218                        tv_sec: timeout.as_secs() as libc::time_t,
219                        tv_nsec: timeout.subsec_nanos() as libc::c_long,
220                    };
221                    ret = libc::ppoll(&mut pollfd, 1, &ts, ptr::null());
222                }
223                #[cfg(not(target_os = "linux"))]
224                {
225                    ret = libc::poll(&mut pollfd, 1, timeout.as_millis() as libc::c_int);
226                }
227                if ret < 0 {
228                    return None; // Error? Let's assume it's an EOF.
229                }
230                if ret == 0 {
231                    break; // Timeout? We can stop reading.
232                }
233
234                timeout = timeout.saturating_sub(beg.elapsed());
235            };
236
237            // If we're asked for a non-blocking read we need
238            // to manipulate `O_NONBLOCK` and vice versa.
239            set_tty_nonblocking(read_poll);
240
241            // Read from stdin.
242            let spare = buf.spare_capacity_mut();
243            let ret = libc::read(STATE.stdin, spare.as_mut_ptr() as *mut _, spare.len());
244            if ret > 0 {
245                buf.set_len(buf.len() + ret as usize);
246                break;
247            }
248            if ret == 0 {
249                return None; // EOF
250            }
251            if ret < 0 {
252                match errno() {
253                    libc::EINTR if STATE.inject_resize => break,
254                    libc::EAGAIN if timeout == time::Duration::ZERO => break,
255                    libc::EINTR | libc::EAGAIN => {}
256                    _ => return None,
257                }
258            }
259        }
260
261        if !buf.is_empty() {
262            // We only need to check the last 3 bytes for UTF-8 continuation bytes,
263            // because we should be able to assume that any 4 byte sequence is complete.
264            let lim = buf.len().saturating_sub(3);
265            let mut off = buf.len() - 1;
266
267            // Find the start of the last potentially incomplete UTF-8 sequence.
268            while off > lim && buf[off] & 0b1100_0000 == 0b1000_0000 {
269                off -= 1;
270            }
271
272            let seq_len = match buf[off] {
273                b if b & 0b1000_0000 == 0 => 1,
274                b if b & 0b1110_0000 == 0b1100_0000 => 2,
275                b if b & 0b1111_0000 == 0b1110_0000 => 3,
276                b if b & 0b1111_1000 == 0b1111_0000 => 4,
277                // If the lead byte we found isn't actually one, we don't cache it.
278                // `from_utf8_lossy_owned` will replace it with U+FFFD.
279                _ => 0,
280            };
281
282            // Cache incomplete sequence if any.
283            if off + seq_len > buf.len() {
284                STATE.utf8_len = buf.len() - off;
285                STATE.utf8_buf[..STATE.utf8_len].copy_from_slice(&buf[off..]);
286                buf.truncate(off);
287            }
288        }
289
290        let mut result = ArenaString::from_utf8_lossy_owned(buf);
291
292        // We received a SIGWINCH? Add a fake window size sequence for our input parser.
293        // I prepend it so that on startup, the TUI system gets first initialized with a size.
294        if STATE.inject_resize {
295            STATE.inject_resize = false;
296            let (w, h) = get_window_size();
297            if w > 0 && h > 0 {
298                let scratch = scratch_arena(Some(arena));
299                let seq = arena_format!(&scratch, "\x1b[8;{h};{w}t");
300                result.replace_range(0..0, &seq);
301            }
302        }
303
304        result.shrink_to_fit();
305        Some(result)
306    }
307}
308
309pub fn write_stdout(text: &str) {
310    if text.is_empty() {
311        return;
312    }
313
314    // If we don't set the TTY to blocking mode,
315    // the write will potentially fail with EAGAIN.
316    set_tty_nonblocking(false);
317
318    let buf = text.as_bytes();
319    let mut written = 0;
320
321    while written < buf.len() {
322        let w = &buf[written..];
323        let w = &buf[..w.len().min(GIBI)];
324        let n = unsafe { libc::write(STATE.stdout, w.as_ptr() as *const _, w.len()) };
325
326        if n >= 0 {
327            written += n as usize;
328            continue;
329        }
330
331        let err = errno();
332        if err != libc::EINTR {
333            return;
334        }
335    }
336}
337
338/// Sets/Resets `O_NONBLOCK` on the TTY handle.
339///
340/// Note that setting this flag applies to both stdin and stdout, because the
341/// TTY is a bidirectional device and both handles refer to the same thing.
342fn set_tty_nonblocking(nonblock: bool) {
343    unsafe {
344        let is_nonblock = (STATE.stdin_flags & libc::O_NONBLOCK) != 0;
345        if is_nonblock != nonblock {
346            STATE.stdin_flags ^= libc::O_NONBLOCK;
347            let _ = libc::fcntl(STATE.stdin, libc::F_SETFL, STATE.stdin_flags);
348        }
349    }
350}
351
352pub fn open_stdin_if_redirected() -> Option<File> {
353    unsafe {
354        // Did we reopen stdin during `init()`?
355        if STATE.stdin != libc::STDIN_FILENO {
356            Some(File::from_raw_fd(libc::STDIN_FILENO))
357        } else {
358            None
359        }
360    }
361}
362
/// Identifies a file by the device and inode numbers reported by `fstat`.
///
/// Two `FileId`s compare equal when both numbers match.
#[derive(Clone, PartialEq, Eq)]
pub struct FileId {
    /// The `st_dev` field of the file's `stat` structure.
    st_dev: libc::dev_t,
    /// The `st_ino` field of the file's `stat` structure.
    st_ino: libc::ino_t,
}
368
369/// Returns a unique identifier for the given file by handle or path.
370pub fn file_id(file: Option<&File>, path: &Path) -> apperr::Result<FileId> {
371    let file = match file {
372        Some(f) => f,
373        None => &File::open(path)?,
374    };
375
376    unsafe {
377        let mut stat = MaybeUninit::<libc::stat>::uninit();
378        check_int_return(libc::fstat(file.as_raw_fd(), stat.as_mut_ptr()))?;
379        let stat = stat.assume_init();
380        Ok(FileId { st_dev: stat.st_dev, st_ino: stat.st_ino })
381    }
382}
383
384/// Reserves a virtual memory region of the given size.
385/// To commit the memory, use `virtual_commit`.
386/// To release the memory, use `virtual_release`.
387///
388/// # Safety
389///
390/// This function is unsafe because it uses raw pointers.
391/// Don't forget to release the memory when you're done with it or you'll leak it.
392pub unsafe fn virtual_reserve(size: usize) -> apperr::Result<NonNull<u8>> {
393    unsafe {
394        let ptr = libc::mmap(
395            null_mut(),
396            size,
397            desired_mprotect(libc::PROT_READ | libc::PROT_WRITE),
398            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
399            -1,
400            0,
401        );
402        if ptr.is_null() || ptr::eq(ptr, libc::MAP_FAILED) {
403            Err(errno_to_apperr(libc::ENOMEM))
404        } else {
405            Ok(NonNull::new_unchecked(ptr as *mut u8))
406        }
407    }
408}
409
/// Releases a virtual memory region of the given size.
///
/// This unmaps `size` bytes starting at `base` via `munmap`.
/// The result of `munmap` is not checked.
///
/// # Safety
///
/// This function is unsafe because it uses raw pointers.
/// Make sure to only pass pointers acquired from `virtual_reserve`.
pub unsafe fn virtual_release(base: NonNull<u8>, size: usize) {
    unsafe {
        libc::munmap(base.cast().as_ptr(), size);
    }
}
421
422/// Commits a virtual memory region of the given size.
423///
424/// # Safety
425///
426/// This function is unsafe because it uses raw pointers.
427/// Make sure to only pass pointers acquired from `virtual_reserve`
428/// and to pass a size less than or equal to the size passed to `virtual_reserve`.
429pub unsafe fn virtual_commit(base: NonNull<u8>, size: usize) -> apperr::Result<()> {
430    unsafe {
431        let status = libc::mprotect(base.cast().as_ptr(), size, libc::PROT_READ | libc::PROT_WRITE);
432        if status != 0 { Err(errno_to_apperr(libc::ENOMEM)) } else { Ok(()) }
433    }
434}
435
436unsafe fn load_library(name: &CStr) -> apperr::Result<NonNull<c_void>> {
437    unsafe {
438        NonNull::new(libc::dlopen(name.as_ptr(), libc::RTLD_LAZY))
439            .ok_or_else(|| errno_to_apperr(libc::ENOENT))
440    }
441}
442
443/// Loads a function from a dynamic library.
444///
445/// # Safety
446///
447/// This function is highly unsafe as it requires you to know the exact type
448/// of the function you're loading. No type checks whatsoever are performed.
449//
450// It'd be nice to constrain T to std::marker::FnPtr, but that's unstable.
451pub unsafe fn get_proc_address<T>(handle: NonNull<c_void>, name: &CStr) -> apperr::Result<T> {
452    unsafe {
453        let sym = libc::dlsym(handle.as_ptr(), name.as_ptr());
454        if sym.is_null() {
455            Err(errno_to_apperr(libc::ENOENT))
456        } else {
457            Ok(mem::transmute_copy(&sym))
458        }
459    }
460}
461
/// Loads the `libicuuc.so` dynamic library and returns its handle.
///
/// Fails with `ENOENT` if the library cannot be opened.
pub fn load_libicuuc() -> apperr::Result<NonNull<c_void>> {
    unsafe { load_library(c"libicuuc.so") }
}
465
/// Loads the `libicui18n.so` dynamic library and returns its handle.
///
/// Fails with `ENOENT` if the library cannot be opened.
pub fn load_libicui18n() -> apperr::Result<NonNull<c_void>> {
    unsafe { load_library(c"libicui18n.so") }
}
469
470/// ICU, by default, adds the major version as a suffix to each exported symbol.
471/// They also recommend to disable this for system-level installations (`runConfigureICU Linux --disable-renaming`),
472/// but I found that many (most?) Linux distributions don't do this for some reason.
473/// This function returns the suffix, if any.
474#[allow(clippy::not_unsafe_ptr_arg_deref)]
475pub fn icu_proc_suffix(arena: &Arena, handle: NonNull<c_void>) -> ArenaString<'_> {
476    unsafe {
477        type T = *const c_void;
478
479        let mut res = ArenaString::new_in(arena);
480
481        // Check if the ICU library is using unversioned symbols.
482        // Return an empty suffix in that case.
483        if get_proc_address::<T>(handle, c"u_errorName").is_ok() {
484            return res;
485        }
486
487        // In the versions (63-76) and distributions (Arch/Debian) I tested,
488        // this symbol seems to be always present. This allows us to call `dladdr`.
489        // It's the `UCaseMap::~UCaseMap()` destructor which for some reason isn't
490        // in a namespace. Thank you ICU maintainers for this oversight.
491        let proc = match get_proc_address::<T>(handle, c"_ZN8UCaseMapD1Ev") {
492            Ok(proc) => proc,
493            Err(_) => return res,
494        };
495
496        // `dladdr` is specific to GNU's libc unfortunately.
497        let mut info: libc::Dl_info = mem::zeroed();
498        let ret = libc::dladdr(proc, &mut info);
499        if ret == 0 {
500            return res;
501        }
502
503        // The library path is in `info.dli_fname`.
504        let path = match CStr::from_ptr(info.dli_fname).to_str() {
505            Ok(name) => name,
506            Err(_) => return res,
507        };
508
509        let path = match fs::read_link(path) {
510            Ok(path) => path,
511            Err(_) => path.into(),
512        };
513
514        // I'm going to assume it's something like "libicuuc.so.76.1".
515        let path = path.into_os_string();
516        let path = path.to_string_lossy();
517        let suffix_start = match path.rfind(".so.") {
518            Some(pos) => pos + 4,
519            None => return res,
520        };
521        let version = &path[suffix_start..];
522        let version_end = version.find('.').unwrap_or(version.len());
523        let version = &version[..version_end];
524
525        res.push('_');
526        res.push_str(version);
527        res
528    }
529}
530
531pub fn add_icu_proc_suffix<'a, 'b, 'r>(arena: &'a Arena, name: &'b CStr, suffix: &str) -> &'r CStr
532where
533    'a: 'r,
534    'b: 'r,
535{
536    if suffix.is_empty() {
537        name
538    } else {
539        // SAFETY: In this particular case we know that the string
540        // is valid UTF-8, because it comes from icu.rs.
541        let name = unsafe { name.to_str().unwrap_unchecked() };
542
543        let mut res = ArenaString::new_in(arena);
544        res.reserve(name.len() + suffix.len() + 1);
545        res.push_str(name);
546        res.push_str(suffix);
547        res.push('\0');
548
549        let bytes: &'a [u8] = unsafe { mem::transmute(res.as_bytes()) };
550        unsafe { CStr::from_bytes_with_nul_unchecked(bytes) }
551    }
552}
553
554pub fn preferred_languages(arena: &Arena) -> Vec<ArenaString<'_>, &Arena> {
555    let mut locales = Vec::new_in(arena);
556
557    for key in ["LANGUAGE", "LC_ALL", "LANG"] {
558        if let Ok(val) = std::env::var(key)
559            && !val.is_empty()
560        {
561            locales.extend(val.split(':').filter(|s| !s.is_empty()).map(|s| {
562                // Replace all underscores with dashes,
563                // because the localization code expects pt-br, not pt_BR.
564                let mut res = Vec::new_in(arena);
565                res.extend(s.as_bytes().iter().map(|&b| if b == b'_' { b'-' } else { b }));
566                unsafe { ArenaString::from_utf8_unchecked(res) }
567            }));
568            break;
569        }
570    }
571
572    locales
573}
574
/// Returns the raw code of the last OS error as reported by
/// `std::io::Error::last_os_error()`, or 0 if it carries no raw OS code.
#[inline]
fn errno() -> i32 {
    // Under `-O -Copt-level=s` the 1.87 compiler fails to fully inline and
    // remove the raw_os_error() call. This leaves us with the drop() call.
    // ManuallyDrop fixes that and results in a direct `std::sys::os::errno` call.
    ManuallyDrop::new(std::io::Error::last_os_error()).raw_os_error().unwrap_or(0)
}
582
583#[inline]
584pub(crate) fn io_error_to_apperr(err: std::io::Error) -> apperr::Error {
585    errno_to_apperr(err.raw_os_error().unwrap_or(0))
586}
587
588pub fn apperr_format(f: &mut std::fmt::Formatter<'_>, code: u32) -> std::fmt::Result {
589    write!(f, "Error {code}")?;
590
591    unsafe {
592        let ptr = libc::strerror(code as i32);
593        if !ptr.is_null() {
594            let msg = CStr::from_ptr(ptr).to_string_lossy();
595            write!(f, ": {msg}")?;
596        }
597    }
598
599    Ok(())
600}
601
/// Returns `true` if `err` equals the application error made from `ENOENT`.
pub fn apperr_is_not_found(err: apperr::Error) -> bool {
    err == errno_to_apperr(libc::ENOENT)
}
605
606const fn errno_to_apperr(no: c_int) -> apperr::Error {
607    apperr::Error::new_sys(if no < 0 { 0 } else { no as u32 })
608}
609
610fn check_int_return(ret: libc::c_int) -> apperr::Result<libc::c_int> {
611    if ret < 0 { Err(errno_to_apperr(errno())) } else { Ok(ret) }
612}