starry-kernel 0.5.11

A Linux-compatible OS kernel built on ArceOS unikernel
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
use alloc::{collections::VecDeque, format, sync::Arc};
use core::{
    any::Any,
    sync::atomic::{AtomicU32, Ordering},
    task::Context,
    time::Duration,
};

/// Number of registered `/dev/input/event*` nodes. Populated by
/// [`input_devices`] at boot and read by sysfs so
/// `/sys/class/input/event<N>` matches reality.
static EVENT_DEVICE_COUNT: AtomicU32 = AtomicU32::new(0);

/// Returns the number of `/dev/input/event*` devices currently exposed.
///
/// The `Acquire` load pairs with the `Release` store performed at the end of
/// [`input_devices`]; until that store has happened the value is the
/// initial `0`.
pub fn input_device_count() -> u32 {
    EVENT_DEVICE_COUNT.load(Ordering::Acquire)
}

#[allow(unused_imports)]
use ax_driver::prelude::{
    AbsInfo, AxInputDevice, BaseDriverOps, DevError, Event, EventType, InputDeviceId,
    InputDriverOps,
};
use ax_errno::{AxError, AxResult};
use ax_hal::time::wall_time;
use ax_sync::Mutex;
use axfs_ng_vfs::{DeviceId, NodeFlags, NodeType, VfsResult};
use axpoll::{IoEvents, Pollable};
use bitmaps::Bitmap;
use linux_raw_sys::{
    general::{__kernel_old_time_t, __kernel_suseconds_t},
    ioctl::{EVIOCGID, EVIOCGRAB, EVIOCGVERSION},
};
use zerocopy::{FromBytes, Immutable, IntoBytes};

use crate::{
    mm::UserPtr,
    pseudofs::{Device, DeviceOps, DirMapping, SimpleFs},
};
/// Size, in bits, of the cached key-state bitmap kept per device, taken
/// from `EventType::Key.bits_count()`.
const KEY_CNT: usize = EventType::Key.bits_count();

/// Bound on the in-kernel evdev buffer. Linux uses a per-client ring of
/// 64 entries by default; we hold a bit more headroom so a 20-key burst
/// (key down + key up + EV_SYN per key = 60 entries) never drops events
/// before userspace drains it. When the queue is full we follow Linux's
/// behavior and drop the oldest entry rather than blocking the driver.
const READ_AHEAD_CAP: usize = 256;

/// Mutable per-device state; [`EventDev`] keeps it behind a `Mutex`.
struct Inner {
    /// Underlying input driver that events are pulled from.
    device: AxInputDevice,
    /// Events already pulled from the driver but not yet read by userspace,
    /// each paired with the `wall_time()` captured when it was queued.
    read_ahead: VecDeque<(Duration, Event)>,
    /// Cached pressed/released state per key code, updated while draining
    /// the driver and returned by the `EVIOCGKEY` ioctl.
    key_state: Bitmap<KEY_CNT>,
}
impl Inner {
    /// Drain everything the driver currently has buffered into `read_ahead`,
    /// updating cached key state along the way. Stops at the first
    /// `DevError::Again` (driver queue empty) or after a hard ceiling of
    /// `READ_AHEAD_CAP` pulls per call to bound a single pass.
    ///
    /// Returns `true` if at least one event is now queued for userspace.
    fn drain_into_queue(&mut self) -> bool {
        for _ in 0..READ_AHEAD_CAP {
            match self.device.read_event() {
                Ok(event) => {
                    if event.event_type == EventType::Key as u16 {
                        if event.value == 0 {
                            self.key_state.set(event.code as usize, false);
                        } else if event.value == 1 {
                            self.key_state.set(event.code as usize, true);
                        }
                    }
                    if self.read_ahead.len() >= READ_AHEAD_CAP {
                        // Mirror Linux evdev: drop oldest on overflow so
                        // the most recent input wins. Keeps the driver
                        // ring from stalling under a burst we cannot
                        // forward to a slow reader.
                        self.read_ahead.pop_front();
                    }
                    self.read_ahead.push_back((wall_time(), event));
                }
                Err(DevError::Again) => break,
                Err(err) => {
                    warn!("Failed to read event: {err:?}");
                    break;
                }
            }
        }
        !self.read_ahead.is_empty()
    }

    fn has_event(&mut self) -> bool {
        self.drain_into_queue()
    }
}

/// Linux `INPUT_PROP_CNT` — the property bitmap is 4 bytes (32 properties).
const INPUT_PROP_CNT: usize = 0x20;
/// Linux `INPUT_PROP_POINTER` — emulates a relative pointer or maps absolute
/// coordinates to screen space. libinput hides the cursor on absolute-axis
/// devices that do not advertise this until proximity is reported.
const INPUT_PROP_POINTER: usize = 0x00;
/// Linux `INPUT_PROP_DIRECT` — direct-mapped axes (touchscreens).
const INPUT_PROP_DIRECT: usize = 0x01;

/// Linux uapi `struct input_absinfo` — six `i32`s returned by
/// `EVIOCGABS(axis)`.
///
/// `#[repr(C)]` together with the zerocopy derives lets the `EVIOCGABS`
/// handler copy the struct to userspace as raw bytes via `as_bytes()`.
#[repr(C)]
#[derive(Default, Clone, Copy, FromBytes, IntoBytes, Immutable)]
struct InputAbsInfo {
    // Current axis value; the EVIOCGABS handler in this file always reports 0.
    value: i32,
    // The remaining fields are filled from the driver's `get_abs_info` result
    // (`min`, `max`, `fuzz`, `flat`, `res` respectively).
    minimum: i32,
    maximum: i32,
    fuzz: i32,
    flat: i32,
    resolution: i32,
}

/// Number of absolute axes addressable through `EVIOCGABS`; valid axis
/// indices are `0..=0x3F`. Despite the name this is an exclusive upper
/// bound (a count): `axis_supported` rejects `bit >= ABS_MAX` and the
/// `abs_bits` array is sized as `ABS_MAX.div_ceil(8)` bytes.
const ABS_MAX: usize = 0x40;

/// Character-device backend for one `/dev/input/event*` node, wrapping a
/// single [`AxInputDevice`] plus the capability bitmaps probed from it in
/// [`EventDev::new`].
pub struct EventDev {
    /// Driver handle, queued events and cached key state; locked for every
    /// read, poll and driver-backed ioctl.
    inner: Mutex<Inner>,
    /// IRQ line the underlying driver delivers buffered events on, when
    /// the driver advertises one. `Pollable::register` wires the caller's
    /// waker to this IRQ so virtio-input wakes its consumer in
    /// microseconds rather than waiting for the next safety-net tick.
    irq: Option<usize>,
    /// Bitmap of event types the device reported support for, probed once
    /// at construction. Returned verbatim when userspace asks for the event
    /// bits of type 0 (Linux `EVIOCGBIT(0, …)`).
    ev_bits: Bitmap<{ EventType::COUNT as usize }>,
    /// Cached `EVIOCGPROP` bitmap. Computed once at probe from the driver's
    /// raw bits with a synthesized `INPUT_PROP_POINTER` for absolute or
    /// relative pointing devices that aren't touchscreens. QEMU's
    /// virtio-mouse / virtio-tablet do not set the bit themselves, so
    /// libinput would otherwise classify the tablet as a graphics tablet
    /// and suppress the cursor pending a never-firing `BTN_TOOL_PEN`.
    prop_bits: [u8; INPUT_PROP_CNT.div_ceil(8)],
    /// Cached `EV_ABS` bitmap. Used by `EVIOCGABS` to refuse axes the
    /// device doesn't advertise with `EINVAL`, matching Linux's
    /// `evdev_handle_get_val` behavior. virtio-drivers reports the
    /// underlying `Error::IoError` when the AbsInfo selector has size 0,
    /// which would otherwise surface as EIO and confuse libinput.
    abs_bits: [u8; ABS_MAX.div_ceil(8)],
}

impl EventDev {
    /// Probes `device` once and wraps it as an evdev node.
    ///
    /// The probe caches three bitmaps so later ioctls need not query the
    /// driver again:
    /// * `ev_bits` — one bit per event type the driver reports bits for;
    /// * `prop_bits` — the driver's property bits, plus a synthesized
    ///   `INPUT_PROP_POINTER` for non-touchscreen devices with REL/ABS axes
    ///   (only when the driver's property query succeeded);
    /// * `abs_bits` — the `EV_ABS` axis bitmap, left all-zero when the
    ///   device has no absolute axes or the query fails.
    pub fn new(mut device: AxInputDevice) -> Self {
        let mut ev_bits = Bitmap::new();
        for i in 0..EventType::COUNT {
            let Some(ty) = EventType::from_repr(i) else {
                continue;
            };
            // An empty buffer is enough here: only the "has any bits"
            // answer is needed, not the bits themselves.
            if device
                .get_event_bits(ty, &mut [])
                .is_ok_and(|success| success)
            {
                ev_bits.set(i as usize, true);
            }
        }

        let mut prop_bits = [0u8; INPUT_PROP_CNT.div_ceil(8)];
        let prop_bits_reliable = match device.get_prop_bits(&mut prop_bits) {
            Ok(_) => true,
            Err(err) => {
                warn!("Failed to get input property bits: {err:?}");
                false
            }
        };
        let is_touchscreen = prop_bits[INPUT_PROP_DIRECT / 8] & (1 << (INPUT_PROP_DIRECT % 8)) != 0;
        let has_axes =
            ev_bits.get(EventType::Relative as usize) || ev_bits.get(EventType::Absolute as usize);
        if prop_bits_reliable && has_axes && !is_touchscreen {
            prop_bits[INPUT_PROP_POINTER / 8] |= 1 << (INPUT_PROP_POINTER % 8);
        }

        let mut abs_bits = [0u8; ABS_MAX.div_ceil(8)];
        if ev_bits.get(EventType::Absolute as usize) {
            // Best effort: on failure the bitmap stays all-zero, which makes
            // every later EVIOCGABS fail with EINVAL. Log it so that
            // situation is diagnosable.
            if let Err(err) = device.get_event_bits(EventType::Absolute, &mut abs_bits) {
                warn!("Failed to get absolute axis bits: {err:?}");
            }
        }

        let irq = device.irq_num();
        Self {
            inner: Mutex::new(Inner {
                device,
                read_ahead: VecDeque::with_capacity(READ_AHEAD_CAP),
                key_state: Bitmap::new(),
            }),
            irq,
            ev_bits,
            prop_bits,
            abs_bits,
        }
    }

    /// Returns whether `axis` is set in the cached `EV_ABS` bitmap.
    /// Axes at or beyond `ABS_MAX` are never supported.
    fn axis_supported(&self, axis: u8) -> bool {
        let bit = axis as usize;
        if bit >= ABS_MAX {
            return false;
        }
        self.abs_bits[bit / 8] & (1 << (bit % 8)) != 0
    }

    /// Writes the capability bitmap for event type `ty` into the user
    /// buffer at `arg` (`size` bytes) and returns how many bytes are valid.
    ///
    /// `ty == 0` returns the cached event-type bitmap. Any other value is
    /// forwarded to the driver; an unknown type yields `InvalidInput`.
    /// The reported prefix is zeroed before the driver is asked, so when the
    /// driver has no bits for `ty` or fails, userspace sees an empty bitmap
    /// instead of whatever its buffer previously held.
    fn get_event_bits(&self, arg: usize, size: usize, ty: u8) -> AxResult<usize> {
        let bits = UserPtr::<u8>::from(arg).get_as_mut_slice(size)?;
        if ty == 0 {
            return Ok(copy_bytes(self.ev_bits.as_bytes(), bits));
        }

        let ty = EventType::from_repr(ty).ok_or(AxError::InvalidInput)?;
        let len = bits.len().min(ty.bits_count().div_ceil(8));
        bits[..len].fill(0);
        match self.inner.lock().device.get_event_bits(ty, bits) {
            Ok(true) => {}
            Ok(false) => {
                debug!("No events for {ty:?}");
            }
            Err(err) => {
                warn!("Failed to get event bits: {err:?}");
            }
        }
        Ok(len)
    }
}

/// Copies as many leading bytes of `src` as fit into `dst` and returns the
/// number of bytes copied. Bytes of `dst` past that count are left as-is.
fn copy_bytes(src: &[u8], dst: &mut [u8]) -> usize {
    let count = usize::min(dst.len(), src.len());
    let (target, _untouched) = dst.split_at_mut(count);
    target.copy_from_slice(&src[..count]);
    count
}

/// Copies `s` into the user buffer at `arg` (`size` bytes) as a C string
/// and returns the number of bytes written.
///
/// As with Linux's evdev string ioctls, a terminating NUL is appended and
/// counted whenever the buffer has room for it; if the buffer is too small
/// the text is truncated and no terminator is written. Without the NUL a
/// caller that did not pre-zero its buffer would read past the name.
fn return_str(arg: usize, size: usize, s: &str) -> AxResult<usize> {
    let slice = UserPtr::<u8>::from(arg).get_as_mut_slice(size)?;
    let copied = copy_bytes(s.as_bytes(), slice);
    match slice.get_mut(copied) {
        Some(terminator) => {
            *terminator = 0;
            Ok(copied + 1)
        }
        None => Ok(copied),
    }
}

/// Translates a driver-layer [`DevError`] into the [`AxError`] returned to
/// the VFS caller. The match is exhaustive, so a new `DevError` variant
/// forces this mapping to be revisited.
fn dev_error_to_ax_error(err: DevError) -> AxError {
    match err {
        // Both variants are reported to the caller as an invalid request.
        DevError::Unsupported | DevError::InvalidParam => AxError::InvalidInput,
        DevError::Again => AxError::WouldBlock,
        DevError::Io => AxError::Io,
        DevError::ResourceBusy => AxError::ResourceBusy,
        DevError::NoMemory => AxError::NoMemory,
        DevError::BadState => AxError::BadState,
        DevError::AlreadyExists => AxError::AlreadyExists,
    }
}

/// Answers a bitmap ioctl with an all-clear bitmap: zeroes the first
/// `ceil(bits / 8)` bytes of the user buffer at `arg` (capped at `size`)
/// and returns how many bytes were zeroed.
fn return_zero_bits(arg: usize, size: usize, bits: usize) -> AxResult<usize> {
    let user_buf = UserPtr::<u8>::from(arg).get_as_mut_slice(size)?;
    let bitmap_bytes = bits.div_ceil(8);
    let zeroed = usize::min(user_buf.len(), bitmap_bytes);
    for byte in user_buf.iter_mut().take(zeroed) {
        *byte = 0;
    }
    Ok(zeroed)
}

/// Seconds / microseconds timestamp pair with C layout, built from the
/// `linux_raw_sys` `__kernel_old_time_t` and `__kernel_suseconds_t` types.
/// Used as the `time` field of the input-event record written by `read_at`.
#[repr(C)]
#[derive(FromBytes, IntoBytes, Immutable)]
pub struct KernelTimeval {
    /// Whole seconds.
    pub tv_sec: __kernel_old_time_t,
    /// Microseconds within the second.
    pub tv_usec: __kernel_suseconds_t,
}

/// One event record as written to userspace by `read_at`, serialized via
/// `as_bytes()`. Layout is `#[repr(C)]`; the fields mirror Linux's
/// `struct input_event` (timestamp, type, code, value).
#[repr(C)]
#[derive(FromBytes, IntoBytes, Immutable)]
struct InputEvent {
    // Time at which the event was queued in the kernel.
    time: KernelTimeval,
    // Raw event type, code and value as delivered by the driver.
    event_type: u16,
    code: u16,
    value: i32,
}

/// Exported no-op with an unmangled C symbol that is never inlined.
///
/// Nothing in this file calls it.
/// NOTE(review): presumably a stable symbol to hang a debugger breakpoint or
/// tracing probe on — confirm it is still needed, and document the intended
/// use here.
#[unsafe(no_mangle)]
#[inline(never)]
pub extern "C" fn ongkey() {
    // `black_box` keeps the otherwise empty body from being optimized away.
    core::hint::black_box(());
}

/// Character-device operations for an evdev node: non-blocking reads of
/// queued events plus the subset of `EVIOC*` ioctls handled below.
impl DeviceOps for EventDev {
    /// Copies as many whole `InputEvent` records as fit into `buf`.
    ///
    /// Returns `Ok(0)` for an empty buffer, `InvalidInput` when the buffer
    /// cannot hold even one record, and `WouldBlock` when no event is
    /// queued. The offset is ignored (the node is a stream).
    fn read_at(&self, buf: &mut [u8], _offset: u64) -> VfsResult<usize> {
        if buf.is_empty() {
            return Ok(0);
        }
        if buf.len() < size_of::<InputEvent>() {
            return Err(AxError::InvalidInput);
        }
        let mut read = 0;
        let mut inner = self.inner.lock();
        // Drain the driver queue once up front so a single read() syscall
        // can return as many buffered events as the user buffer holds.
        inner.drain_into_queue();
        // `chunks_exact_mut` skips any trailing partial record slot.
        for out in buf.chunks_exact_mut(size_of::<InputEvent>()) {
            let Some((time, event)) = inner.read_ahead.pop_front() else {
                break;
            };
            let input_event = InputEvent {
                time: KernelTimeval {
                    tv_sec: time.as_secs() as _,
                    tv_usec: time.subsec_micros() as _,
                },
                event_type: event.event_type,
                code: event.code,
                value: event.value as _,
            };
            out.copy_from_slice(input_event.as_bytes());
            read += out.len();
        }
        if read == 0 {
            Err(AxError::WouldBlock)
        } else {
            Ok(read)
        }
    }

    /// Writing events into the device is not supported.
    fn write_at(&self, _buf: &[u8], _offset: u64) -> VfsResult<usize> {
        Err(AxError::InvalidInput)
    }

    /// The node is an uncached stream.
    fn flags(&self) -> NodeFlags {
        NodeFlags::NON_CACHEABLE | NodeFlags::STREAM
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    /// The device is pollable; see the `Pollable` impl.
    fn as_pollable(&self) -> Option<&dyn Pollable> {
        Some(self)
    }

    /// Handles evdev ioctls.
    ///
    /// Fixed-number commands are matched directly. Everything else is
    /// decoded as a variable-length command: bits 0-7 are the command
    /// number, bits 8-15 the type (must be `'E'`), bits 16-29 the user
    /// buffer size and bits 30-31 the direction. Only read-direction
    /// commands are implemented; anything unrecognized is `InvalidInput`.
    fn ioctl(&self, cmd: u32, arg: usize) -> VfsResult<usize> {
        match cmd {
            EVIOCGVERSION => {
                // Reported evdev protocol version.
                *UserPtr::<u32>::from(arg).get_as_mut()? = 0x10001;
                Ok(0)
            }
            EVIOCGID => {
                *UserPtr::<InputDeviceId>::from(arg).get_as_mut()? =
                    self.inner.lock().device.device_id();
                Ok(0)
            }
            // Grab requests are accepted but have no effect here.
            EVIOCGRAB => Ok(0),
            other => {
                // variable-length command
                let mut tmp = other;
                let nr = (tmp & 0xff) as u8;
                tmp >>= 8;
                let ty = (tmp & 0xff) as u8;
                tmp >>= 8;
                let size = (tmp & 0x3fff) as usize;
                tmp >>= 14;
                let dir = tmp & 0x3;

                if ty != b'E' {
                    warn!("unknown ioctl for evdev: {cmd} {arg}");
                    return Err(AxError::InvalidInput);
                }

                match dir {
                    // IOC_WRITE
                    1 => return Err(AxError::InvalidInput),
                    // IOC_READ
                    2 => {
                        #[allow(clippy::single_match)]
                        match nr {
                            // EVIOCGNAME
                            0x06 => {
                                return return_str(
                                    arg,
                                    size,
                                    self.inner.lock().device.device_name(),
                                );
                            }
                            // EVIOCGPHYS
                            0x07 => {
                                return return_str(
                                    arg,
                                    size,
                                    self.inner.lock().device.physical_location(),
                                );
                            }
                            // EVIOCGUNIQ
                            0x08 => {
                                return return_str(arg, size, self.inner.lock().device.unique_id());
                            }
                            // EVIOCGPROP — device property bitmap. libinput
                            // uses INPUT_PROP_POINTER to keep the cursor
                            // visible on absolute-axis pointing devices like
                            // virtio-tablet; we synthesize the bit at probe
                            // for any non-touchscreen with REL/ABS axes.
                            0x09 => {
                                let slice = UserPtr::<u8>::from(arg).get_as_mut_slice(size)?;
                                return Ok(copy_bytes(&self.prop_bits, slice));
                            }
                            // EVIOCGKEY — current key state, from the cache
                            // maintained while draining driver events.
                            0x18 => {
                                let bits = UserPtr::<u8>::from(arg).get_as_mut_slice(size)?;
                                return Ok(copy_bytes(
                                    self.inner.lock().key_state.as_bytes(),
                                    bits,
                                ));
                            }
                            // EVIOCGLED — no LED state is tracked; report all-clear.
                            0x19 => {
                                return return_zero_bits(arg, size, EventType::Led.bits_count());
                            }
                            // EVIOCGSND — no sound state is tracked; report all-clear.
                            0x1a => {
                                return return_zero_bits(arg, size, EventType::Sound.bits_count());
                            }
                            // EVIOCGSW — no switch state is tracked; report all-clear.
                            0x1b => {
                                return return_zero_bits(arg, size, EventType::Switch.bits_count());
                            }
                            _ => {}
                        }
                        // Event-bits query: the bits of `nr` above
                        // `EventType::MAX` select this command family, the
                        // bits within `EventType::MAX` carry the event type.
                        if nr & !EventType::MAX == EventType::COUNT {
                            return self.get_event_bits(arg, size, nr & EventType::MAX);
                        }
                        const ABS_CNT: u8 = 0x40;
                        // Command numbers 0x40..=0x7f: the low six bits are
                        // the absolute axis being queried.
                        if nr & !(ABS_CNT - 1) == ABS_CNT {
                            // EVIOCGABS(axis) — absolute axis info.
                            // libinput needs min/max/res to map the
                            // virtio-tablet's 0..0x7FFF absolute range to
                            // screen pixels; without it motion is treated
                            // as noise.
                            if size < size_of::<InputAbsInfo>() {
                                return Err(AxError::InvalidInput);
                            }
                            let axis = nr & (ABS_CNT - 1);
                            // Linux's evdev returns EINVAL for any axis the
                            // device does not advertise in its EV_ABS bitmap.
                            // virtio-drivers surfaces the same as Error::IoError
                            // (size==0 selector), so without this pre-check
                            // userspace would see EIO and reject the device.
                            if !self.axis_supported(axis) {
                                return Err(AxError::InvalidInput);
                            }
                            let info = match self.inner.lock().device.get_abs_info(axis) {
                                Ok(info) => info,
                                Err(err) => return Err(dev_error_to_ax_error(err)),
                            };
                            // The current axis position is not tracked, so
                            // `value` is always reported as 0.
                            let abs = InputAbsInfo {
                                value: 0,
                                minimum: info.min as i32,
                                maximum: info.max as i32,
                                fuzz: info.fuzz as i32,
                                flat: info.flat as i32,
                                resolution: info.res as i32,
                            };
                            let bytes = abs.as_bytes();
                            let slice = UserPtr::<u8>::from(arg).get_as_mut_slice(size)?;
                            slice[..bytes.len()].copy_from_slice(bytes);
                            // NOTE(review): Linux's evdev returns 0 from
                            // EVIOCGABS on success; this returns the byte
                            // count. Confirm callers only test for `< 0`.
                            return Ok(bytes.len());
                        }
                        return Err(AxError::InvalidInput);
                    }
                    _ => {}
                }

                Err(AxError::InvalidInput)
            }
        }
    }
}

impl Pollable for EventDev {
    /// Reports `IoEvents::IN` when at least one event is queued for
    /// userspace (pulling pending driver events first), and no events
    /// otherwise.
    fn poll(&self) -> IoEvents {
        let readable = self.inner.lock().has_event();
        if readable {
            IoEvents::IN
        } else {
            IoEvents::empty()
        }
    }

    /// Arranges for the caller to be woken when input may be readable.
    /// Only interest in `IoEvents::IN` is handled; anything else is ignored.
    fn register(&self, context: &mut Context<'_>, events: IoEvents) {
        if !events.contains(IoEvents::IN) {
            return;
        }
        match self.irq {
            // The driver advertises an IRQ: route the caller's waker
            // through the per-IRQ waker list so the next virtio-input
            // notification wakes the consumer directly. An unconditional
            // wake here (as a previous implementation did) turned
            // level-triggered epoll into a register → wake → consume-empty
            // → re-register loop spinning at ~500 Hz, which libinput saw
            // as continuous activity.
            Some(irq) => {
                ax_task::future::register_irq_waker(irq, context.waker());
            }
            // No IRQ advertised (observed for QEMU virtio-keyboard-pci on
            // aarch64 HVF): there is nothing to hook the waker to, so wake
            // the caller right away, but only when an event is already
            // queued. With an empty queue no wake-up is arranged here.
            None => {
                if self.inner.lock().has_event() {
                    context.waker().wake_by_ref();
                }
            }
        }
    }
}

pub fn input_devices(fs: Arc<SimpleFs>) -> DirMapping {
    let mut inputs = DirMapping::new();
    let mut mice_alias: Option<Arc<EventDev>> = None;
    let mut input_id: u32 = 0;
    let input_devices = ax_input::take_inputs();
    for mut device in input_devices.into_iter() {
        let mut keys = [0; 0x300usize.div_ceil(8)];
        assert!(device.get_event_bits(EventType::Key, &mut keys).unwrap());

        const BTN_MOUSE: usize = 0x110;
        let is_mouse = keys[BTN_MOUSE / 8] & (1 << (BTN_MOUSE % 8)) != 0;

        let event_dev = Arc::new(EventDev::new(device));
        let dev = Device::new(
            fs.clone(),
            NodeType::CharacterDevice,
            DeviceId::new(13, 64 + input_id),
            event_dev.clone(),
        );
        inputs.add(format!("event{input_id}"), dev);
        input_id += 1;

        if is_mouse && mice_alias.is_none() {
            mice_alias = Some(event_dev);
        }
    }

    if let Some(event_dev) = mice_alias {
        inputs.add(
            "mice",
            Device::new(
                fs,
                NodeType::CharacterDevice,
                DeviceId::new(13, 63),
                event_dev,
            ),
        );
    }

    EVENT_DEVICE_COUNT.store(input_id, Ordering::Release);
    inputs
}