libhermit-rs 0.6.3

A Rust-based library operating system
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
use alloc::vec::Vec;
#[cfg(feature = "smp")]
use core::arch::x86_64::_mm_mfence;
use core::hint::spin_loop;
#[cfg(feature = "smp")]
use core::ptr;
use core::sync::atomic::Ordering;
use core::{cmp, fmt, mem, u32};

use align_address::Align;
#[cfg(feature = "smp")]
use arch::x86_64::kernel::core_local::*;
use arch::x86_64::kernel::{interrupts, processor};
use hermit_sync::{without_interrupts, OnceCell, SpinMutex};
#[cfg(feature = "smp")]
use x86::controlregs::*;
use x86::msr::*;

use super::interrupts::IDT;
#[cfg(feature = "acpi")]
use crate::arch::x86_64::kernel::acpi;
use crate::arch::x86_64::kernel::CURRENT_STACK_ADDRESS;
use crate::arch::x86_64::mm::paging::{
	BasePageSize, PageSize, PageTableEntryFlags, PageTableEntryFlagsExt,
};
use crate::arch::x86_64::mm::{paging, virtualmem, PhysAddr, VirtAddr};
use crate::config::*;
use crate::scheduler::CoreId;
use crate::{arch, env, mm, scheduler};

/// Signature of the MP Floating Pointer Structure: the bytes "_MP_" read as a little-endian `u32`.
const MP_FLT_SIGNATURE: u32 = 0x5f504d5f;
/// Signature of the MP Configuration Table: the bytes "PCMP" read as a little-endian `u32`.
const MP_CONFIG_SIGNATURE: u32 = 0x504d4350;

/// Byte offset of the ICR2 register inside the mapped xAPIC page (used by `local_apic_write`).
const APIC_ICR2: usize = 0x0310;

/// Divide configuration value programmed by `calibrate_timer`.
const APIC_DIV_CONF_DIVIDE_BY_8: u64 = 0b0010;
/// Value written to the EOI register by `eoi`.
const APIC_EOI_ACK: u64 = 0;
/// ICR delivery mode bits used for the TLB flush and wakeup IPIs.
#[cfg(feature = "smp")]
const APIC_ICR_DELIVERY_MODE_FIXED: u64 = 0x000;
/// ICR delivery mode bits used for the INIT IPI when booting application processors.
#[cfg(feature = "smp")]
const APIC_ICR_DELIVERY_MODE_INIT: u64 = 0x500;
/// ICR delivery mode bits used for the STARTUP IPI when booting application processors.
#[cfg(feature = "smp")]
const APIC_ICR_DELIVERY_MODE_STARTUP: u64 = 0x600;
/// ICR bit polled by `local_apic_write` in xAPIC mode before a new IPI is written.
const APIC_ICR_DELIVERY_STATUS_PENDING: u32 = 1 << 12;
/// ICR bit set for both INIT IPIs sent by `boot_application_processors`.
#[cfg(feature = "smp")]
const APIC_ICR_LEVEL_TRIGGERED: u64 = 1 << 15;
/// ICR bit set for the first INIT IPI and for the fixed-mode IPIs.
#[cfg(feature = "smp")]
const APIC_ICR_LEVEL_ASSERT: u64 = 1 << 14;
/// Value written to an LVT register to mask the corresponding interrupt.
const APIC_LVT_MASK: u64 = 1 << 16;
/// LVT Timer bit OR-ed in by `__set_oneshot_timer` when TSC deadline mode is used.
const APIC_LVT_TIMER_TSC_DEADLINE: u64 = 1 << 18;
/// Enable bit written to the spurious interrupt vector register by `init_local_apic`.
const APIC_SIVR_ENABLED: u64 = 1 << 8;

/// Register index: ID
#[allow(dead_code)]
const IOAPIC_REG_ID: u32 = 0x0000;
/// Register index: version
const IOAPIC_REG_VER: u32 = 0x0001;
/// Redirection table base
const IOAPIC_REG_TABLE: u32 = 0x0010;

/// Interrupt vector handled by `tlb_flush_handler`.
#[cfg(feature = "smp")]
const TLB_FLUSH_INTERRUPT_NUMBER: u8 = 112;
/// Interrupt vector handled by `wakeup_handler`.
#[cfg(feature = "smp")]
const WAKEUP_INTERRUPT_NUMBER: u8 = 121;
/// Interrupt vector programmed into the LVT Timer register by `__set_oneshot_timer`.
pub const TIMER_INTERRUPT_NUMBER: u8 = 123;
/// Interrupt vector handled by `error_interrupt_handler`.
const ERROR_INTERRUPT_NUMBER: u8 = 126;
/// Interrupt vector handled by `spurious_interrupt_handler`.
const SPURIOUS_INTERRUPT_NUMBER: u8 = 127;

/// Physical and virtual memory address for our SMP boot code.
///
/// While our boot processor is already in x86-64 mode, application processors boot up in 16-bit real mode
/// and need an address in the CS:IP addressing scheme to jump to.
/// The CS:IP addressing scheme is limited to 2^20 bytes (= 1 MiB).
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_ADDRESS: VirtAddr = VirtAddr(0x8000);

/// Offset inside the boot code page at which `boot_application_processors` stores the entry point address.
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_OFFSET_ENTRY: usize = 0x08;
/// Offset inside the boot code page at which the Core ID of the CPU to boot is stored.
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_OFFSET_CPU_ID: usize = SMP_BOOT_CODE_OFFSET_ENTRY + 0x08;
/// Offset inside the boot code page at which the raw boot info address is stored.
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_OFFSET_BOOTINFO: usize = SMP_BOOT_CODE_OFFSET_CPU_ID + 0x04;
/// Offset inside the boot code page at which the CR3 value (as `u32`) is stored.
#[cfg(feature = "smp")]
const SMP_BOOT_CODE_OFFSET_PML4: usize = SMP_BOOT_CODE_OFFSET_BOOTINFO + 0x08;

/// Bit set in the `IA32_APIC_BASE` MSR by `init_x2apic` to switch on x2APIC mode.
const X2APIC_ENABLE: u64 = 1 << 10;

/// Virtual address of the mapped Local APIC page; only set by `init` when x2APIC is not supported.
static LOCAL_APIC_ADDRESS: OnceCell<VirtAddr> = OnceCell::new();
/// Virtual address of the mapped IO-APIC page; set by `init_ioapic_address`.
static IOAPIC_ADDRESS: OnceCell<VirtAddr> = OnceCell::new();

/// Stores the Local APIC IDs of all CPUs. The index equals the Core ID.
/// Both numbers often match, but don't need to (e.g. when a core has been disabled).
static CPU_LOCAL_APIC_IDS: SpinMutex<Vec<u8>> = SpinMutex::new(Vec::new());

/// After calibration, initialize the APIC Timer with this counter value to let it fire an interrupt
/// after 1 microsecond.
static CALIBRATED_COUNTER_VALUE: OnceCell<u64> = OnceCell::new();

/// MP Floating Pointer Structure
///
/// Located by `search_mp_floating` and interpreted by `detect_from_mp`.
#[repr(C, packed)]
struct ApicMP {
	/// Must equal `MP_FLT_SIGNATURE` for the structure to be accepted.
	signature: u32,
	/// Physical address of the MP Configuration Table (`ApicConfigTable`).
	mp_config: u32,
	length: u8,
	/// Reported as "Multiprocessing Specification 1.{version}"; values above 4 are rejected.
	version: u8,
	checksum: u8,
	/// `features[0]` must be 0 for the structure to be accepted;
	/// bit 7 of `features[1]` distinguishes PIC mode from Virtual-Wire mode.
	features: [u8; 5],
}

/// MP Configuration Table
///
/// Header that `detect_from_mp` reads at the address given by `ApicMP::mp_config`.
/// The table entries start directly behind this header.
#[repr(C, packed)]
struct ApicConfigTable {
	/// Must equal `MP_CONFIG_SIGNATURE`, otherwise the table is treated as invalid.
	signature: u32,
	length: u16,
	revision: u8,
	checksum: u8,
	oem_id: [u8; 8],
	product_id: [u8; 12],
	oem_table: u32,
	oem_table_size: u16,
	/// Number of entries that follow the header; 0 makes `detect_from_mp` guess the IO-APIC address.
	entry_count: u16,
	/// Physical address of the Local APIC, returned by `detect_from_mp`.
	lapic: u32,
	extended_table_length: u16,
	extended_table_checksum: u8,
	reserved: u8,
}

/// APIC Processor Entry
///
/// Entry of the MP Configuration Table whose first byte (`typ`) is 0.
#[repr(C, packed)]
struct ApicProcessorEntry {
	typ: u8,
	/// Local APIC ID that `detect_from_mp` registers via `add_local_apic_id`.
	id: u8,
	version: u8,
	/// Bit 0 must be set for the CPU to be registered.
	cpu_flags: u8,
	cpu_signature: u32,
	cpu_feature: u32,
	reserved: [u32; 2],
}

/// IO APIC Entry
///
/// Entry of the MP Configuration Table whose first byte (`typ`) is 2.
#[repr(C, packed)]
struct ApicIoEntry {
	typ: u8,
	id: u8,
	version: u8,
	enabled: u8,
	/// Physical address of the IO-APIC, passed to `init_ioapic_address`.
	addr: u32,
}

/// MADT-specific table header read by `detect_from_acpi` at the start address of the table;
/// the individual MADT records follow directly behind it.
#[cfg(feature = "acpi")]
#[repr(C, packed)]
struct AcpiMadtHeader {
	/// Physical address of the Local APIC, returned by `detect_from_acpi`.
	local_apic_address: u32,
	flags: u32,
}

/// Header that precedes every record of the MADT.
#[cfg(feature = "acpi")]
#[repr(C, packed)]
struct AcpiMadtRecordHeader {
	/// Record kind; `detect_from_acpi` handles 0 (Processor Local APIC) and 1 (I/O APIC).
	entry_type: u8,
	/// Length of the whole record in bytes, including this header
	/// (`detect_from_acpi` subtracts the header size when advancing to the next record).
	length: u8,
}

/// Payload of a MADT record of type 0 ("Processor Local APIC"), as parsed by `detect_from_acpi`.
#[repr(C, packed)]
struct ProcessorLocalApicRecord {
	acpi_processor_id: u8,
	apic_id: u8,
	flags: u32,
}

impl fmt::Display for ProcessorLocalApicRecord {
	/// Prints all fields in a struct-literal-like notation.
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		// The struct is packed, so copy the fields out instead of borrowing them.
		let (processor_id, apic_id, flags) = (self.acpi_processor_id, self.apic_id, self.flags);

		write!(f, "{{ acpi_processor_id: {}, ", processor_id)?;
		write!(f, "apic_id: {}, ", apic_id)?;
		write!(f, "flags: {} }}", flags)
	}
}

/// Bit of `ProcessorLocalApicRecord::flags` that must be set for `detect_from_acpi` to register the CPU.
#[cfg(feature = "acpi")]
const CPU_FLAG_ENABLED: u32 = 1 << 0;

/// Payload of a MADT record of type 1 ("I/O APIC"), as parsed by `detect_from_acpi`.
#[repr(C, packed)]
struct IoApicRecord {
	id: u8,
	reserved: u8,
	address: u32,
	global_system_interrupt_base: u32,
}

impl fmt::Display for IoApicRecord {
	/// Prints all fields in a struct-literal-like notation; the address is shown in hexadecimal.
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		// The struct is packed, so copy the fields out instead of borrowing them.
		let (id, reserved) = (self.id, self.reserved);
		let (address, interrupt_base) = (self.address, self.global_system_interrupt_base);

		write!(f, "{{ id: {}, ", id)?;
		write!(f, "reserved: {}, ", reserved)?;
		write!(f, "address: {:#X}, ", address)?;
		write!(f, "global_system_interrupt_base: {} }}", interrupt_base)
	}
}

/// Interrupt handler for the IPI sent by `ipi_tlb_flush`.
///
/// Counts the interrupt, writes CR3 back with its current value and acknowledges the interrupt.
#[cfg(feature = "smp")]
extern "x86-interrupt" fn tlb_flush_handler(_stack_frame: interrupts::ExceptionStackFrame) {
	debug!("Received TLB Flush Interrupt");
	increment_irq_counter(TLB_FLUSH_INTERRUPT_NUMBER);

	// Reload CR3 with the value it already holds.
	unsafe {
		let page_table_root = cr3();
		cr3_write(page_table_root);
	}

	eoi();
}

/// Interrupt handler for the Local APIC LVT Error interrupt (`ERROR_INTERRUPT_NUMBER`).
///
/// Logs the content of the error status register and the interrupted stack frame,
/// acknowledges the interrupt and finally calls `scheduler::abort`.
extern "x86-interrupt" fn error_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) {
	error!("APIC LVT Error Interrupt");
	error!("ESR: {:#X}", local_apic_read(IA32_X2APIC_ESR));
	error!("{:#?}", stack_frame);
	eoi();
	scheduler::abort();
}

/// Interrupt handler for spurious interrupts (`SPURIOUS_INTERRUPT_NUMBER`).
///
/// Logs the interrupted stack frame and calls `scheduler::abort`.
/// Unlike the other handlers in this file, it does not call `eoi`.
extern "x86-interrupt" fn spurious_interrupt_handler(stack_frame: interrupts::ExceptionStackFrame) {
	error!("Spurious Interrupt: {:#?}", stack_frame);
	scheduler::abort();
}

/// Interrupt handler for the IPI sent by `wakeup_core`.
///
/// Counts the interrupt, lets the scheduler of this core check its input,
/// acknowledges the interrupt and reschedules if the scheduler reports that it is scheduling.
#[cfg(feature = "smp")]
extern "x86-interrupt" fn wakeup_handler(_stack_frame: interrupts::ExceptionStackFrame) {
	debug!("Received Wakeup Interrupt");
	increment_irq_counter(WAKEUP_INTERRUPT_NUMBER);

	let sched = core_scheduler();
	sched.check_input();

	// The interrupt is acknowledged before a possible reschedule.
	eoi();

	if !sched.is_scheduling() {
		return;
	}
	sched.reschedule();
}

/// Appends the Local APIC ID of a newly detected CPU to `CPU_LOCAL_APIC_IDS`.
///
/// The position in that list becomes the Core ID of the CPU.
#[inline]
pub fn add_local_apic_id(id: u8) {
	let mut apic_ids = CPU_LOCAL_APIC_IDS.lock();
	apic_ids.push(id);
}

/// Returns how many Local APIC IDs have been registered through `add_local_apic_id`.
#[cfg(feature = "smp")]
pub fn local_apic_id_count() -> u32 {
	let apic_ids = CPU_LOCAL_APIC_IDS.lock();
	apic_ids.len() as u32
}

/// Maps the IO-APIC located at `phys_addr` into the virtual address space and
/// remembers the chosen virtual address in `IOAPIC_ADDRESS`.
///
/// Panics if no virtual memory is available or if `IOAPIC_ADDRESS` has already been set.
fn init_ioapic_address(phys_addr: PhysAddr) {
	let ioapic_address = virtualmem::allocate(BasePageSize::SIZE as usize).unwrap();
	IOAPIC_ADDRESS.set(ioapic_address).unwrap();
	debug!("Mapping IOAPIC at {phys_addr:p} to virtual address {ioapic_address:p}",);

	// Map a single page as writable, non-executable device memory.
	let flags = {
		let mut device_flags = PageTableEntryFlags::empty();
		device_flags.device().writable().execute_disable();
		device_flags
	};
	paging::map::<BasePageSize>(ioapic_address, phys_addr, 1, flags);
}

/// Fallback used when the `acpi` feature is disabled.
///
/// Always returns `Err(())`, so `init` continues with the next detection method.
#[cfg(not(feature = "acpi"))]
fn detect_from_acpi() -> Result<PhysAddr, ()> {
	// dummy implementation if acpi support is disabled
	Err(())
}

/// Detects CPUs and the IO-APIC from the ACPI Multiple APIC Description Table (MADT).
///
/// Every enabled "Processor Local APIC" record is registered via `add_local_apic_id` and
/// every "I/O APIC" record is mapped via `init_ioapic_address`.
///
/// Returns the physical address of the Local APIC taken from the MADT header,
/// or `Err(())` if no MADT is available.
#[cfg(feature = "acpi")]
fn detect_from_acpi() -> Result<PhysAddr, ()> {
	// Get the Multiple APIC Description Table (MADT) from the ACPI information and its specific table header.
	let madt = acpi::get_madt().ok_or(())?;
	let madt_header = unsafe { &*(madt.table_start_address() as *const AcpiMadtHeader) };

	// Jump to the actual table entries (after the table header).
	let mut current_address = madt.table_start_address() + mem::size_of::<AcpiMadtHeader>();

	// Loop through all table entries.
	while current_address < madt.table_end_address() {
		// Read the record header and step over it, so that `current_address` points to the payload.
		let record = unsafe { &*(current_address as *const AcpiMadtRecordHeader) };
		current_address += mem::size_of::<AcpiMadtRecordHeader>();

		match record.entry_type {
			0 => {
				// Processor Local APIC
				let processor_local_apic_record =
					unsafe { &*(current_address as *const ProcessorLocalApicRecord) };
				debug!(
					"Found Processor Local APIC record: {}",
					processor_local_apic_record
				);

				// Only CPUs marked as enabled get a Core ID.
				if processor_local_apic_record.flags & CPU_FLAG_ENABLED > 0 {
					add_local_apic_id(processor_local_apic_record.apic_id);
				}
			}
			1 => {
				// I/O APIC
				let ioapic_record = unsafe { &*(current_address as *const IoApicRecord) };
				debug!("Found I/O APIC record: {}", ioapic_record);

				init_ioapic_address(PhysAddr(ioapic_record.address.into()));
			}
			_ => {
				// Just ignore other entries for now.
			}
		}

		// Advance to the next record. The header has already been skipped above.
		// NOTE(review): a record length smaller than the header size would underflow here
		// — the table content is trusted; confirm that this is acceptable.
		current_address += record.length as usize - mem::size_of::<AcpiMadtRecordHeader>();
	}

	// Successfully derived all information from the MADT.
	// Return the physical address of the Local APIC.
	Ok(PhysAddr(madt_header.local_apic_address.into()))
}

/// Helper function to search Floating Pointer Structure of the Multiprocessing Specification
///
/// Scans the physical memory range `start..end` page by page. Each page is mapped to a
/// freshly allocated virtual page and checked at every 4-byte offset for a structure that
/// carries `MP_FLT_SIGNATURE`, a version of at most 4 and `features[0] == 0`.
///
/// Returns a reference to the first match. In that case the virtual page is intentionally
/// not released, because the returned reference points into it.
/// Returns `Err(())` if the virtual page cannot be allocated or nothing is found.
fn search_mp_floating(start: PhysAddr, end: PhysAddr) -> Result<&'static ApicMP, ()> {
	let virtual_address = virtualmem::allocate(BasePageSize::SIZE as usize).map_err(|_| ())?;

	for current_address in (start.as_usize()..end.as_usize()).step_by(BasePageSize::SIZE as usize) {
		// Map the physical page under inspection to the same virtual page on every iteration.
		let mut flags = PageTableEntryFlags::empty();
		flags.normal().writable();
		paging::map::<BasePageSize>(
			virtual_address,
			PhysAddr::from(current_address.align_down(BasePageSize::SIZE as usize)),
			1,
			flags,
		);

		// NOTE(review): the structure is larger than 4 bytes, so the candidates at the last
		// offsets of the page extend beyond the mapped page. Only the signature (the first
		// 4 bytes) is read unless it matches — confirm this is acceptable.
		for i in 0..BasePageSize::SIZE / 4 {
			let mut tmp: *const u32 = virtual_address.as_ptr();
			tmp = unsafe { tmp.offset(i.try_into().unwrap()) };
			let apic_mp: &ApicMP = unsafe { &(*(tmp as *const ApicMP)) };
			if apic_mp.signature == MP_FLT_SIGNATURE
				&& !(apic_mp.version > 4 || apic_mp.features[0] != 0)
			{
				return Ok(apic_mp);
			}
		}
	}

	// frees obsolete virtual memory region for MMIO devices
	virtualmem::deallocate(virtual_address, BasePageSize::SIZE as usize);

	Err(())
}

/// Helper function to detect APIC by the Multiprocessor Specification
///
/// Looks for the MP Floating Pointer Structure in the physical ranges `0x9F000..0xA0000`
/// and `0xF0000..0x100000`, maps the MP Configuration Table it refers to and walks the
/// table entries: enabled CPUs are registered via `add_local_apic_id` and IO-APICs are
/// mapped via `init_ioapic_address`.
///
/// Returns the physical address of the Local APIC stored in the configuration table,
/// or `Err(())` if no valid floating pointer structure or configuration table is found.
fn detect_from_mp() -> Result<PhysAddr, ()> {
	let mp_float = if let Ok(mpf) = search_mp_floating(PhysAddr(0x9F000u64), PhysAddr(0xA0000u64)) {
		Ok(mpf)
	} else if let Ok(mpf) = search_mp_floating(PhysAddr(0xF0000u64), PhysAddr(0x100000u64)) {
		Ok(mpf)
	} else {
		Err(())
	}?;

	info!("Found MP config at {:#x}", { mp_float.mp_config });
	info!(
		"System uses Multiprocessing Specification 1.{}",
		mp_float.version
	);
	info!("MP features 1: {}", mp_float.features[0]);

	if mp_float.features[1] & 0x80 > 0 {
		info!("PIC mode implemented");
	} else {
		info!("Virtual-Wire mode implemented");
	}

	let virtual_address = virtualmem::allocate(BasePageSize::SIZE as usize).map_err(|_| ())?;

	// Map the page that contains the start of the MP Configuration Table.
	let mut flags = PageTableEntryFlags::empty();
	flags.normal().writable();
	paging::map::<BasePageSize>(
		virtual_address,
		PhysAddr::from((mp_float.mp_config as usize).align_down(BasePageSize::SIZE as usize)),
		1,
		flags,
	);

	// Combine the virtual page address with the offset of the table inside its page.
	let mut addr: usize = virtual_address.as_usize()
		| (mp_float.mp_config as usize & (BasePageSize::SIZE as usize - 1));
	let mp_config: &ApicConfigTable = unsafe { &*(addr as *const ApicConfigTable) };
	if mp_config.signature != MP_CONFIG_SIGNATURE {
		warn!("Invalid MP config table");
		virtualmem::deallocate(virtual_address, BasePageSize::SIZE as usize);
		return Err(());
	}

	if mp_config.entry_count == 0 {
		warn!("No MP table entries! Guess IO-APIC!");
		let default_address = PhysAddr(0xFEC0_0000);

		init_ioapic_address(default_address);
	} else {
		// entries starts directly after the config table
		// NOTE(review): only a single page has been mapped above — presumably the
		// entries are expected to fit into it; confirm.
		addr += mem::size_of::<ApicConfigTable>();
		for _i in 0..mp_config.entry_count {
			// The first byte of every entry holds its type.
			match unsafe { *(addr as *const u8) } {
				// CPU entry
				0 => {
					let cpu_entry: &ApicProcessorEntry =
						unsafe { &*(addr as *const ApicProcessorEntry) };
					// Only CPUs whose flag bit 0 is set get a Core ID.
					if cpu_entry.cpu_flags & 0x01 == 0x01 {
						add_local_apic_id(cpu_entry.id);
					}
					addr += mem::size_of::<ApicProcessorEntry>();
				}
				// IO-APIC entry
				2 => {
					let io_entry: &ApicIoEntry = unsafe { &*(addr as *const ApicIoEntry) };
					let ioapic = PhysAddr(io_entry.addr.into());
					info!("Found IOAPIC at 0x{:p}", ioapic);

					init_ioapic_address(ioapic);

					addr += mem::size_of::<ApicIoEntry>();
				}
				// All other entry types are skipped; they are treated as 8 bytes long.
				_ => {
					addr += 8;
				}
			}
		}
	}

	Ok(PhysAddr(mp_config.lapic as u64))
}

/// Last-resort APIC detection that falls back to the architectural default addresses.
///
/// Used by `init` when neither ACPI nor the MP tables provide the information.
/// Unless running on uhyve, the IO-APIC is mapped at its default physical address
/// `0xFEC0_0000` (the same guess `detect_from_mp` uses for an empty MP table).
///
/// Returns the default physical address of the Local APIC (`0xFEE0_0000`).
fn default_apic() -> PhysAddr {
	warn!("Try to use default APIC address");

	let default_local_apic_address = PhysAddr(0xFEE0_0000);
	// The IO-APIC lives at a different default address than the Local APIC.
	// Mapping it at the Local APIC address would make all IO-APIC register
	// accesses hit the Local APIC page instead.
	let default_ioapic_address = PhysAddr(0xFEC0_0000);

	// currently, uhyve doesn't support an IO-APIC
	if !env::is_uhyve() {
		init_ioapic_address(default_ioapic_address);
	}

	default_local_apic_address
}

/// Signals the end of interrupt handling by writing `APIC_EOI_ACK` to the EOI register
/// of the Local APIC.
///
/// Exported unmangled with the C ABI.
#[no_mangle]
pub extern "C" fn eoi() {
	local_apic_write(IA32_X2APIC_EOI, APIC_EOI_ACK);
}

/// Detects and initializes the APIC hardware.
///
/// The steps are, in order:
/// 1. Detect CPUs, the IO-APIC and the Local APIC address via ACPI, then via the MP tables,
///    then by falling back to default addresses.
/// 2. Enable x2APIC mode if supported; otherwise map the Local APIC page for xAPIC mode.
/// 3. Install the interrupt handlers defined in this file into the IDT.
/// 4. Initialize the Local APIC and, without TSC deadline support, calibrate its timer.
/// 5. Initialize the IO-APIC (skipped on uhyve).
///
/// Panics if the virtual memory allocation for the Local APIC page fails or if
/// `LOCAL_APIC_ADDRESS` has already been set.
pub fn init() {
	// Detect CPUs and APICs.
	let local_apic_physical_address = detect_from_acpi()
		.or_else(|_| detect_from_mp())
		.unwrap_or_else(|_| default_apic());

	// Initialize x2APIC or xAPIC, depending on what's available.
	init_x2apic();
	if !processor::supports_x2apic() {
		// We use the traditional xAPIC mode available on all x86-64 CPUs.
		// It uses a mapped page for communication.
		let local_apic_address = virtualmem::allocate(BasePageSize::SIZE as usize).unwrap();
		LOCAL_APIC_ADDRESS.set(local_apic_address).unwrap();
		debug!(
			"Mapping Local APIC at {:p} to virtual address {:p}",
			local_apic_physical_address, local_apic_address
		);

		let mut flags = PageTableEntryFlags::empty();
		flags.device().writable().execute_disable();
		paging::map::<BasePageSize>(local_apic_address, local_apic_physical_address, 1, flags);
	}

	// Set gates to ISRs for the APIC interrupts we are going to enable.
	unsafe {
		let idt = &mut IDT;
		idt[ERROR_INTERRUPT_NUMBER as usize]
			.set_handler_fn(error_interrupt_handler)
			.set_stack_index(0);
		idt[SPURIOUS_INTERRUPT_NUMBER as usize]
			.set_handler_fn(spurious_interrupt_handler)
			.set_stack_index(0);
		#[cfg(feature = "smp")]
		{
			// The IRQ names are registered relative to vector 32.
			idt[TLB_FLUSH_INTERRUPT_NUMBER as usize]
				.set_handler_fn(tlb_flush_handler)
				.set_stack_index(0);
			interrupts::add_irq_name(TLB_FLUSH_INTERRUPT_NUMBER - 32, "TLB flush");
			idt[WAKEUP_INTERRUPT_NUMBER as usize]
				.set_handler_fn(wakeup_handler)
				.set_stack_index(0);
			interrupts::add_irq_name(WAKEUP_INTERRUPT_NUMBER - 32, "Wakeup");
		}
	}

	// Initialize interrupt handling over APIC.
	// All interrupts of the PIC have already been masked, so it doesn't need to be disabled again.
	init_local_apic();

	if !processor::supports_tsc_deadline() {
		// We have an older APIC Timer without TSC Deadline support, which has a maximum timeout
		// and needs to be calibrated.
		calibrate_timer();
	}

	// currently, IO-APIC isn't supported by uhyve
	if !env::is_uhyve() {
		// initialize IO-APIC
		init_ioapic();
	}
}

/// Programs the redirection table of the IO-APIC.
///
/// Every entry reported by the IO-APIC is enabled and routed to APIC ID 0,
/// except for entry 2 (the IOAPIC timer), which is masked.
///
/// Panics if `ioapic_inton` or `ioapic_intoff` rejects an entry.
fn init_ioapic() {
	let max_entry = ioapic_max_redirection_entry() + 1;
	info!("IOAPIC v{} has {} entries", ioapic_version(), max_entry);

	for irq in 0..max_entry {
		match irq {
			2 => {
				// The IOAPIC timer is no longer needed, so it stays masked.
				info!("Disable IOAPIC timer");
				ioapic_intoff(2, 0).unwrap();
			}
			// Everything else gets turned on.
			_ => ioapic_inton(irq, 0).unwrap(),
		}
	}
}

/// Enables the redirection table entry `irq` of the IO-APIC.
///
/// The entry is routed to the Local APIC with ID `apicid` and delivers interrupt vector `0x20 + irq`.
///
/// Returns `Err(())` if `irq` is greater than 24.
fn ioapic_inton(irq: u8, apicid: u8) -> Result<(), ()> {
	if irq > 24 {
		error!("IOAPIC: trying to turn on irq {} which is too high\n", irq);
		return Err(());
	}

	// Every redirection entry occupies two consecutive 32-bit registers.
	let entry_register = IOAPIC_REG_TABLE + u32::from(irq * 2);
	let low_dword = u32::from(0x20 + irq);
	let high_dword = u32::from(apicid) << 24;

	ioapic_write(entry_register, low_dword);
	ioapic_write(entry_register + 1, high_dword);

	Ok(())
}

/// Masks the redirection table entry `irq` of the IO-APIC.
///
/// The entry keeps its vector `0x20 + irq` and its destination `apicid`, but bit 16 (the mask bit)
/// is set in the lower half.
///
/// Returns `Err(())` if `irq` is greater than 24.
fn ioapic_intoff(irq: u32, apicid: u32) -> Result<(), ()> {
	if irq > 24 {
		error!("IOAPIC: trying to turn off irq {} which is too high\n", irq);
		return Err(());
	}

	// Every redirection entry occupies two consecutive 32-bit registers.
	let entry_register = IOAPIC_REG_TABLE + irq * 2;
	let low_dword = (0x20 + irq) | (1 << 16); // mask bit set: the interrupt is turned off
	let high_dword = apicid << 24;

	ioapic_write(entry_register, low_dword);
	ioapic_write(entry_register + 1, high_dword);

	Ok(())
}

/// Brings the Local APIC of the current CPU into its initial state.
///
/// Masks the Timer, Thermal, PMI, LINT0 and LINT1 entries of the local vector table,
/// sets the vector of the Error interrupt, clears the task priority and finally enables
/// the Local APIC through the spurious interrupt vector register.
pub fn init_local_apic() {
	// Mask out all interrupts we don't need right now.
	for lvt_register in [
		IA32_X2APIC_LVT_TIMER,
		IA32_X2APIC_LVT_THERMAL,
		IA32_X2APIC_LVT_PMI,
		IA32_X2APIC_LVT_LINT0,
		IA32_X2APIC_LVT_LINT1,
	] {
		local_apic_write(lvt_register, APIC_LVT_MASK);
	}

	// Set the interrupt number of the Error interrupt.
	let error_vector = u64::from(ERROR_INTERRUPT_NUMBER);
	local_apic_write(IA32_X2APIC_LVT_ERROR, error_vector);

	// allow all interrupts
	local_apic_write(IA32_X2APIC_TPR, 0x00);

	// Finally, enable the Local APIC by setting the interrupt number for spurious interrupts
	// and providing the enable bit.
	let spurious_vector = APIC_SIVR_ENABLED | u64::from(SPURIOUS_INTERRUPT_NUMBER);
	local_apic_write(IA32_X2APIC_SIVR, spurious_vector);
}

/// Determines how many APIC Timer ticks elapse per microsecond and stores the result
/// in `CALIBRATED_COUNTER_VALUE`.
///
/// Panics if `CALIBRATED_COUNTER_VALUE` has already been set.
fn calibrate_timer() {
	// The APIC Timer provides the one-shot interrupt for the tickless timer implemented
	// through processor::get_timer_ticks. That timer has a resolution of 1 microsecond,
	// so we need the counter value that corresponds to 1 microsecond.
	// Measure over 30ms for accuracy.
	let microseconds = 30_000;

	// Start the counter from its maximum value with a divisor of 8.
	// Dividing by 8 still provides enough accuracy for 1 microsecond resolution,
	// but allows for longer timeouts than a smaller divisor.
	// For example, on an Intel Xeon E5-2650 v3 @ 2.30GHz, the counter is usually calibrated to
	// 125, which allows for timeouts of approximately 34 seconds (u32::MAX / 125).
	local_apic_write(IA32_X2APIC_DIV_CONF, APIC_DIV_CONF_DIVIDE_BY_8);
	local_apic_write(IA32_X2APIC_INIT_COUNT, u64::from(u32::MAX));

	// Wait until the calibration time has elapsed.
	processor::udelay(microseconds);

	// The number of ticks counted down since the start, divided by the waiting time,
	// is the result of the calibration.
	let remaining_ticks = local_apic_read(IA32_X2APIC_CUR_COUNT);
	let elapsed_ticks = u64::from(u32::MAX - remaining_ticks);
	let calibrated_counter_value = elapsed_ticks / microseconds;

	CALIBRATED_COUNTER_VALUE
		.set(calibrated_counter_value)
		.unwrap();
	debug!(
		"Calibrated APIC Timer with a counter value of {calibrated_counter_value} for 1 microsecond",
	);
}

/// Programs the APIC Timer to fire once at `wakeup_time` or disables it.
///
/// * `Some(wt)`: `wt` is the absolute wakeup time in microseconds based on
///   `processor::get_timer_ticks`. With TSC deadline support, the deadline is written to the
///   `IA32_TSC_DEADLINE` MSR; otherwise the timer is started in one-shot mode with a relative
///   counter value derived from `CALIBRATED_COUNTER_VALUE`.
/// * `None`: the timer interrupt is masked.
///
/// All multiplications saturate, so a wakeup time in the far future results in the longest
/// possible timeout instead of an arithmetic overflow.
///
/// Panics if the timer needs to be used in one-shot mode, but has not been calibrated.
fn __set_oneshot_timer(wakeup_time: Option<u64>) {
	if let Some(wt) = wakeup_time {
		if processor::supports_tsc_deadline() {
			// wt is the absolute wakeup time in microseconds based on processor::get_timer_ticks.
			// We can simply multiply it by the processor frequency to get the absolute Time-Stamp Counter deadline
			// (see processor::get_timer_ticks).
			// Saturate instead of overflowing for wakeup times in the far future.
			let tsc_deadline = wt.saturating_mul(u64::from(processor::get_frequency()));

			// Enable the APIC Timer in TSC-Deadline Mode and let it start by writing to the respective MSR.
			local_apic_write(
				IA32_X2APIC_LVT_TIMER,
				APIC_LVT_TIMER_TSC_DEADLINE | u64::from(TIMER_INTERRUPT_NUMBER),
			);
			unsafe {
				wrmsr(IA32_TSC_DEADLINE, tsc_deadline);
			}
		} else {
			// Calculate the relative timeout from the absolute wakeup time.
			// Maintain a minimum value of one tick, otherwise the timer interrupt does not fire at all.
			let current_time = processor::get_timer_ticks();
			let ticks = wt.saturating_sub(current_time).max(1);

			// The Timer Counter Register is a 32-bit register, which we must not overflow for longer timeouts.
			// The multiplication saturates, so that the clamping below also covers products
			// that do not fit into 64 bits.
			let counter_per_microsecond = *CALIBRATED_COUNTER_VALUE.get().unwrap();
			let init_count = cmp::min(
				counter_per_microsecond.saturating_mul(ticks),
				u64::from(u32::MAX),
			);

			// Enable the APIC Timer in One-Shot Mode and let it start by setting the initial counter value.
			local_apic_write(IA32_X2APIC_LVT_TIMER, u64::from(TIMER_INTERRUPT_NUMBER));
			local_apic_write(IA32_X2APIC_INIT_COUNT, init_count);
		}
	} else {
		// Disable the APIC Timer.
		local_apic_write(IA32_X2APIC_LVT_TIMER, APIC_LVT_MASK);
	}
}

/// Programs (`Some`) or disables (`None`) the one-shot APIC Timer while interrupts are disabled.
///
/// See `__set_oneshot_timer` for the meaning of `wakeup_time`.
pub fn set_oneshot_timer(wakeup_time: Option<u64>) {
	without_interrupts(|| __set_oneshot_timer(wakeup_time));
}

/// Switches the Local APIC of the current CPU into x2APIC mode, if the CPU supports it.
///
/// Does nothing on CPUs without x2APIC support.
pub fn init_x2apic() {
	if !processor::supports_x2apic() {
		return;
	}

	debug!("Enable x2APIC support");
	// The CPU supports the modern x2APIC mode, which uses MSRs for communication.
	// Enable it by setting the respective bit in the APIC base MSR.
	unsafe {
		let apic_base = rdmsr(IA32_APIC_BASE) | X2APIC_ENABLE;
		wrmsr(IA32_APIC_BASE, apic_base);
	}
}

/// Initialize the required _start variables for the next CPU to be booted.
///
/// Allocates a kernel stack of `KERNEL_STACK_SIZE` bytes and publishes its address
/// through `CURRENT_STACK_ADDRESS`.
pub fn init_next_processor_variables() {
	// Allocate stack for the CPU and pass the addresses.
	// Keep the stack executable to possibly support dynamically generated code on the stack (see https://security.stackexchange.com/a/47825).
	// NOTE(review): confirm that the second argument of mm::allocate matches this intention.
	let stack_address = mm::allocate(KERNEL_STACK_SIZE, true).as_u64();
	CURRENT_STACK_ADDRESS.store(stack_address, Ordering::Relaxed);
}

/// Boot all Application Processors
/// This algorithm is derived from Intel MultiProcessor Specification 1.4, B.4, but testing has shown
/// that a second STARTUP IPI and setting the BIOS Reset Vector are no longer necessary.
/// This is partly confirmed by <https://wiki.osdev.org/Symmetric_Multiprocessing>
///
/// The boot code is copied to the identity-mapped page at `SMP_BOOT_CODE_ADDRESS` and patched
/// with the CR3 value, the entry point and the boot info address. Afterwards, every CPU in
/// `CPU_LOCAL_APIC_IDS` except the current one is started one after another; the function
/// spins until `arch::get_processor_count` changes before it continues with the next CPU.
///
/// Panics if the boot code does not fit into a page or if the CR3 value does not fit into 32 bits.
#[cfg(all(target_os = "none", feature = "smp"))]
pub fn boot_application_processors() {
	use core::hint;

	use include_transformed::include_nasm_bin;

	use super::{raw_boot_info, start};

	let smp_boot_code = include_nasm_bin!("boot.asm");

	// We shouldn't have any problems fitting the boot code into a single page, but let's better be sure.
	assert!(
		smp_boot_code.len() < BasePageSize::SIZE as usize,
		"SMP Boot Code is larger than a page"
	);
	debug!("SMP boot code is {} bytes long", smp_boot_code.len());

	// Identity-map the boot code page and copy over the code.
	debug!(
		"Mapping SMP boot code to physical and virtual address {:p}",
		SMP_BOOT_CODE_ADDRESS
	);
	let mut flags = PageTableEntryFlags::empty();
	flags.normal().writable();
	paging::map::<BasePageSize>(
		SMP_BOOT_CODE_ADDRESS,
		PhysAddr(SMP_BOOT_CODE_ADDRESS.as_u64()),
		1,
		flags,
	);
	unsafe {
		ptr::copy_nonoverlapping(
			smp_boot_code.as_ptr(),
			SMP_BOOT_CODE_ADDRESS.as_mut_ptr(),
			smp_boot_code.len(),
		);
	}

	unsafe {
		// Pass the PML4 page table address to the boot code.
		// It is stored as a 32-bit value, so the conversion panics for larger addresses.
		*((SMP_BOOT_CODE_ADDRESS + SMP_BOOT_CODE_OFFSET_PML4).as_mut_ptr::<u32>()) =
			cr3().try_into().unwrap();
		// Set entry point
		debug!(
			"Set entry point for application processor to {:p}",
			start::_start as *const ()
		);
		ptr::write_unaligned(
			(SMP_BOOT_CODE_ADDRESS + SMP_BOOT_CODE_OFFSET_ENTRY).as_mut_ptr(),
			start::_start as usize,
		);
		// Pass the address of the raw boot info to the boot code.
		ptr::write_unaligned(
			(SMP_BOOT_CODE_ADDRESS + SMP_BOOT_CODE_OFFSET_BOOTINFO).as_mut_ptr(),
			raw_boot_info() as *const _ as u64,
		);
	}

	// Now wake up each application processor.
	let apic_ids = CPU_LOCAL_APIC_IDS.lock();
	let core_id = core_id();

	for (core_id_to_boot, &apic_id) in apic_ids.iter().enumerate() {
		let core_id_to_boot = core_id_to_boot as u32;
		// The CPU executing this function is already running and must not be booted again.
		if core_id_to_boot != core_id {
			// Tell the boot code which Core ID the CPU to boot gets.
			unsafe {
				*((SMP_BOOT_CODE_ADDRESS + SMP_BOOT_CODE_OFFSET_CPU_ID).as_mut_ptr()) =
					core_id_to_boot;
			}
			// The destination APIC ID is placed in the upper 32 bits of the ICR value.
			let destination = u64::from(apic_id) << 32;

			debug!(
				"Waking up CPU {} with Local APIC ID {}",
				core_id_to_boot, apic_id
			);
			init_next_processor_variables();

			// Save the current number of initialized CPUs.
			let current_processor_count = arch::get_processor_count();

			// Send an INIT IPI.
			local_apic_write(
				IA32_X2APIC_ICR,
				destination
					| APIC_ICR_LEVEL_TRIGGERED
					| APIC_ICR_LEVEL_ASSERT
					| APIC_ICR_DELIVERY_MODE_INIT,
			);
			processor::udelay(200);

			// Send the INIT IPI again, this time without the level assert bit.
			local_apic_write(
				IA32_X2APIC_ICR,
				destination | APIC_ICR_LEVEL_TRIGGERED | APIC_ICR_DELIVERY_MODE_INIT,
			);
			processor::udelay(10000);

			// Send a STARTUP IPI.
			// The lowest bits carry the page number of the boot code.
			local_apic_write(
				IA32_X2APIC_ICR,
				destination
					| APIC_ICR_DELIVERY_MODE_STARTUP
					| ((SMP_BOOT_CODE_ADDRESS.as_u64()) >> 12),
			);
			debug!("Waiting for it to respond");

			// Wait until the application processor has finished initializing.
			// It will indicate this by counting up cpu_online.
			while current_processor_count == arch::get_processor_count() {
				hint::spin_loop();
			}
		}
	}
}

/// Asks all other CPUs to flush their TLB by sending them the TLB flush IPI.
///
/// Does nothing if only one CPU has been initialized.
#[cfg(feature = "smp")]
pub fn ipi_tlb_flush() {
	if arch::get_processor_count() <= 1 {
		return;
	}

	let apic_ids = CPU_LOCAL_APIC_IDS.lock();
	let core_id = core_id();

	// Ensure that all memory operations have completed before issuing a TLB flush.
	unsafe {
		_mm_mfence();
	}

	// Send an IPI with our TLB Flush interrupt number to all other CPUs.
	without_interrupts(|| {
		let other_cores = apic_ids
			.iter()
			.enumerate()
			.filter(|&(index, _)| index != core_id.try_into().unwrap());

		for (_, &apic_id) in other_cores {
			// The destination APIC ID is placed in the upper 32 bits of the ICR value.
			let command = (u64::from(apic_id) << 32)
				| APIC_ICR_LEVEL_ASSERT
				| APIC_ICR_DELIVERY_MODE_FIXED
				| u64::from(TLB_FLUSH_INTERRUPT_NUMBER);
			local_apic_write(IA32_X2APIC_ICR, command);
		}
	});
}

/// Send an inter-processor interrupt to wake up a CPU Core that is in a HALT state.
///
/// Nothing is sent if `core_id_to_wakeup` is the current core or if the `smp` feature is disabled.
/// Panics if `core_id_to_wakeup` has no entry in `CPU_LOCAL_APIC_IDS`.
#[allow(unused_variables)]
pub fn wakeup_core(core_id_to_wakeup: CoreId) {
	#[cfg(feature = "smp")]
	if core_id_to_wakeup != core_id() {
		without_interrupts(|| {
			let apic_ids = CPU_LOCAL_APIC_IDS.lock();
			let target_apic_id = apic_ids[core_id_to_wakeup as usize];

			// The destination APIC ID is placed in the upper 32 bits of the ICR value.
			let command = (u64::from(target_apic_id) << 32)
				| APIC_ICR_LEVEL_ASSERT
				| APIC_ICR_DELIVERY_MODE_FIXED
				| u64::from(WAKEUP_INTERRUPT_NUMBER);
			local_apic_write(IA32_X2APIC_ICR, command);
		});
	}
}

/// Translate the x2APIC MSR into an xAPIC memory address.
///
/// The lowest 8 bits of the MSR number, multiplied by 16, give the offset of the register
/// inside the mapped Local APIC page.
///
/// Panics if `LOCAL_APIC_ADDRESS` has not been set.
#[inline]
fn translate_x2apic_msr_to_xapic_address(x2apic_msr: u32) -> VirtAddr {
	let register_offset = (u64::from(x2apic_msr) & 0xFF) << 4;
	*LOCAL_APIC_ADDRESS.get().unwrap() + register_offset
}

/// Reads a register of the Local APIC.
///
/// `x2apic_msr` is the x2APIC MSR number of the register. In x2APIC mode, the MSR is read
/// directly and truncated to 32 bits; in xAPIC mode, the number is translated into the
/// address of the memory-mapped register.
///
/// Panics in xAPIC mode if `LOCAL_APIC_ADDRESS` has not been set.
fn local_apic_read(x2apic_msr: u32) -> u32 {
	if processor::supports_x2apic() {
		// x2APIC is simple, we can just read from the given MSR.
		unsafe { rdmsr(x2apic_msr) as u32 }
	} else {
		let register = translate_x2apic_msr_to_xapic_address(x2apic_msr).as_ptr::<u32>();
		// The register is memory-mapped I/O, so the access has to be volatile:
		// it must neither be elided nor merged with other accesses by the compiler.
		unsafe { core::ptr::read_volatile(register) }
	}
}

/// Writes `value` to the IO-APIC register with index `reg`.
///
/// The register index is first written to the start of the mapped IO-APIC page,
/// then the value is written at byte offset 16 of the page.
///
/// Panics if `IOAPIC_ADDRESS` has not been set.
fn ioapic_write(reg: u32, value: u32) {
	let ioapic_base = *IOAPIC_ADDRESS.get().unwrap();
	let select_register = ioapic_base.as_mut_ptr::<u32>();
	let data_register = (ioapic_base + 4 * mem::size_of::<u32>()).as_mut_ptr::<u32>();

	unsafe {
		core::ptr::write_volatile(select_register, reg);
		core::ptr::write_volatile(data_register, value);
	}
}

/// Reads the IO-APIC register with index `reg`.
///
/// The register index is first written to the start of the mapped IO-APIC page,
/// then the value is read from byte offset 16 of the page.
///
/// Panics if `IOAPIC_ADDRESS` has not been set.
fn ioapic_read(reg: u32) -> u32 {
	let ioapic_base = *IOAPIC_ADDRESS.get().unwrap();
	let select_register = ioapic_base.as_mut_ptr::<u32>();
	let data_register = (ioapic_base + 4 * mem::size_of::<u32>()).as_ptr::<u32>();

	unsafe {
		core::ptr::write_volatile(select_register, reg);
		core::ptr::read_volatile(data_register)
	}
}

/// Returns the lowest 8 bits of the IO-APIC version register.
fn ioapic_version() -> u32 {
	let version_register = ioapic_read(IOAPIC_REG_VER);
	version_register & 0xFF
}

/// Returns bits 16 to 23 of the IO-APIC version register,
/// which `init_ioapic` uses as the index of the last redirection table entry.
fn ioapic_max_redirection_entry() -> u8 {
	let version_register = ioapic_read(IOAPIC_REG_VER);
	((version_register >> 16) & 0xFF) as u8
}

/// Writes `value` to a register of the Local APIC.
///
/// `x2apic_msr` is the x2APIC MSR number of the register. In x2APIC mode, the value is
/// written to the MSR directly. In xAPIC mode, the lower 32 bits of the value are written
/// to the memory-mapped register; for the ICR, the function additionally waits until a
/// previous IPI has been delivered and writes the destination to the separate ICR2 register.
///
/// Panics in xAPIC mode if `LOCAL_APIC_ADDRESS` has not been set.
fn local_apic_write(x2apic_msr: u32, value: u64) {
	if processor::supports_x2apic() {
		// x2APIC is simple, we can just write the given value to the given MSR.
		unsafe {
			wrmsr(x2apic_msr, value);
		}
		return;
	}

	// All xAPIC registers are memory-mapped I/O. They are accessed through raw pointers
	// with volatile operations, so that the compiler can neither elide nor reorder the accesses.
	let register = translate_x2apic_msr_to_xapic_address(x2apic_msr).as_mut_ptr::<u32>();

	if x2apic_msr == IA32_X2APIC_ICR {
		// The ICR1 register in xAPIC mode also has a Delivery Status bit.
		// Wait until previous interrupt was delivered.
		// This bit does not exist in x2APIC mode (cf. Intel Vol. 3A, 10.12.9).
		while (unsafe { core::ptr::read_volatile(register) } & APIC_ICR_DELIVERY_STATUS_PENDING)
			> 0
		{
			spin_loop();
		}

		// Instead of a single 64-bit ICR register, xAPIC has two 32-bit registers (ICR1 and ICR2).
		// There is a gap between them and the destination field in ICR2 is also 8 bits instead of 32 bits.
		let destination = ((value >> 8) & 0xFF00_0000) as u32;
		let icr2 = (*LOCAL_APIC_ADDRESS.get().unwrap() + APIC_ICR2).as_mut_ptr::<u32>();
		unsafe {
			core::ptr::write_volatile(icr2, destination);
		}

		// The remaining data without the destination will now be written into ICR1.
	}

	// Write the value.
	unsafe {
		core::ptr::write_volatile(register, value as u32);
	}
}

pub fn print_information() {
	infoheader!(" MULTIPROCESSOR INFORMATION ");
	infoentry!(
		"APIC in use",
		if processor::supports_x2apic() {
			"x2APIC"
		} else {
			"xAPIC"
		}
	);
	infoentry!("Initialized CPUs", arch::get_processor_count());
	infofooter!();
}