cyclone_fpga/
f1.rs

1use crate::{align::HalfAligned, Aligned, Error, Flush, ReadWrite, Result, Write};
2
3use core::arch::x86_64::{
4    // 256-bit SIMD register, requires avx
5    __m256i as u256,
6    _mm256_load_si256 as load_u256,
7    _mm256_stream_si256 as stream_u256,
8};
9
10use cyclone_f1_sys::{
11    c_void, fpga_pci_attach, fpga_pci_detach, fpga_pci_get_address, fpga_pci_peek, fpga_pci_poke,
12};
13
/// AWS F1 FPGA.
///
/// Bundles the two PCI BAR handles obtained when the device is attached, the
/// base offset of the control registers, and the memory-mapped streaming
/// window.
///
/// NOTE(review): this type derives `Clone` while its `Drop` impl detaches both
/// BAR handles, so every clone detaches the same handles again when it is
/// dropped — confirm that this is intended.
#[derive(Clone)]
pub struct F1 {
    /// Handle of the BAR used for `u32` register reads and writes.
    ctrl_bar: i32,
    /// Offset of the command register inside the control BAR; the data
    /// register written by `Write<u32>` sits four bytes above it.
    ctrl_offset: u64,
    /// Handle of the BAR that backs `stream_slice`.
    stream_bar: i32,
    /// Streaming window, indexed in 256-bit slots.
    stream_slice: &'static [u256],
}
22
23impl Drop for F1 {
24    fn drop(&mut self) {
25        unsafe {
26            fpga_pci_detach(self.ctrl_bar);
27            fpga_pci_detach(self.stream_bar);
28        }
29    }
30}
31
32pub type Packet = Aligned<[u64; 8]>;
33
34pub type Stream<'a, B> = crate::Stream<'a, F1, B>;
35
/// Physical-function argument passed to every `fpga_pci_attach` call
/// (presumably the SDK's application PF — confirm against the SDK headers).
const FPGA_APP_PF: i32 = 0;
/// BAR number attached for the control registers.
const APP_PF_BAR0: i32 = 0;
/// BAR number attached for the streaming window.
const APP_PF_BAR4: i32 = 4;
/// Flags value passed when attaching the streaming BAR
/// (presumably enables write-combined/burst access — confirm against the SDK).
const BURST_CAPABLE: u32 = 1;
40
#[cfg(feature = "f1")]
impl F1 {
    /// Attaches to the FPGA in `slot` and maps its streaming window.
    ///
    /// * `slot` - FPGA slot number handed to `fpga_pci_attach`.
    /// * `ctrl_offset` - offset of the command register inside the control
    ///   BAR (`APP_PF_BAR0`).
    /// * `stream_offset` - offset of the streaming window inside the stream
    ///   BAR (`APP_PF_BAR4`).
    /// * `stream_size` - length handed to `fpga_pci_get_address`, and also the
    ///   number of 256-bit slots exposed for streaming writes.
    ///
    /// # Errors
    ///
    /// Returns [`Error::SudoRequired`] when either BAR cannot be attached, or
    /// when the streaming window cannot be resolved to a usable (non-null,
    /// 32-byte aligned) address. Every BAR attached up to that point is
    /// detached again before the error is returned.
    pub fn new(
        slot: i32,
        ctrl_offset: usize,
        stream_offset: usize,
        stream_size: usize,
    ) -> Result<Self> {
        // fpga_pci_init does not actually do anything, so it is not called.

        let mut ctrl_bar: i32 = 0;
        // SAFETY: `ctrl_bar` is a live local the call can store a handle in.
        let ctrl_status =
            unsafe { fpga_pci_attach(slot, FPGA_APP_PF, APP_PF_BAR0, 0, &mut ctrl_bar) };
        if ctrl_status != 0 {
            return Err(Error::SudoRequired);
        }

        let mut stream_bar: i32 = 0;
        // SAFETY: `stream_bar` is a live local the call can store a handle in.
        let stream_status = unsafe {
            fpga_pci_attach(
                slot,
                FPGA_APP_PF,
                APP_PF_BAR4,
                BURST_CAPABLE,
                &mut stream_bar,
            )
        };
        if stream_status != 0 {
            // SAFETY: `ctrl_bar` was attached successfully just above.
            unsafe {
                fpga_pci_detach(ctrl_bar);
            }
            return Err(Error::SudoRequired);
        }

        let mut stream_addr: *mut c_void = core::ptr::null_mut();
        // NOTE(review): `stream_size` is handed to the SDK as the region
        // length and is also used below as the number of 256-bit slots; the
        // SDK header documents that length in dwords — confirm the units.
        // SAFETY: `stream_bar` is an attached handle and `stream_addr` is a
        // live local the call can store the resolved address in.
        let address_status = unsafe {
            fpga_pci_get_address(
                stream_bar,
                stream_offset as u64,
                stream_size as u64,
                &mut stream_addr as *mut _,
            )
        };

        // A failed lookup leaves the pointer null, and a misaligned window
        // cannot be viewed as `u256` slots; building a slice from either would
        // be undefined behaviour.
        let usable = address_status == 0
            && !stream_addr.is_null()
            && (stream_addr as usize) % core::mem::align_of::<u256>() == 0;
        if !usable {
            // SAFETY: both handles were attached successfully above.
            unsafe {
                fpga_pci_detach(stream_bar);
                fpga_pci_detach(ctrl_bar);
            }
            // NOTE(review): `SudoRequired` is the only error variant visible
            // from this file; a dedicated variant would describe this better.
            return Err(Error::SudoRequired);
        }

        // SAFETY: the pointer was checked to be non-null and aligned for
        // `u256`. The mapping is assumed to stay valid while the stream BAR
        // remains attached and to span `stream_size` slots (see the note on
        // units above).
        let stream_slice =
            unsafe { core::slice::from_raw_parts(stream_addr as *const u256, stream_size) };

        Ok(F1 {
            ctrl_bar,
            ctrl_offset: ctrl_offset as u64,
            stream_bar,
            stream_slice,
        })
    }
}
87
88impl Flush for F1 {
89    fn flush(&mut self) {
90        unsafe {
91            core::arch::x86_64::_mm_sfence();
92        }
93    }
94}
95
96impl Write<u32> for F1 {
97    fn write(&mut self, index: usize, value: &u32) {
98        let offset = (2 << 30) | (index << 2);
99        unsafe {
100            // the other order does not work.
101            fpga_pci_poke(self.ctrl_bar, self.ctrl_offset + 4, *value);
102            fpga_pci_poke(self.ctrl_bar, self.ctrl_offset, offset as _);
103        }
104    }
105}
106
107impl ReadWrite<u32> for F1 {
108    fn read(&self, index: usize) -> u32 {
109        let offset = (1 << 30) | (index << 2);
110        let mut value = 0;
111        unsafe {
112            fpga_pci_poke(self.ctrl_bar, self.ctrl_offset, offset as _);
113            fpga_pci_peek(self.ctrl_bar, self.ctrl_offset, &mut value);
114        }
115        value
116    }
117}
118
119type HalfPacket = HalfAligned<[u64; 4]>;
120
121impl Packet {
122    #[inline(always)]
123    fn split(&self) -> (&HalfPacket, &HalfPacket) {
124        use core::mem::transmute;
125        unsafe { (transmute(&self.value[4]), transmute(&self.value[0])) }
126    }
127}
128
129impl Write<HalfPacket> for F1 {
130    fn write(&mut self, index: usize, packet: &HalfPacket) {
131        unsafe {
132            let register = load_u256(&packet[0] as *const u64 as *const u256);
133            stream_u256(
134                &self.stream_slice[index] as *const u256 as *mut u256,
135                register,
136            );
137        }
138    }
139}
140
141impl Write<Packet> for F1 {
142    fn write(&mut self, index: usize, packet: &Packet) {
143        // x86 doesn't support 512bit writes, but
144        // sometimes the two half-packets are combined into
145        // one TLP anyway.
146        let (hi, lo) = packet.split();
147        self.write(2 * index, lo);
148        self.write(2 * index + 1, hi);
149    }
150}