Skip to main content

luwen_api/chip/
remote.rs

1// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use super::{eth_addr::EthAddr, HlComms, MemorySlices, Wormhole};
5use crate::{
6    chip::communication::{
7        chip_comms::{axi_translate, AxiData, AxiError, ChipComms},
8        chip_interface::ChipInterface,
9    },
10    error::PlatformError,
11};
12
13pub struct RemoteArcIf {
14    pub addr: EthAddr,
15    pub axi_data: Option<MemorySlices>,
16}
17
18impl ChipComms for RemoteArcIf {
19    fn axi_translate(&self, addr: &str) -> Result<AxiData, AxiError> {
20        axi_translate(self.axi_data.as_ref(), addr)
21    }
22
23    fn axi_read(
24        &self,
25        chip_if: &dyn ChipInterface,
26        addr: u64,
27        data: &mut [u8],
28    ) -> Result<(), Box<dyn std::error::Error>> {
29        chip_if.eth_noc_read(self.addr, 0, 0, 10, addr, data)
30    }
31
32    fn axi_write(
33        &self,
34        chip_if: &dyn ChipInterface,
35        addr: u64,
36        data: &[u8],
37    ) -> Result<(), Box<dyn std::error::Error>> {
38        chip_if.eth_noc_write(self.addr, 0, 0, 10, addr, data)
39    }
40
41    fn noc_read(
42        &self,
43        chip_if: &dyn ChipInterface,
44        noc_id: u8,
45        x: u8,
46        y: u8,
47        addr: u64,
48        data: &mut [u8],
49    ) -> Result<(), Box<dyn std::error::Error>> {
50        chip_if.eth_noc_read(self.addr, noc_id, x, y, addr, data)
51    }
52
53    fn noc_write(
54        &self,
55        chip_if: &dyn ChipInterface,
56        noc_id: u8,
57        x: u8,
58        y: u8,
59        addr: u64,
60        data: &[u8],
61    ) -> Result<(), Box<dyn std::error::Error>> {
62        chip_if.eth_noc_write(self.addr, noc_id, x, y, addr, data)
63    }
64
65    fn noc_multicast(
66        &self,
67        chip_if: &dyn ChipInterface,
68        noc_id: u8,
69        start: (u8, u8),
70        end: (u8, u8),
71        addr: u64,
72        data: &[u8],
73    ) -> Result<(), Box<dyn std::error::Error>> {
74        chip_if.eth_noc_multicast(self.addr, noc_id, start, end, addr, data)
75    }
76
77    fn noc_broadcast(
78        &self,
79        chip_if: &dyn ChipInterface,
80        noc_id: u8,
81        addr: u64,
82        data: &[u8],
83    ) -> Result<(), Box<dyn std::error::Error>> {
84        chip_if.eth_noc_broadcast(self.addr, noc_id, addr, data)
85    }
86}
87
/// Firmware-version-dependent addresses and offsets used when talking to the ethernet cores.
///
/// `EthAddresses::new` fills every field from a firmware version; `Default` yields an all-zero
/// table with `shelf_rack_routing` set to `false`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct EthAddresses {
    /// Firmware version with its top byte cleared (`fw_version & 0x00FFFFFF`); this is the
    /// value `EthAddresses::new` compares against its layout thresholds.
    pub masked_version: u32,

    /// Address of the firmware version word read from each ethernet core by
    /// `check_ethernet_fw_version`.
    pub version: u64,
    /// NOTE(review): presumably the address of the boot parameter block; not referenced in
    /// this file, confirm against its users.
    pub boot_params: u64,
    /// Base address of the node info block; the local chip coordinate is read from
    /// `node_info + 8`.
    pub node_info: u64,
    /// NOTE(review): presumably the address of the ethernet connection info table; not
    /// referenced in this file, confirm against its users.
    pub eth_conn_info: u64,
    /// NOTE(review): presumably the address of the debug buffer; not referenced in this file.
    pub debug_buf: u64,
    /// NOTE(review): presumably the address of the results buffer; not referenced in this file.
    pub results_buf: u64,
    /// Set by `EthAddresses::new` only when the masked version is at least `0x050000`.
    /// NOTE(review): presumably marks firmware that supports shelf/rack routing; confirm.
    pub shelf_rack_routing: bool,
    /// Address of the heartbeat word polled by `check_ethernet_training_complete`.
    pub heartbeat: u64,
    /// NOTE(review): presumably the address of the erisc application; not referenced in this
    /// file.
    pub erisc_app: u64,
    /// NOTE(review): presumably the address of the erisc application config; not referenced in
    /// this file.
    pub erisc_app_config: u64,
    /// NOTE(review): presumably an offset locating the remote board type; the base it is
    /// relative to and its unit are not visible in this file.
    pub erisc_remote_board_type_offset: u64,
    /// NOTE(review): presumably an offset locating the local board type; the base it is
    /// relative to and its unit are not visible in this file.
    pub erisc_local_board_type_offset: u64,
}
105
106impl EthAddresses {
107    pub fn new(fw_version: u32) -> Self {
108        let masked_version = fw_version & 0x00FFFFFF;
109
110        let version;
111        let boot_params;
112        let node_info;
113        let eth_conn_info;
114        let debug_buf;
115        let results_buf;
116        let shelf_rack_routing;
117        let heartbeat;
118        let erisc_app;
119        let erisc_app_config;
120        let erisc_remote_board_type_offset;
121        let erisc_local_board_type_offset;
122
123        if masked_version >= 0x050000 {
124            boot_params = 0x1000;
125            node_info = 0x1100;
126            eth_conn_info = 0x1200;
127            debug_buf = 0x12c0;
128            results_buf = 0x1ec0;
129            shelf_rack_routing = true;
130        } else if masked_version >= 0x030000 {
131            boot_params = 0x1000;
132            node_info = 0x1100;
133            eth_conn_info = 0x1200;
134            debug_buf = 0x1240;
135            results_buf = 0x1e40;
136            shelf_rack_routing = false;
137        } else {
138            boot_params = 0x5000;
139            node_info = 0x5100;
140            eth_conn_info = 0x5200;
141            debug_buf = 0x5240;
142            results_buf = 0x5e40;
143            shelf_rack_routing = false;
144        }
145
146        if masked_version >= 0x060000 {
147            version = 0x210;
148            heartbeat = 0x1c;
149            erisc_app = 0x9040;
150            erisc_app_config = 0x12000;
151        } else {
152            version = 0x210;
153            heartbeat = 0x1f80;
154            erisc_app = 0x8020;
155            erisc_app_config = 0x12000;
156        }
157
158        if masked_version >= 0x06C000 {
159            erisc_remote_board_type_offset = 77;
160            erisc_local_board_type_offset = 69;
161        } else {
162            erisc_remote_board_type_offset = 72;
163            erisc_local_board_type_offset = 64;
164        }
165
166        EthAddresses {
167            version,
168            masked_version,
169            boot_params,
170            node_info,
171            eth_conn_info,
172            debug_buf,
173            results_buf,
174            shelf_rack_routing,
175            heartbeat,
176            erisc_app,
177            erisc_app_config,
178            erisc_remote_board_type_offset,
179            erisc_local_board_type_offset,
180        }
181    }
182}
183
184impl Wormhole {
185    pub fn get_local_chip_coord(&self) -> Result<EthAddr, PlatformError> {
186        let coord = self.noc_read32(0, 9, 0, self.eth_addrs.node_info + 8)?;
187
188        Ok(EthAddr {
189            rack_x: (coord & 0xFF) as u8,
190            rack_y: ((coord >> 8) & 0xFF) as u8,
191            shelf_x: ((coord >> 16) & 0xFF) as u8,
192            shelf_y: ((coord >> 24) & 0xFF) as u8,
193        })
194    }
195
196    pub(crate) fn check_ethernet_training_complete(&mut self) -> Result<Vec<bool>, PlatformError> {
197        self.init_eth_addrs()?;
198
199        let mut initial_heartbeat = Vec::with_capacity(self.eth_locations.len());
200        for core in self.eth_locations.iter() {
201            if core.enabled {
202                initial_heartbeat.push(Some(self.noc_read32(
203                    0,
204                    core.x,
205                    core.y,
206                    self.eth_addrs.heartbeat,
207                )?));
208            } else {
209                initial_heartbeat.push(None);
210            }
211        }
212
213        let start_time = std::time::Instant::now();
214
215        // During initial training the erisc cores aren't running their heartbeats. In addition
216        // ethernet needs active retraining after initial training has completed. This retraining
217        // can only occur if the heartbeat is running. Therefore if the heartbeat is not running I
218        // assume that the link is not retrained.
219        //
220        // This procedure will block for 100 ms because I did not want to add state to the Wormhole
221        // struct to track the last time a heartbeat was incremented on each core because this
222        // function is only called during initialization.
223        let mut heartbeat = Vec::with_capacity(self.eth_locations.len());
224        loop {
225            heartbeat.clear();
226            for core in self.eth_locations.iter() {
227                if core.enabled {
228                    heartbeat.push(Some(self.noc_read32(
229                        0,
230                        core.x,
231                        core.y,
232                        self.eth_addrs.heartbeat,
233                    )?));
234                } else {
235                    heartbeat.push(None);
236                }
237            }
238
239            let valid_heartbeat = initial_heartbeat
240                .iter_mut()
241                .zip(heartbeat.iter().copied())
242                .map(|(h1, h2)| {
243                    if h1.is_none() && h2.is_some() {
244                        *h1 = h2
245                    }
246                    *h1 != h2
247                })
248                .collect::<Vec<_>>();
249
250            let init_finished = valid_heartbeat.iter().all(|&x| x);
251            if init_finished || start_time.elapsed() > std::time::Duration::from_millis(100) {
252                return Ok(valid_heartbeat);
253            }
254        }
255    }
256
257    pub(crate) fn check_ethernet_fw_version(&mut self) -> Result<Vec<bool>, PlatformError> {
258        let mut valid_fw_version = Vec::with_capacity(self.eth_locations.len());
259        for core in &self.eth_locations {
260            let eth_fw_version = self.eth_addrs.masked_version;
261            let msbyte = (eth_fw_version >> 24) & 0xFF;
262            if msbyte != 0x0
263                || msbyte != 0x6
264                || self.noc_read32(0, core.x, core.y, self.eth_addrs.version)? & 0x00FFFFFF
265                    != eth_fw_version
266            {
267                valid_fw_version.push(true);
268            } else {
269                valid_fw_version.push(false);
270            }
271        }
272
273        Ok(valid_fw_version)
274    }
275}