1use std::collections::HashSet;
5
6use luwen_def::Arch;
7
8use crate::{
9 chip::{wait_for_init, Chip, InitError, InitStatus},
10 error::{BtWrapper, PlatformError},
11 ChipImpl, EthAddr,
12};
13
/// Second half of the identity key `detect_chips` uses to recognise a chip it
/// has already recorded.
#[derive(PartialEq, Eq, Hash, Debug, Clone)]
enum InterfaceIdOrCoord {
    /// Interface id taken from the chip's device info (used for chips that are
    /// not Wormhole).
    Id(u32),
    /// Ethernet coordinate the chip reports for itself (used for Wormhole chips).
    Coord(EthAddr),
}
19
/// A chip together with what is known about its initialization state.
///
/// `UninitChip::new` picks the variant from the supplied `InitStatus`.
pub enum UninitChip {
    /// Chosen by `UninitChip::new` when initialization has not completed or
    /// has reported an error.
    Partially {
        /// Initialization status captured when this value was created.
        status: Box<InitStatus>,
        /// The chip that `status` describes.
        underlying: Chip,
    },
    /// Chosen by `UninitChip::new` when initialization completed without error.
    Initialized(Chip),
}
33
34fn clone_chip(chip: &Chip) -> Chip {
36 if let Some(wh) = chip.as_wh() {
37 Chip::from(Box::new(wh.clone()) as Box<dyn ChipImpl>)
38 } else if let Some(bh) = chip.as_bh() {
39 Chip::from(Box::new(bh.clone()) as Box<dyn ChipImpl>)
40 } else {
41 unimplemented!(
42 "Don't have a clone handler for chip with arch {:?}.",
43 chip.get_arch()
44 )
45 }
46}
47
48impl Clone for UninitChip {
49 fn clone(&self) -> Self {
50 match self {
51 Self::Partially { status, underlying } => Self::Partially {
52 status: status.clone(),
53 underlying: clone_chip(underlying),
54 },
55 Self::Initialized(chip) => Self::Initialized(clone_chip(chip)),
56 }
57 }
58}
59
60impl UninitChip {
61 pub fn new(status: InitStatus, chip: &Chip) -> Self {
62 let chip = clone_chip(chip);
63 if status.init_complete() && !status.has_error() {
64 UninitChip::Initialized(chip)
65 } else {
66 UninitChip::Partially {
67 status: Box::new(status),
68 underlying: chip,
69 }
70 }
71 }
72
73 pub fn status(&self) -> Option<&InitStatus> {
74 match self {
75 UninitChip::Partially { status, .. } => Some(status),
76 UninitChip::Initialized(_) => None,
77 }
78 }
79
80 pub fn init<E>(
83 self,
84 init_callback: &mut impl FnMut(crate::chip::ChipDetectState) -> Result<(), E>,
85 ) -> Result<Chip, InitError<E>> {
86 match self {
87 UninitChip::Partially { mut underlying, .. } => {
88 wait_for_init(&mut underlying, init_callback, false, false)?;
89
90 Ok(underlying)
91 }
92 UninitChip::Initialized(chip) => Ok(chip),
93 }
94 }
95
96 pub fn upgrade(self) -> Chip {
97 match self {
98 UninitChip::Partially { underlying, .. } => underlying,
99 UninitChip::Initialized(chip) => chip,
100 }
101 }
102
103 pub fn try_upgrade(&self) -> Option<&Chip> {
104 match self {
105 UninitChip::Partially { status, underlying } => {
106 if status.init_complete() && !status.has_error() {
107 Some(underlying)
108 } else {
109 None
110 }
111 }
112 UninitChip::Initialized(chip) => Some(chip),
113 }
114 }
115
116 pub fn is_initialized(&self) -> bool {
117 match self {
118 UninitChip::Partially { status, .. } => status.init_complete(),
119 UninitChip::Initialized(_) => true,
120 }
121 }
122
123 pub fn is_healthy(&self) -> Option<bool> {
124 match self {
125 UninitChip::Partially { status, .. } => {
126 if status.init_complete() {
127 Some(status.has_error())
128 } else {
129 None
130 }
131 }
132 UninitChip::Initialized(_) => Some(true),
133 }
134 }
135
136 pub fn arc_alive(&self) -> bool {
137 match self {
138 UninitChip::Partially { status, .. } => {
139 !status.arc_status.is_waiting() && !status.arc_status.has_error()
140 }
141 UninitChip::Initialized(_) => true,
142 }
143 }
144
145 pub fn dram_safe(&self) -> bool {
146 match self {
147 UninitChip::Partially { status, .. } => {
148 !status.dram_status.is_waiting() && !status.dram_status.has_error()
149 }
150 UninitChip::Initialized(_) => true,
151 }
152 }
153
154 pub fn eth_safe(&self) -> bool {
155 match self {
156 UninitChip::Partially { status, .. } => {
157 !status.eth_status.is_waiting() && !status.eth_status.has_error()
158 }
159 UninitChip::Initialized(_) => true,
160 }
161 }
162
163 pub fn cpu_safe(&self) -> bool {
164 match self {
165 UninitChip::Partially { status, .. } => {
166 !status.cpu_status.is_waiting() && !status.cpu_status.has_error()
167 }
168 UninitChip::Initialized(_) => true,
169 }
170 }
171}
172
/// Settings consumed by `detect_chips` and the helpers built on top of it.
pub struct ChipDetectOptions {
    /// Forwarded to `wait_for_init` for every chip that is probed.
    /// NOTE(review): presumably lets detection carry on when a component
    /// fails to initialize; confirm against `wait_for_init`.
    pub continue_on_failure: bool,
    /// When `true`, no root chip is scheduled for remote (ethernet) exploration.
    pub local_only: bool,
    /// When non-empty, a root chip whose arch is not listed makes detection
    /// fail with `PlatformError::WrongChipArchs`.
    pub chip_filter: Vec<Arch>,
    /// Forwarded to `wait_for_init` for every chip that is probed.
    /// NOTE(review): exact semantics are defined by `wait_for_init`; confirm there.
    pub noc_safe: bool,
}
185
186impl Default for ChipDetectOptions {
187 fn default() -> Self {
188 Self {
189 continue_on_failure: true,
190 local_only: false,
191 chip_filter: Vec::new(),
192 noc_safe: false,
193 }
194 }
195}
196
197impl ChipDetectOptions {
198 pub fn new() -> Self {
199 Self::default()
200 }
201
202 pub fn continue_on_failure(mut self, continue_on_failure: bool) -> Self {
203 self.continue_on_failure = continue_on_failure;
204 self
205 }
206
207 pub fn local_only(mut self, local_only: bool) -> Self {
208 self.local_only = local_only;
209 self
210 }
211
212 pub fn noc_safe(mut self, noc_safe: bool) -> Self {
213 self.noc_safe = noc_safe;
214 self
215 }
216}
217
/// Initializes every root chip and, unless disabled, walks the ethernet mesh
/// reachable from Wormhole roots, returning every chip that was probed.
///
/// The work happens in two passes:
///
/// 1. Each entry of `root_chips` is checked against `chip_filter`, passed
///    through `wait_for_init` and pushed to the output unconditionally.
///    Wormhole roots whose ARC is alive, whose telemetry can be read, whose
///    ethernet is safe and whose board UPI is not `WH_6U_GLX_UPI` are queued
///    for remote exploration (skipped entirely when `local_only` is set).
/// 2. For every queued root, neighbouring chips are visited from a work stack;
///    each newly seen remote is opened, initialized and pushed to the output.
///
/// `init_callback` is handed to `wait_for_init` for every chip and is also
/// invoked directly with `CallReason::NotNew` when a remote turns out to be a
/// chip that was already recorded.
///
/// # Errors
///
/// * `PlatformError::WrongChipArchs` when a root chip's arch is not in a
///   non-empty `chip_filter`.
/// * `PlatformError::Generic` when a remote reports a coordinate different from
///   the one it was addressed by.
/// * Any error propagated with `?` from `wait_for_init`, coordinate, telemetry,
///   device-info or neighbour queries.
/// * `InitError::CallbackError` when the `NotNew` callback invocation fails.
///
/// # Panics
///
/// Hits `unimplemented!` if a queued root is not Wormhole (only Wormhole roots
/// are queued in the first pass), and `UninitChip::new` panics for chips that
/// `clone_chip` cannot handle.
pub fn detect_chips<E>(
    mut root_chips: Vec<Chip>,
    init_callback: &mut impl FnMut(crate::chip::ChipDetectState) -> Result<(), E>,
    options: ChipDetectOptions,
) -> Result<Vec<UninitChip>, InitError<E>> {
    let ChipDetectOptions {
        continue_on_failure,
        local_only,
        chip_filter,
        noc_safe,
    } = options;

    // Indices into `root_chips` of the roots whose neighbours should be explored.
    let mut remotes_to_investigate = Vec::new();
    // Identity keys (board id, interface id / coordinate) of chips already recorded.
    let mut seen_chips = HashSet::new();

    let mut output = Vec::new();
    for (root_index, root_chip) in root_chips.iter_mut().enumerate() {
        // An empty filter accepts every architecture.
        if !chip_filter.is_empty() && !chip_filter.contains(&root_chip.get_arch()) {
            Err(PlatformError::WrongChipArchs {
                actual: root_chip.get_arch(),
                expected: chip_filter.clone(),
                backtrace: BtWrapper::capture(),
            })?;
        }

        let status = wait_for_init(root_chip, init_callback, continue_on_failure, noc_safe)?;

        // Wrap a clone of the root chip together with its init status.
        let chip = UninitChip::new(status, root_chip);

        // Capture readiness before `chip` is moved into `output`.
        let remote_ready = chip.eth_safe();
        let arc_ready = chip.arc_alive();

        // Root chips are always reported, whatever happens below.
        output.push(chip);

        let ident = if let Some(wh) = root_chip.as_wh() {
            if arc_ready {
                if let Ok(telem) = root_chip.get_telemetry() {
                    // Combine the two 32-bit halves into a 64-bit board id and
                    // take the 20 bits starting at bit 36 as the UPI.
                    let board_type: u64 =
                        telem.board_id_low as u64 | ((telem.board_id_high as u64) << 32);
                    let board_upi: u64 = (board_type >> 36) & 0xFFFFF;
                    // NOTE(review): presumably the UPI of the 6U Galaxy Wormhole
                    // board, judging by the name; such boards are never explored.
                    const WH_6U_GLX_UPI: u64 = 0x35;

                    if !local_only && remote_ready && board_upi != WH_6U_GLX_UPI {
                        remotes_to_investigate.push(root_index);
                    }

                    (
                        Some(telem.board_id),
                        Some(InterfaceIdOrCoord::Coord(wh.get_local_chip_coord()?)),
                    )
                } else {
                    // Telemetry unreadable: no identity, so nothing to record.
                    continue;
                }
            } else {
                // ARC not alive: identity cannot be established.
                continue;
            }
        } else {
            (
                None,
                // Non-Wormhole chips are keyed by interface id when device info exists.
                root_chip
                    .get_device_info()?
                    .map(|v| InterfaceIdOrCoord::Id(v.interface_id)),
            )
        };

        // NOTE(review): this `continue` is the last statement of the loop body,
        // so a duplicate root identity has no further effect here.
        if !seen_chips.insert(ident) {
            continue;
        }
    }

    for root_chip in remotes_to_investigate.into_iter().map(|v| &root_chips[v]) {
        // Work stack of neighbours still to visit for this root.
        let mut to_check = root_chip.get_neighbouring_chips()?;

        // Coordinates already visited while exploring from this root.
        let mut seen_coords = HashSet::new();
        while let Some(nchip) = to_check.pop() {
            if !nchip.routing_enabled {
                continue;
            }

            if !seen_coords.insert(nchip.eth_addr) {
                continue;
            }

            // NOTE(review): this tests the root's arch, not the neighbour's, and
            // roots failing the filter already errored in the first pass.
            if !chip_filter.is_empty() && !chip_filter.contains(&root_chip.get_arch()) {
                continue;
            }

            if let Some(wh) = root_chip.as_wh() {
                // From here on `wh` is the remote chip, shadowing the root handle.
                let mut wh = wh.open_remote(nchip.eth_addr)?;

                let status = wait_for_init(&mut wh, init_callback, continue_on_failure, noc_safe)?;

                let local_coord = wh.get_local_chip_coord()?;

                // The remote must agree that it sits at the address used to reach it.
                if local_coord != nchip.eth_addr {
                    Err(PlatformError::Generic(
                        format!("When detecting chips in mesh found a mismatch between the expected chip coordinate {} and the actual {}", nchip.eth_addr, local_coord),
                        crate::error::BtWrapper::capture(),
                    ))?;
                }

                // With an ARC error the chip is still reported below, but it is
                // neither de-duplicated nor used to discover further neighbours.
                if !status.arc_status.has_error() {
                    let telem = wh.get_telemetry()?;

                    let ident = (
                        Some(telem.board_id),
                        Some(InterfaceIdOrCoord::Coord(local_coord)),
                    );

                    if !seen_chips.insert(ident) {
                        // Already recorded: notify the callback (passing the root
                        // chip) and skip it.
                        init_callback(crate::chip::ChipDetectState {
                            chip: root_chip,
                            call: crate::chip::CallReason::NotNew,
                        })
                        .map_err(InitError::CallbackError)?;
                        continue;
                    }

                    for nchip in wh.get_neighbouring_chips()? {
                        to_check.push(nchip);
                    }
                }

                let chip = Chip::from(Box::new(wh) as Box<dyn ChipImpl>);
                output.push(UninitChip::new(status, &chip));
            } else {
                unimplemented!("Don't have a handler for non-WH chips with ethernet support yet.")
            }
        }
    }

    Ok(output)
}
392
393pub fn detect_initialized_chips<E>(
394 root_chips: Vec<Chip>,
395 init_callback: &mut impl FnMut(crate::chip::ChipDetectState) -> Result<(), E>,
396 options: ChipDetectOptions,
397) -> Result<Vec<Chip>, InitError<E>> {
398 let chips = detect_chips(root_chips, init_callback, options)?;
399
400 let mut output = Vec::with_capacity(chips.len());
401 for chip in chips {
402 if chip.is_initialized() {
403 output.push(chip.upgrade());
404 } else {
405 output.push(chip.init(&mut |_| Ok(()))?);
406 }
407 }
408
409 Ok(output)
410}
411
412pub fn detect_chips_silent(
413 root_chips: Vec<Chip>,
414 options: ChipDetectOptions,
415) -> Result<Vec<Chip>, PlatformError> {
416 detect_initialized_chips::<std::convert::Infallible>(root_chips, &mut |_| Ok(()), options)
417 .map_err(|v| match v {
418 InitError::PlatformError(err) => err,
419 InitError::CallbackError(_) => unreachable!(),
420 })
421}