1use std::cmp::Ordering;
2use std::collections::HashMap;
3use std::convert::TryFrom;
4use std::fmt::{self, Display, Formatter};
5use std::path::PathBuf;
6use std::str::FromStr;
7use std::sync::Mutex;
8
9use lazy_static::lazy_static;
10use regex::Regex;
11use strum::IntoEnumIterator;
12
13use crate::arch::Arch;
14use crate::hwmon;
15use crate::perf_regs::PerformanceCounter;
16use crate::status::{get_device_status, DeviceStatus};
17use crate::sysfs::npu_mgmt::{self, *};
18use crate::sysfs::pci;
19use crate::{devfs, DeviceError, DeviceResult};
20
21#[derive(Debug, Clone)]
22
23pub struct Device {
46 device_info: DeviceInfo,
47 hwmon_fetcher: hwmon::Fetcher,
48 pub(crate) cores: Vec<CoreIdx>,
49 pub(crate) dev_files: Vec<DeviceFile>,
50}
51
52impl Device {
53 pub(crate) fn new(
54 device_info: DeviceInfo,
55 hwmon_fetcher: hwmon::Fetcher,
56 cores: Vec<CoreIdx>,
57 dev_files: Vec<DeviceFile>,
58 ) -> Self {
59 Self {
60 device_info,
61 hwmon_fetcher,
62 cores,
63 dev_files,
64 }
65 }
66
67 pub fn name(&self) -> String {
69 format!("npu{}", self.device_index())
70 }
71
72 pub fn device_index(&self) -> u8 {
74 self.device_info.device_index
75 }
76
77 fn device_info(&self) -> &DeviceInfo {
79 &self.device_info
80 }
81
82 pub fn arch(&self) -> Arch {
84 self.device_info().arch()
85 }
86
87 pub fn alive(&self) -> DeviceResult<bool> {
89 self.device_info.get(&DynamicMgmtFile::Alive).and_then(|v| {
90 npu_mgmt::parse_zero_or_one_to_bool(&v).ok_or_else(|| {
91 DeviceError::unexpected_value(format!(
92 "Bad alive value: {v} (only 0 or 1 expected)"
93 ))
94 })
95 })
96 }
97
98 pub fn atr_error(&self) -> DeviceResult<HashMap<String, u32>> {
100 self.device_info
101 .get(&DynamicMgmtFile::AtrError)
102 .map(npu_mgmt::build_atr_error_map)
103 }
104
105 pub fn busname(&self) -> DeviceResult<String> {
107 self.device_info.get(&StaticMgmtFile::Busname)
108 }
109
110 pub fn pci_dev(&self) -> DeviceResult<String> {
112 self.device_info.get(&StaticMgmtFile::Dev)
113 }
114
115 pub fn device_sn(&self) -> DeviceResult<String> {
117 self.device_info.get(&StaticMgmtFile::DeviceSn)
118 }
119
120 pub fn device_uuid(&self) -> DeviceResult<String> {
122 self.device_info.get(&StaticMgmtFile::DeviceUuid)
123 }
124
125 pub fn firmware_version(&self) -> DeviceResult<String> {
127 self.device_info.get(&DynamicMgmtFile::FwVersion)
128 }
129
130 pub fn driver_version(&self) -> DeviceResult<String> {
132 self.device_info.get(&DynamicMgmtFile::Version)
133 }
134
135 pub fn heartbeat(&self) -> DeviceResult<u32> {
137 self.device_info
138 .get(&DynamicMgmtFile::Heartbeat)
139 .and_then(|str| {
140 str.parse::<u32>().map_err(|_| {
141 DeviceError::unexpected_value(format!("Bad heartbeat value: {str}"))
142 })
143 })
144 }
145
146 pub fn clock_frequency(&self) -> DeviceResult<Vec<ClockFrequency>> {
148 self.device_info
149 .get(&DynamicMgmtFile::NeClkFreqInfo)
150 .map(|str| str.lines().flat_map(ClockFrequency::try_from).collect())
151 }
152
153 #[allow(dead_code)]
155 fn ctrl_device_led(&self, led: (bool, bool, bool)) -> DeviceResult<()> {
156 self.device_info.ctrl(
157 CtrlFile::DeviceLed,
158 &(led.0 as i32 + 0b10 * led.1 as i32 + 0b100 * led.2 as i32).to_string(),
159 )
160 }
161
162 #[allow(dead_code)]
164 fn ctrl_ne_clock(&self, toggle: npu_mgmt::Toggle) -> DeviceResult<()> {
165 self.device_info
166 .ctrl(CtrlFile::NeClock, &(toggle as u8).to_string())
167 }
168
169 #[allow(dead_code)]
171 fn ctrl_ne_dtm_policy(&self, policy: npu_mgmt::DtmPolicy) -> DeviceResult<()> {
172 self.device_info
173 .ctrl(CtrlFile::NeDtmPolicy, &(policy as u8).to_string())
174 }
175
176 #[allow(dead_code)]
178 fn ctrl_performance_level(&self, level: npu_mgmt::PerfLevel) -> DeviceResult<()> {
179 self.device_info
180 .ctrl(CtrlFile::PerformanceLevel, &(level as u8).to_string())
181 }
182
183 #[allow(dead_code)]
185 fn ctrl_performance_mode(&self, mode: npu_mgmt::PerfMode) -> DeviceResult<()> {
186 self.device_info
187 .ctrl(CtrlFile::PerformanceMode, &(mode as u8).to_string())
188 }
189
190 pub fn numa_node(&self) -> DeviceResult<NumaNode> {
192 self.device_info.get_numa_node()
193 }
194
195 pub fn core_num(&self) -> u8 {
197 u8::try_from(self.cores.len()).unwrap()
198 }
199
200 pub fn cores(&self) -> &Vec<CoreIdx> {
202 &self.cores
203 }
204
205 pub fn dev_files(&self) -> &Vec<DeviceFile> {
207 &self.dev_files
208 }
209
210 pub fn performance_counters(&self) -> Vec<(&DeviceFile, PerformanceCounter)> {
212 let mut counters = vec![];
213
214 for dev_file in self.dev_files() {
215 if let Ok(perf_counter) = self.device_info().get_performance_counter(dev_file) {
216 counters.push((dev_file, perf_counter));
217 }
218 }
219
220 counters
221 }
222
223 pub async fn get_status_core(&self, core: CoreIdx) -> DeviceResult<CoreStatus> {
225 for file in &self.dev_files {
226 if file.mode() != DeviceMode::Single {
228 continue;
229 }
230 if (file.core_range().contains(&core))
231 && get_device_status(&file.path).await? == DeviceStatus::Occupied
232 {
233 return Ok(CoreStatus::Occupied(file.to_string()));
234 }
235 }
236 Ok(CoreStatus::Available)
237 }
238
239 pub async fn get_status_all(&self) -> DeviceResult<HashMap<CoreIdx, CoreStatus>> {
241 let mut status_map = self.new_status_map();
242
243 for core in self.cores() {
244 let status = self.get_status_core(*core).await?;
245 status_map.insert(*core, status);
246 }
247 Ok(status_map)
248 }
249
250 pub(crate) fn new_status_map(&self) -> HashMap<CoreIdx, CoreStatus> {
251 self.cores
252 .iter()
253 .map(|k| (*k, CoreStatus::Available))
254 .collect()
255 }
256
257 pub fn get_hwmon_fetcher(&self) -> &hwmon::Fetcher {
259 &self.hwmon_fetcher
260 }
261}
262
263impl Display for Device {
264 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
265 write!(f, "npu{}", self.device_index())
266 }
267}
268
269impl Eq for Device {}
270
271impl Ord for Device {
272 fn cmp(&self, other: &Self) -> Ordering {
273 self.device_index().cmp(&other.device_index())
274 }
275}
276
277impl PartialEq for Device {
278 fn eq(&self, other: &Self) -> bool {
279 self.device_info == other.device_info
280 && self.hwmon_fetcher == other.hwmon_fetcher
281 && self.cores == other.cores
282 && self.dev_files == other.dev_files
283 }
284}
285
286impl PartialOrd for Device {
287 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
288 Some(self.cmp(other))
289 }
290}
291
/// NUMA node of a device, as resolved by `DeviceInfo::get_numa_node`.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum NumaNode {
    /// The reported node id was `-1`.
    UnSupported,
    /// A non-negative node id.
    Id(usize),
}
298
299#[derive(Debug)]
300pub struct DeviceInfo {
301 device_index: u8,
302 dev_root: PathBuf,
303 sys_root: PathBuf,
304 arch: Arch,
305 meta: HashMap<&'static str, String>,
306 numa_node: Mutex<Option<NumaNode>>,
307}
308
309impl DeviceInfo {
310 pub(crate) fn new(device_index: u8, dev_root: PathBuf, sys_root: PathBuf) -> DeviceInfo {
311 let mut meta = HashMap::default();
312 for file in StaticMgmtFile::iter() {
313 let filename = file.filename();
314 let value = npu_mgmt::read_mgmt_file(&sys_root, filename, device_index).unwrap();
315 meta.insert(filename, value);
316 }
317 let device_type = meta.get(&StaticMgmtFile::DeviceType.filename()).unwrap();
318 let soc_rev = meta.get(&StaticMgmtFile::SocRev.filename()).unwrap();
319 let arch = Arch::from_str(format!("{device_type}{soc_rev}").as_str())
320 .map_err(|_| DeviceError::UnknownArch {
321 arch: device_type.clone(),
322 rev: soc_rev.clone(),
323 })
324 .unwrap();
325 Self {
326 device_index,
327 dev_root,
328 sys_root,
329 arch,
330 meta,
331 numa_node: Mutex::new(None),
332 }
333 }
334
335 pub fn arch(&self) -> Arch {
336 self.arch
337 }
338
339 pub fn get(&self, mgmt_file: &dyn MgmtFile) -> DeviceResult<String> {
340 if mgmt_file.is_static() {
341 Ok(self.meta.get(mgmt_file.filename()).unwrap().to_string())
342 } else {
343 let value =
344 npu_mgmt::read_mgmt_file(&self.sys_root, mgmt_file.filename(), self.device_index)?;
345 Ok(value)
346 }
347 }
348
349 pub fn ctrl(&self, ctrl_file: CtrlFile, contents: &str) -> DeviceResult<()> {
350 npu_mgmt::write_ctrl_file(
351 &self.sys_root,
352 &ctrl_file.to_string(),
353 self.device_index,
354 contents,
355 )?;
356
357 Ok(())
358 }
359
360 pub fn get_numa_node(&self) -> DeviceResult<NumaNode> {
361 let mut numa_node = self.numa_node.lock().unwrap();
362 if let Some(node) = *numa_node {
363 return Ok(node);
364 }
365
366 let busname = self.get(&StaticMgmtFile::Busname)?;
367 let id = pci::numa::read_numa_node(&self.sys_root, &busname)?
368 .parse::<i32>()
369 .unwrap();
370
371 let node = if id >= 0 {
372 NumaNode::Id(id as usize)
373 } else if id == -1 {
374 NumaNode::UnSupported
375 } else {
376 return Err(DeviceError::unexpected_value(format!(
377 "Unexpected numa node id: {id}"
378 )));
379 };
380
381 *numa_node = Some(node);
382 Ok(node)
383 }
384
385 pub fn get_performance_counter(&self, file: &DeviceFile) -> DeviceResult<PerformanceCounter> {
386 PerformanceCounter::read(&self.sys_root, file.filename())
387 .map_err(DeviceError::performance_counter_error)
388 }
389}
390
391impl Eq for DeviceInfo {}
392
393impl PartialEq for DeviceInfo {
394 fn eq(&self, other: &Self) -> bool {
395 self.device_index == other.device_index
396 && self.dev_root == other.dev_root
397 && self.sys_root == other.sys_root
398 && self.arch == other.arch
399 && self.meta == other.meta
400 && *self.numa_node.lock().unwrap() == *other.numa_node.lock().unwrap()
401 }
402}
403
404impl Clone for DeviceInfo {
405 fn clone(&self) -> Self {
406 Self {
407 device_index: self.device_index,
408 dev_root: self.dev_root.clone(),
409 sys_root: self.sys_root.clone(),
410 arch: self.arch,
411 meta: self.meta.clone(),
412 numa_node: Mutex::new(*self.numa_node.lock().unwrap()),
413 }
414 }
415}
416
/// Occupancy state of a single core.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum CoreStatus {
    /// The core is free.
    Available,
    /// The core is in use; the payload names the device file it is occupied through.
    Occupied(String),
    /// The core cannot be used.
    Unavailable,
}

/// Human-readable rendering: `available`, `occupied by <device file>` or `unavailable`.
impl Display for CoreStatus {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        if let CoreStatus::Occupied(devfile) = self {
            return write!(f, "occupied by {devfile}");
        }
        let label = match self {
            CoreStatus::Unavailable => "unavailable",
            _ => "available",
        };
        f.write_str(label)
    }
}
434
/// Index of a single core of a device.
pub(crate) type CoreIdx = u8;

/// Set of cores addressed by a device file.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub enum CoreRange {
    /// Every core of the device.
    All,
    /// The inclusive range `(first, last)` of core indices.
    Range((u8, u8)),
}

impl CoreRange {
    /// Returns `true` when `idx` is covered by this range.
    ///
    /// [`CoreRange::All`] covers every index; a `Range` covers `first..=last`, so
    /// an inverted range (`first > last`) covers nothing.
    pub fn contains(&self, idx: &CoreIdx) -> bool {
        match self {
            CoreRange::All => true,
            CoreRange::Range((s, e)) => (*s..=*e).contains(idx),
        }
    }

    /// Signed distance `last - first` of a range.
    ///
    /// Computed in `i16` so that an inverted range cannot overflow the `u8`
    /// subtraction.
    fn signed_width(range: &(u8, u8)) -> i16 {
        i16::from(range.1) - i16::from(range.0)
    }
}

/// `All` sorts before every `Range`; ranges sort by width first and then by their
/// `(first, last)` bounds.
impl Ord for CoreRange {
    fn cmp(&self, other: &Self) -> Ordering {
        match (self, other) {
            (CoreRange::All, CoreRange::All) => Ordering::Equal,
            (CoreRange::All, CoreRange::Range(_)) => Ordering::Less,
            (CoreRange::Range(_), CoreRange::All) => Ordering::Greater,
            (CoreRange::Range(lhs), CoreRange::Range(rhs)) => Self::signed_width(lhs)
                .cmp(&Self::signed_width(rhs))
                .then_with(|| lhs.cmp(rhs)),
        }
    }
}

impl PartialOrd for CoreRange {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// A single core index becomes the one-element range `(id, id)`.
impl From<u8> for CoreRange {
    fn from(id: u8) -> Self {
        Self::Range((id, id))
    }
}

/// A `(first, last)` pair becomes a range only when `first < last`.
impl TryFrom<(u8, u8)> for CoreRange {
    type Error = ();
    fn try_from(v: (u8, u8)) -> Result<Self, Self::Error> {
        if v.0 < v.1 {
            Ok(Self::Range(v))
        } else {
            Err(())
        }
    }
}
492
493#[derive(Debug, Eq, PartialEq, Clone, Hash)]
495pub struct DeviceFile {
496 pub(crate) device_index: u8,
497 pub(crate) core_range: CoreRange,
498 pub(crate) path: PathBuf,
499 pub(crate) mode: DeviceMode,
500}
501
502impl Display for DeviceFile {
503 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
504 write!(f, "{}", self.path.file_name().unwrap().to_str().unwrap())
505 }
506}
507
508impl DeviceFile {
509 pub fn path(&self) -> &PathBuf {
511 &self.path
512 }
513
514 pub fn filename(&self) -> &str {
516 self.path
518 .file_name()
519 .expect("not a file")
520 .to_str()
521 .expect("invalid UTF-8 encoding")
522 }
523
524 pub fn device_index(&self) -> u8 {
526 self.device_index
527 }
528
529 pub fn core_range(&self) -> CoreRange {
531 self.core_range
532 }
533
534 pub fn mode(&self) -> DeviceMode {
536 self.mode
537 }
538}
539
540impl TryFrom<&PathBuf> for DeviceFile {
541 type Error = DeviceError;
542
543 fn try_from(path: &PathBuf) -> Result<Self, Self::Error> {
544 let file_name = path
545 .file_name()
546 .expect("not a file")
547 .to_string_lossy()
548 .to_string();
549
550 let (device_index, core_indices) = devfs::parse_indices(file_name)?;
551
552 let (mode, core_range) = match core_indices.len() {
553 0 => (DeviceMode::MultiCore, CoreRange::All),
554 1 => (DeviceMode::Single, CoreRange::from(core_indices[0])),
555 n => (
556 DeviceMode::Fusion,
557 CoreRange::try_from((core_indices[0], core_indices[n - 1]))
558 .map_err(|_| DeviceError::unrecognized_file(path.to_string_lossy()))?,
559 ),
560 };
561
562 Ok(DeviceFile {
563 device_index,
564 core_range,
565 path: path.clone(),
566 mode,
567 })
568 }
569}
570
571#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash, enum_utils::FromStr)]
573#[enumeration(case_insensitive)]
574pub enum DeviceMode {
575 Single,
576 Fusion,
577 MultiCore,
578}
579
580lazy_static! {
581 static ref CLOCK_FREQUENCY_FMT: Regex =
583 Regex::new(r"(?P<name>(\w| )+)\((?P<unit>.*)\): (?P<value>\d+)").unwrap();
584}
585
/// One named clock frequency entry: a name, a unit and a numeric value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClockFrequency {
    /// Name of the clock.
    pub(crate) name: String,
    /// Unit the value is expressed in.
    pub(crate) unit: String,
    /// Frequency value.
    pub(crate) value: u32,
}
592
593impl TryFrom<&str> for ClockFrequency {
594 type Error = ();
595
596 fn try_from(line: &str) -> Result<Self, Self::Error> {
597 let items = CLOCK_FREQUENCY_FMT.captures(line).ok_or(())?;
598 let name = items.name("name").ok_or(())?.as_str().trim();
599 let unit = items.name("unit").ok_or(())?.as_str().trim();
600 let value = items.name("value").ok_or(())?.as_str().trim();
601
602 Ok(Self {
603 name: name.to_string(),
604 unit: unit.to_string(),
605 value: value.parse().map_err(|_| ())?,
606 })
607 }
608}
609
610impl ClockFrequency {
611 pub fn name(&self) -> &str {
612 self.name.as_str()
613 }
614
615 pub fn unit(&self) -> &str {
616 self.unit.as_str()
617 }
618
619 pub fn value(&self) -> u32 {
620 self.value
621 }
622}
623
#[cfg(test)]
mod tests {
    use super::*;

    // Root directories of the fixture tree shared by the sysfs-backed tests.
    const TEST_DEV_ROOT: &str = "../test_data/test-0/dev";
    const TEST_SYS_ROOT: &str = "../test_data/test-0/sys";

    /// Builds the `DeviceInfo` of device `device_index` from the fixture tree.
    fn fixture_device_info(device_index: u8) -> DeviceInfo {
        DeviceInfo::new(
            device_index,
            PathBuf::from(TEST_DEV_ROOT),
            PathBuf::from(TEST_SYS_ROOT),
        )
    }

    /// Converts a textual path into a `DeviceFile`.
    fn parse_device_file(path: &str) -> Result<DeviceFile, DeviceError> {
        DeviceFile::try_from(&PathBuf::from(path))
    }

    #[test]
    fn test_core_range_ordering() {
        // Listed in the expected ascending order; each neighbour pair is checked.
        let ascending = [
            CoreRange::All,
            CoreRange::Range((0, 0)),
            CoreRange::Range((1, 1)),
            CoreRange::Range((0, 1)),
            CoreRange::Range((2, 3)),
            CoreRange::Range((0, 3)),
        ];

        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    #[test]
    fn test_try_from() -> Result<(), DeviceError> {
        let accepted = [
            ("./npu0", CoreRange::All, DeviceMode::MultiCore),
            ("./npu0pe0", CoreRange::Range((0, 0)), DeviceMode::Single),
            ("./npu0pe1", CoreRange::Range((1, 1)), DeviceMode::Single),
            ("./npu0pe0-1", CoreRange::Range((0, 1)), DeviceMode::Fusion),
            ("./npu0pe0-2", CoreRange::Range((0, 2)), DeviceMode::Fusion),
        ];
        for &(path, core_range, mode) in accepted.iter() {
            assert_eq!(
                parse_device_file(path)?,
                DeviceFile {
                    device_index: 0,
                    path: PathBuf::from(path),
                    core_range,
                    mode,
                }
            );
        }

        let rejected = ["./npu0pe", "./npu0pe0-", "./npu0pe-1"];
        for &path in rejected.iter() {
            assert!(parse_device_file(path).is_err());
        }
        Ok(())
    }

    #[test]
    fn test_core_status_fmt() {
        let occupied = CoreStatus::Occupied(String::from("npu0pe0"));

        assert_eq!(format!("{}", CoreStatus::Available), "available");
        assert_eq!(format!("{}", CoreStatus::Unavailable), "unavailable");
        assert_eq!(format!("{}", occupied), "occupied by npu0pe0");
    }

    #[test]
    fn test_device_mode_from_str() {
        let accepted = [
            ("single", DeviceMode::Single),
            ("SiNgLe", DeviceMode::Single),
            ("fusion", DeviceMode::Fusion),
            ("fUsIoN", DeviceMode::Fusion),
            ("multicore", DeviceMode::MultiCore),
            ("MultiCore", DeviceMode::MultiCore),
        ];
        for &(text, expected) in accepted.iter() {
            assert_eq!(text.parse::<DeviceMode>(), Ok(expected));
        }
        assert_eq!("invalid".parse::<DeviceMode>(), Err(()));
    }

    #[test]
    fn test_static_read_sysfs() -> DeviceResult<()> {
        let device_info = fixture_device_info(0);
        let key = StaticMgmtFile::Busname.filename();
        let busname = String::from("0000:6d:00.0");

        // The cached value is present before and after going through `get`.
        assert_eq!(device_info.meta.get(key), Some(&busname));
        assert_eq!(
            device_info.get(&StaticMgmtFile::Busname).ok(),
            Some(busname.clone())
        );
        assert_eq!(device_info.meta.get(key), Some(&busname));

        Ok(())
    }

    #[test]
    fn test_dynamic_read_sysfs() -> DeviceResult<()> {
        let device_info = fixture_device_info(0);
        let key = DynamicMgmtFile::FwVersion.filename();

        // Dynamic values are never stored in the cache, even after a read.
        assert_eq!(device_info.meta.get(key), None);
        assert_eq!(
            device_info.get(&DynamicMgmtFile::FwVersion).ok(),
            Some(String::from("1.6.0, c1bebfd"))
        );
        assert_eq!(device_info.meta.get(key), None);

        Ok(())
    }

    #[test]
    fn test_numa_node() -> DeviceResult<()> {
        let expectations = [(0u8, NumaNode::Id(0)), (1u8, NumaNode::UnSupported)];

        for &(device_index, expected) in expectations.iter() {
            let device_info = fixture_device_info(device_index);

            // Nothing is cached until the first lookup, which then fills the cache.
            assert_eq!(*device_info.numa_node.lock().unwrap(), None);
            assert_eq!(device_info.get_numa_node()?, expected);
            assert_eq!(*device_info.numa_node.lock().unwrap(), Some(expected));
        }

        Ok(())
    }

    #[test]
    fn test_clock_frequency() {
        let parsed = ClockFrequency::try_from("ne tensor (MHz): 2000");
        assert!(parsed.is_ok());

        let frequency = parsed.unwrap();
        assert_eq!(frequency.name(), "ne tensor");
        assert_eq!(frequency.unit(), "MHz");
        assert_eq!(frequency.value(), 2000);
    }
}