1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
//! Backend feature detection system for runtime capability discovery
use crate::device::{CpuDevice, Device, DeviceCapabilities, DeviceType, SimdFeatures};
use crate::error::Result;
use std::collections::HashMap;
/// Backend feature detection system for runtime capability discovery.
///
/// Construct via [`BackendFeatureDetector::new`], which performs the full
/// detection pass (runtime features, device discovery, capability analysis)
/// eagerly; all fields are fully populated afterwards.
#[derive(Debug, Clone)]
pub struct BackendFeatureDetector {
    /// Available devices discovered at runtime
    pub available_devices: Vec<DeviceInfo>,
    /// Runtime feature flags
    pub runtime_features: RuntimeFeatures,
    /// Backend capabilities summary
    pub backend_summary: BackendSummary,
}
/// Information about a discovered device
#[derive(Debug, Clone)]
pub struct DeviceInfo {
    /// Device type and ID
    pub device_type: DeviceType,
    /// Device capabilities
    pub capabilities: DeviceCapabilities,
    /// Whether the device is currently available for use
    pub is_available: bool,
    /// Device priority for automatic selection (higher is better).
    /// CPU devices are registered at a baseline priority of 10.
    pub priority: u32,
    /// Additional device-specific metadata (free-form key/value pairs)
    pub metadata: HashMap<String, String>,
}
/// Runtime feature detection results, populated by
/// `BackendFeatureDetector::detect_all_features`.
#[derive(Debug, Clone, Default)]
pub struct RuntimeFeatures {
    /// CPU features
    pub cpu_features: CpuFeatures,
    /// GPU features (all-`None`/zero defaults unless the "gpu" feature is enabled)
    pub gpu_features: GpuFeatures,
    /// System features
    pub system_features: SystemFeatures,
    /// Compiler and build features
    pub build_features: BuildFeatures,
}
/// CPU-specific runtime features
#[derive(Debug, Clone, Default)]
pub struct CpuFeatures {
    /// Detected SIMD capabilities
    pub simd: SimdFeatures,
    /// Number of physical CPU cores
    pub physical_cores: usize,
    /// Number of logical CPU cores (including hyperthreading)
    pub logical_cores: usize,
    /// CPU architecture string (from `std::env::consts::ARCH`)
    pub architecture: String,
    /// CPU vendor (Intel, AMD, ARM, etc.); `None` when undetectable
    pub vendor: Option<String>,
    /// CPU model name (currently never populated - no data source)
    pub model_name: Option<String>,
    /// CPU base frequency in Hz (currently never populated - no data source)
    pub base_frequency: Option<u64>,
    /// Cache sizes (L1, L2, L3)
    pub cache_sizes: CacheSizes,
}
/// Cache size information; each level is `None` when it could not be read
/// (non-Linux platforms, or missing sysfs entries).
#[derive(Debug, Clone, Default)]
pub struct CacheSizes {
    /// L1 data cache size in bytes
    pub l1_data: Option<usize>,
    /// L1 instruction cache size in bytes
    pub l1_instruction: Option<usize>,
    /// L2 cache size in bytes
    pub l2: Option<usize>,
    /// L3 cache size in bytes
    pub l3: Option<usize>,
}
/// GPU-specific runtime features. All fields keep their defaults unless the
/// crate is built with the "gpu" feature.
#[derive(Debug, Clone, Default)]
pub struct GpuFeatures {
    /// CUDA support and version
    pub cuda_version: Option<String>,
    /// CUDA compute capability as a (major, minor) pair
    pub cuda_compute_capability: Option<(u32, u32)>,
    /// OpenCL support
    pub opencl_version: Option<String>,
    /// Vulkan support
    pub vulkan_version: Option<String>,
    /// Metal support (Apple)
    pub metal_version: Option<String>,
    /// WebGPU support
    pub webgpu_available: bool,
    /// Number of GPU devices detected
    pub gpu_count: usize,
}
/// System-level features
#[derive(Debug, Clone, Default)]
pub struct SystemFeatures {
    /// Operating system information
    pub os_info: OsInfo,
    /// Total system memory in bytes
    pub total_memory: usize,
    /// Page size in bytes
    pub page_size: usize,
    /// NUMA topology available (Linux sysfs probe; always false elsewhere)
    pub numa_available: bool,
    /// Number of NUMA nodes
    pub numa_nodes: usize,
    /// Memory bandwidth estimate in bytes/sec (currently never populated)
    pub memory_bandwidth: Option<u64>,
}
/// Operating system information
#[derive(Debug, Clone, Default)]
pub struct OsInfo {
    /// OS name (Linux, Windows, macOS, etc.), from `std::env::consts::OS`
    pub name: String,
    /// OS version; only populated on Linux (parsed from /proc/version)
    pub version: Option<String>,
    /// OS architecture, from `std::env::consts::ARCH`
    pub arch: String,
    /// Kernel version via `uname -r` (Linux/macOS only)
    pub kernel_version: Option<String>,
}
/// Build and compiler features
#[derive(Debug, Clone, Default)]
pub struct BuildFeatures {
    /// Target triple for this build.
    /// NOTE(review): approximated as "<arch>-<os>"; vendor/ABI are omitted.
    pub target_triple: String,
    /// Optimization level (from the OPT_LEVEL env var, if set at runtime)
    pub opt_level: Option<String>,
    /// Debug info available (`cfg!(debug_assertions)`)
    pub debug_info: bool,
    /// Feature flags enabled at compile time
    pub compile_features: Vec<String>,
    /// Cargo features enabled
    pub cargo_features: Vec<String>,
}
/// Summary of available backend capabilities
#[derive(Debug, Clone, Default)]
pub struct BackendSummary {
    /// Best available device for each device type
    pub best_devices: HashMap<DeviceType, DeviceInfo>,
    /// Recommended device for general use (highest-priority available device)
    pub recommended_device: Option<DeviceInfo>,
    /// Overall system performance tier (Low, Medium, High, Extreme)
    pub performance_tier: PerformanceTier,
    /// Features that may impact performance
    pub performance_notes: Vec<String>,
    /// Missing features or recommendations
    pub recommendations: Vec<String>,
}
/// System performance classification; `Medium` is the default variant.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum PerformanceTier {
    /// Basic performance for simple tasks
    Low,
    /// Good performance for most workloads
    #[default]
    Medium,
    /// High performance for demanding tasks
    High,
    /// Extreme performance for HPC workloads
    Extreme,
}
/// Workload type for device selection; consumed by
/// `BackendFeatureDetector::best_device_for_workload`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorkloadType {
    /// General purpose computing
    GeneralCompute,
    /// High precision mathematical operations
    HighPrecisionMath,
    /// Large matrix operations
    LargeMatrices,
    /// Highly parallel workloads
    ParallelWorkloads,
}
impl BackendFeatureDetector {
/// Create a new feature detector and perform comprehensive detection
pub fn new() -> Result<Self> {
let mut detector = Self {
available_devices: Vec::new(),
runtime_features: RuntimeFeatures::default(),
backend_summary: BackendSummary::default(),
};
detector.detect_all_features()?;
detector.discover_devices()?;
detector.analyze_capabilities()?;
Ok(detector)
}
/// Detect all runtime features
fn detect_all_features(&mut self) -> Result<()> {
self.runtime_features.cpu_features = self.detect_cpu_features()?;
self.runtime_features.gpu_features = self.detect_gpu_features()?;
self.runtime_features.system_features = self.detect_system_features()?;
self.runtime_features.build_features = self.detect_build_features()?;
Ok(())
}
/// Gather CPU information: SIMD support, core counts, vendor, and caches.
///
/// # Errors
/// Propagates failures from the CPU device capability query and from cache
/// size detection.
fn detect_cpu_features(&self) -> Result<CpuFeatures> {
    let device = CpuDevice::new();
    let caps = device.capabilities()?;
    Ok(CpuFeatures {
        simd: caps.simd_features().clone(),
        physical_cores: num_cpus::get_physical(),
        logical_cores: num_cpus::get(),
        architecture: std::env::consts::ARCH.to_string(),
        vendor: self.detect_cpu_vendor(),
        // The device layer exposes neither model name nor clock frequency.
        model_name: None,
        base_frequency: None,
        cache_sizes: self.detect_cache_sizes()?,
    })
}
/// Detect CPU vendor with detailed identification.
///
/// Exactly one of the `cfg` blocks below survives compilation, and that
/// surviving block expression becomes the function's return value.
fn detect_cpu_vendor(&self) -> Option<String> {
    #[cfg(target_arch = "x86_64")]
    {
        self.detect_x86_cpu_vendor()
    }
    #[cfg(target_arch = "aarch64")]
    {
        self.detect_arm_cpu_vendor()
    }
    // Other architectures: no vendor probing implemented.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    {
        None
    }
}
/// Detect x86/x86_64 CPU vendor using the CPUID instruction.
///
/// CPUID leaf 0 returns a 12-byte vendor string packed into the EBX, EDX,
/// ECX registers (in that order), e.g. "GenuineIntel" or "AuthenticAMD".
///
/// Fix: `__cpuid` is an `unsafe fn`, so the call must be wrapped in an
/// `unsafe` block — the original code did not compile on x86_64 targets.
#[cfg(target_arch = "x86_64")]
fn detect_x86_cpu_vendor(&self) -> Option<String> {
    use std::arch::x86_64::__cpuid;
    // SAFETY: the CPUID instruction with leaf 0 is supported on every
    // x86_64 CPU, so executing it cannot fault.
    let cpuid_result = unsafe { __cpuid(0) };
    let vendor_string = format!(
        "{}{}{}",
        std::str::from_utf8(&cpuid_result.ebx.to_le_bytes()).unwrap_or(""),
        std::str::from_utf8(&cpuid_result.edx.to_le_bytes()).unwrap_or(""),
        std::str::from_utf8(&cpuid_result.ecx.to_le_bytes()).unwrap_or("")
    );
    match vendor_string.as_str() {
        "GenuineIntel" => Some("Intel".to_string()),
        "AuthenticAMD" => Some("AMD".to_string()),
        "VIA VIA VIA " => Some("VIA".to_string()),
        "CyrixInstead" => Some("Cyrix".to_string()),
        "CentaurHauls" => Some("Centaur".to_string()),
        "NexGenDriven" => Some("NexGen".to_string()),
        "HygonGenuine" => Some("Hygon".to_string()),
        _ => Some(format!(
            "Unknown ({})",
            vendor_string.trim_end_matches('\0')
        )),
    }
}
/// Detect ARM CPU vendor from /proc/cpuinfo.
///
/// On Linux this maps the "CPU implementer" code (the implementer field of
/// the MIDR register, as reported by the kernel) to a vendor name, with
/// extra heuristics on the "Hardware" line for Broadcom and Apple boards.
/// On non-Linux aarch64, or when no hint is found, it falls back to the
/// generic "ARM".
#[cfg(target_arch = "aarch64")]
fn detect_arm_cpu_vendor(&self) -> Option<String> {
    #[cfg(target_os = "linux")]
    {
        if let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") {
            for line in cpuinfo.lines() {
                if line.starts_with("CPU implementer") {
                    if let Some(implementer) = line.split(':').nth(1) {
                        let implementer = implementer.trim();
                        // First implementer line wins; return immediately.
                        return match implementer {
                            "0x41" => Some("ARM".to_string()),
                            "0x42" => Some("Broadcom".to_string()),
                            "0x43" => Some("Cavium".to_string()),
                            "0x44" => Some("DEC".to_string()),
                            "0x46" => Some("Fujitsu".to_string()),
                            "0x48" => Some("HiSilicon".to_string()),
                            "0x49" => Some("Infineon".to_string()),
                            "0x4d" => Some("Motorola".to_string()),
                            "0x4e" => Some("NVIDIA".to_string()),
                            "0x50" => Some("Applied Micro".to_string()),
                            "0x51" => Some("Qualcomm".to_string()),
                            "0x56" => Some("Marvell".to_string()),
                            "0x61" => Some("Apple".to_string()),
                            _ => Some(format!("Unknown ARM implementer ({})", implementer)),
                        };
                    }
                }
                // Board-level hints for systems without an implementer line.
                if line.starts_with("Hardware") && line.contains("BCM") {
                    return Some("Broadcom".to_string());
                }
                if line.starts_with("Hardware") && line.contains("Apple") {
                    return Some("Apple".to_string());
                }
            }
        }
    }
    // Fallback: not Linux, cpuinfo unreadable, or no recognizable hint.
    Some("ARM".to_string())
}
/// Detect cache sizes
fn detect_cache_sizes(&self) -> Result<CacheSizes> {
#[allow(unused_mut)] // mut needed for conditional compilation features
let mut cache_sizes = CacheSizes::default();
#[cfg(target_os = "linux")]
{
// Try to read cache info from sysfs
if let Ok(l1d) =
std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index0/size")
{
cache_sizes.l1_data = self.parse_cache_size(&l1d);
}
if let Ok(l1i) =
std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index1/size")
{
cache_sizes.l1_instruction = self.parse_cache_size(&l1i);
}
if let Ok(l2) =
std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index2/size")
{
cache_sizes.l2 = self.parse_cache_size(&l2);
}
if let Ok(l3) =
std::fs::read_to_string("/sys/devices/system/cpu/cpu0/cache/index3/size")
{
cache_sizes.l3 = self.parse_cache_size(&l3);
}
}
Ok(cache_sizes)
}
/// Parse a sysfs cache size string (e.g. "32K" -> 32768).
///
/// Accepts an optional K/M/G suffix (powers of 1024); a bare number is
/// taken as bytes. Returns `None` for empty or unparseable input.
#[allow(dead_code)] // Used in platform-specific cache detection
fn parse_cache_size(&self, size_str: &str) -> Option<usize> {
    let trimmed = size_str.trim();
    if trimmed.is_empty() {
        return None;
    }
    // The suffix bytes are ASCII, so slicing off the final byte is safe.
    let (digits, multiplier) = match trimmed.as_bytes().last() {
        Some(b'K') => (&trimmed[..trimmed.len() - 1], 1024usize),
        Some(b'M') => (&trimmed[..trimmed.len() - 1], 1024 * 1024),
        Some(b'G') => (&trimmed[..trimmed.len() - 1], 1024 * 1024 * 1024),
        _ => (trimmed, 1),
    };
    digits.parse::<usize>().ok().map(|value| value * multiplier)
}
/// Detect GPU features using scirs2-core GPU backend detection.
///
/// # SciRS2 POLICY COMPLIANCE
/// This method uses scirs2-core::gpu for GPU detection when the "gpu" feature is enabled.
/// Supports: CUDA, Metal, WebGPU, ROCm, OpenCL
///
/// Without the "gpu" feature, the default (all-`None`/zero) feature set is
/// returned unchanged.
fn detect_gpu_features(&self) -> Result<GpuFeatures> {
    #[allow(unused_mut)] // `features` is only mutated when "gpu" is enabled
    let mut features = GpuFeatures::default();
    // GPU detection using scirs2-core when available
    #[cfg(feature = "gpu")]
    {
        // Detect CUDA support
        #[cfg(feature = "cuda")]
        {
            features.cuda_version = self.detect_cuda_version();
            features.cuda_compute_capability = self.detect_cuda_compute_capability();
        }
        // Detect Metal support (Apple platforms)
        #[cfg(target_os = "macos")]
        {
            features.metal_version = self.detect_metal_version();
        }
        // Detect WebGPU support
        #[cfg(feature = "wgpu")]
        {
            features.webgpu_available = self.detect_webgpu_support();
        }
        // Detect OpenCL support
        features.opencl_version = self.detect_opencl_version();
        // Detect Vulkan support
        features.vulkan_version = self.detect_vulkan_version();
        // Count available GPU devices
        features.gpu_count = self.count_gpu_devices();
    }
    Ok(features)
}
/// Detect CUDA version if available
///
/// # SciRS2 Integration (Phase 2: GPU Kernel Integration)
/// This function will integrate with scirs2-core::gpu when available:
/// ```rust,ignore
/// #[cfg(scirs2_gpu_available)]
/// use scirs2_core::gpu::cuda::CudaContext;
/// let version = CudaContext::driver_version();
/// ```
///
/// # Current Status
/// Placeholder ready for scirs2-core GPU integration; both branches below
/// currently return `None`.
/// Detection logic will be activated when scirs2-core is built with CUDA support.
#[allow(dead_code)]
#[cfg(all(feature = "gpu", feature = "cuda"))]
fn detect_cuda_version(&self) -> Option<String> {
    // Ready for scirs2-core::gpu::cuda integration
    #[cfg(scirs2_gpu_available)]
    {
        // Will use: scirs2_core::gpu::cuda::CudaContext::driver_version()
        None // Placeholder until scirs2-core GPU available
    }
    #[cfg(not(scirs2_gpu_available))]
    {
        None
    }
}
/// Stub used when CUDA support is compiled out; CUDA is never reported.
#[allow(dead_code)]
#[cfg(not(all(feature = "gpu", feature = "cuda")))]
fn detect_cuda_version(&self) -> Option<String> {
    None
}
/// Detect CUDA compute capability
///
/// # SciRS2 Integration (Phase 2: GPU Kernel Integration)
/// Returns compute capability as (major, minor) version tuple.
/// Integration path:
/// ```rust,ignore
/// #[cfg(scirs2_gpu_available)]
/// use scirs2_core::gpu::cuda::CudaDevice;
/// let device = CudaDevice::new(0)?;
/// let (major, minor) = device.compute_capability();
/// ```
///
/// # Performance Impact
/// Compute capability determines available GPU features and performance:
/// - 7.0+: Tensor Cores (mixed precision training)
/// - 8.0+: 3rd gen Tensor Cores, improved FP64
/// - 9.0+: 4th gen Tensor Cores, FP8 support
///
/// # Current Status
/// Placeholder; both branches below currently return `None`.
#[allow(dead_code)]
#[cfg(all(feature = "gpu", feature = "cuda"))]
fn detect_cuda_compute_capability(&self) -> Option<(u32, u32)> {
    // Ready for scirs2-core::gpu::cuda integration
    #[cfg(scirs2_gpu_available)]
    {
        // Will use: scirs2_core::gpu::cuda::CudaDevice::compute_capability()
        None // Placeholder until scirs2-core GPU available
    }
    #[cfg(not(scirs2_gpu_available))]
    {
        None
    }
}
/// Stub used when CUDA support is compiled out; no capability is reported.
#[allow(dead_code)]
#[cfg(not(all(feature = "gpu", feature = "cuda")))]
fn detect_cuda_compute_capability(&self) -> Option<(u32, u32)> {
    None
}
/// Detect Metal version (Apple platforms)
///
/// # SciRS2 Integration (Phase 2: GPU Kernel Integration)
/// Integration path for Metal GPU backend:
/// ```rust,ignore
/// #[cfg(scirs2_gpu_available)]
/// use scirs2_core::gpu::metal::MetalDevice;
/// let device = MetalDevice::default()?;
/// let version = device.feature_set();
/// ```
///
/// # Platform Support
/// - macOS 10.13+: Metal 2.0
/// - macOS 10.15+: Metal 2.2 (enhanced ray tracing)
/// - macOS 11.0+: Metal 2.3 (Apple Silicon optimizations)
/// - macOS 12.0+: Metal 3.0 (mesh shaders, async compute)
///
/// # Current Status
/// Returns a hard-coded "Metal 3" on macOS rather than querying the actual
/// feature set; real detection awaits scirs2-core GPU integration.
#[allow(dead_code)]
#[cfg(target_os = "macos")]
fn detect_metal_version(&self) -> Option<String> {
    // Ready for scirs2-core::gpu::metal integration
    #[cfg(scirs2_gpu_available)]
    {
        // Will use: scirs2_core::gpu::metal::MetalDevice::feature_set()
        Some("Metal 3".to_string()) // Placeholder - will query actual version
    }
    #[cfg(not(scirs2_gpu_available))]
    {
        // Metal is available on all modern macOS systems
        Some("Metal 3".to_string())
    }
}
/// Stub for non-Apple platforms; Metal is never reported.
#[allow(dead_code)]
#[cfg(not(target_os = "macos"))]
fn detect_metal_version(&self) -> Option<String> {
    None
}
/// Detect WebGPU support
///
/// # SciRS2 Integration (Phase 2: GPU Kernel Integration)
/// WebGPU provides cross-platform GPU access via wgpu-rs.
/// Integration path:
/// ```rust,ignore
/// #[cfg(scirs2_gpu_available)]
/// use scirs2_core::gpu::webgpu::WebGpuBackend;
/// let available = WebGpuBackend::is_supported();
/// ```
///
/// # Platform Coverage
/// WebGPU enables GPU acceleration on:
/// - Windows: D3D12 backend
/// - Linux: Vulkan backend
/// - macOS: Metal backend
/// - Web: WebGPU API (WASM)
///
/// # Performance Note
/// Expected 10-100x speedup for large tensors (>50K elements) when GPU available.
///
/// # Current Status
/// Returns a hard-coded `true` whenever the "wgpu" feature is enabled; real
/// adapter probing awaits scirs2-core GPU integration.
#[allow(dead_code)]
#[cfg(feature = "wgpu")]
fn detect_webgpu_support(&self) -> bool {
    // Ready for scirs2-core::gpu::webgpu integration
    #[cfg(scirs2_gpu_available)]
    {
        // Will use: scirs2_core::gpu::webgpu::WebGpuBackend::is_supported()
        true // Placeholder until scirs2-core GPU available
    }
    #[cfg(not(scirs2_gpu_available))]
    {
        true
    }
}
/// Stub used when the "wgpu" feature is disabled; WebGPU is never reported.
#[allow(dead_code)]
#[cfg(not(feature = "wgpu"))]
fn detect_webgpu_support(&self) -> bool {
    false
}
/// Detect OpenCL version.
///
/// Currently always returns `None`; see `detect_gpu_features` for the
/// consumer of this value.
#[allow(dead_code)]
fn detect_opencl_version(&self) -> Option<String> {
    // TODO: Integrate with scirs2-core::gpu::opencl when available
    None
}
/// Detect Vulkan version.
///
/// Currently always returns `None`; see `detect_gpu_features` for the
/// consumer of this value.
#[allow(dead_code)]
fn detect_vulkan_version(&self) -> Option<String> {
    // TODO: Integrate with scirs2-core::gpu::vulkan when available
    None
}
/// Count available GPU devices
///
/// # SciRS2 Integration (Phase 2: GPU Kernel Integration)
/// Enumerates all available GPU devices across backends.
/// Integration path:
/// ```rust,ignore
/// #[cfg(scirs2_gpu_available)]
/// use scirs2_core::gpu::GpuDeviceEnumerator;
/// let count = GpuDeviceEnumerator::new()?.count_all_devices();
/// ```
///
/// # Multi-Backend Support
/// Counts devices from all enabled backends:
/// - CUDA devices (NVIDIA GPUs)
/// - Metal devices (Apple GPUs)
/// - WebGPU devices (cross-platform)
/// - ROCm devices (AMD GPUs)
/// - OpenCL devices (generic GPU support)
///
/// # Use Case
/// Essential for multi-GPU training and device selection strategies.
///
/// # Current Status
/// Placeholder; both branches below currently return 0.
#[allow(dead_code)]
#[cfg(feature = "gpu")]
fn count_gpu_devices(&self) -> usize {
    // Ready for scirs2-core::gpu device enumeration
    #[cfg(scirs2_gpu_available)]
    {
        // Will use: scirs2_core::gpu::GpuDeviceEnumerator::count_all_devices()
        0 // Placeholder until scirs2-core GPU available
    }
    #[cfg(not(scirs2_gpu_available))]
    {
        0
    }
}
/// Stub used when the "gpu" feature is disabled; no devices are counted.
#[allow(dead_code)]
#[cfg(not(feature = "gpu"))]
fn count_gpu_devices(&self) -> usize {
    0
}
/// Detect system-level features (OS, memory, page size, NUMA topology).
///
/// # Errors
/// Propagates failures from the CPU device's memory/capability queries.
fn detect_system_features(&self) -> Result<SystemFeatures> {
    let device = CpuDevice::new();
    let memory = device.memory_info()?;
    // The result is unused, but the `?` deliberately surfaces a failing
    // capability query as an error from this method.
    let _cpu_capabilities = device.capabilities()?;
    Ok(SystemFeatures {
        os_info: self.detect_os_info(),
        total_memory: memory.total as usize,
        page_size: self.detect_page_size(),
        numa_available: self.detect_numa_support(),
        numa_nodes: self.detect_numa_nodes(),
        // No data source for bandwidth measurement is wired up yet.
        memory_bandwidth: None,
    })
}
/// Assemble operating system information from compile-time constants and
/// runtime probes.
fn detect_os_info(&self) -> OsInfo {
    let name = std::env::consts::OS.to_string();
    let arch = std::env::consts::ARCH.to_string();
    OsInfo {
        name,
        version: self.get_os_version(),
        arch,
        kernel_version: self.get_kernel_version(),
    }
}
/// Best-effort OS version string.
///
/// Linux: the third whitespace-separated token of `/proc/version` (the
/// kernel release). Other platforms: always `None`.
fn get_os_version(&self) -> Option<String> {
    #[cfg(target_os = "linux")]
    {
        let proc_version = std::fs::read_to_string("/proc/version").ok()?;
        proc_version
            .split_whitespace()
            .nth(2)
            .map(|token| token.to_string())
    }
    #[cfg(not(target_os = "linux"))]
    {
        None
    }
}
/// Kernel release string via `uname -r` (Linux/macOS only).
///
/// Returns `None` if the command fails to run or produces non-UTF-8 output.
fn get_kernel_version(&self) -> Option<String> {
    #[cfg(any(target_os = "linux", target_os = "macos"))]
    {
        let output = std::process::Command::new("uname").arg("-r").output().ok()?;
        let raw = String::from_utf8(output.stdout).ok()?;
        Some(raw.trim().to_string())
    }
    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    {
        None
    }
}
/// System memory page size in bytes.
///
/// NOTE(review): hard-coded to the common 4 KiB page size; platforms with
/// larger pages (e.g. 16 KiB) are not actually probed — TODO confirm this
/// is acceptable for downstream consumers.
fn detect_page_size(&self) -> usize {
    const DEFAULT_PAGE_SIZE: usize = 4096;
    DEFAULT_PAGE_SIZE
}
/// Detect NUMA support by probing the Linux sysfs node directory.
///
/// Always `false` on non-Linux platforms.
fn detect_numa_support(&self) -> bool {
    #[cfg(target_os = "linux")]
    {
        // The kernel exposes one `nodeN` directory per NUMA node here;
        // a readable metadata entry means the topology is exported.
        std::fs::metadata("/sys/devices/system/node").is_ok()
    }
    #[cfg(not(target_os = "linux"))]
    {
        false
    }
}
/// Count NUMA nodes by enumerating `node<digits>` directories in sysfs.
///
/// Falls back to a single node when the directory cannot be read, and on
/// non-Linux platforms.
fn detect_numa_nodes(&self) -> usize {
    #[cfg(target_os = "linux")]
    {
        match std::fs::read_dir("/sys/devices/system/node") {
            Ok(entries) => entries
                .flatten()
                .filter(|entry| {
                    let raw_name = entry.file_name();
                    let name = raw_name.to_string_lossy();
                    // Each entry named "node" followed only by ASCII digits
                    // represents one NUMA node.
                    name.strip_prefix("node")
                        .map_or(false, |suffix| suffix.chars().all(|c| c.is_ascii_digit()))
                })
                .count(),
            Err(_) => 1,
        }
    }
    #[cfg(not(target_os = "linux"))]
    {
        1
    }
}
/// Collect compile-time and build-environment information.
fn detect_build_features(&self) -> Result<BuildFeatures> {
    let arch = std::env::consts::ARCH;
    let os = std::env::consts::OS;
    Ok(BuildFeatures {
        // NOTE(review): "<arch>-<os>" only approximates the real target
        // triple; the vendor/ABI components are unavailable at runtime.
        target_triple: format!("{}-{}", arch, os),
        opt_level: std::env::var("OPT_LEVEL").ok(),
        debug_info: cfg!(debug_assertions),
        compile_features: self.get_compile_features(),
        cargo_features: self.get_cargo_features(),
    })
}
/// List the names of crate features enabled at compile time.
///
/// `cfg!` is evaluated by the compiler, so the table below is fixed per
/// build; the output preserves the declaration order.
fn get_compile_features(&self) -> Vec<String> {
    let candidates = [
        ("std", cfg!(feature = "std")),
        ("no_std", cfg!(feature = "no_std")),
        ("serialize", cfg!(feature = "serialize")),
        ("half", cfg!(feature = "half")),
        ("avx512", cfg!(feature = "avx512")),
        ("simd", cfg!(feature = "simd")),
        ("parallel", cfg!(feature = "parallel")),
        ("fast-math", cfg!(feature = "fast-math")),
    ];
    candidates
        .iter()
        .filter(|(_, enabled)| *enabled)
        .map(|(name, _)| name.to_string())
        .collect()
}
/// Cargo features enabled for this build.
///
/// NOTE(review): only reports "default" — the real feature set is not
/// introspectable at runtime; confirm whether a build-script export is
/// wanted here.
fn get_cargo_features(&self) -> Vec<String> {
    vec![String::from("default")]
}
/// Discover available compute devices.
///
/// The CPU backend is always registered; GPU backends are surfaced through
/// the runtime-feature pass rather than as `DeviceInfo` entries.
fn discover_devices(&mut self) -> Result<()> {
    let cpu = CpuDevice::new();
    let capabilities = cpu.capabilities()?;
    let cpu_info = DeviceInfo {
        device_type: DeviceType::Cpu,
        capabilities,
        is_available: cpu.is_available().unwrap_or(false),
        priority: 10, // baseline priority; faster backends would rank higher
        metadata: HashMap::new(),
    };
    self.available_devices.push(cpu_info);
    Ok(())
}
/// Analyze discovered devices and populate the backend summary.
///
/// Fix: `best_devices` previously kept whichever device of a given type was
/// listed last; it now keeps the highest-priority device per type, matching
/// the field's documented meaning.
fn analyze_capabilities(&mut self) -> Result<()> {
    // Track the best (highest-priority) device of each type.
    for device in &self.available_devices {
        self.backend_summary
            .best_devices
            .entry(device.device_type)
            .and_modify(|best| {
                if device.priority > best.priority {
                    *best = device.clone();
                }
            })
            .or_insert_with(|| device.clone());
    }
    // Recommended device: highest-priority device that is actually available.
    self.backend_summary.recommended_device = self
        .available_devices
        .iter()
        .filter(|d| d.is_available)
        .max_by_key(|d| d.priority)
        .cloned();
    // Determine performance tier
    self.backend_summary.performance_tier = self.classify_performance_tier();
    // Generate performance notes and recommendations
    self.generate_performance_analysis();
    Ok(())
}
/// Classify the overall system performance tier from memory, core count,
/// and SIMD capabilities.
///
/// Thresholds (checked from strongest to weakest):
/// - Extreme: >=32 GB RAM, >=16 logical cores, AVX-512
/// - High:    >=16 GB RAM, >=8 logical cores, advanced SIMD (AVX2/AVX-512/NEON)
/// - Medium:  >=8 GB RAM, >=4 logical cores
/// - Low:     everything else
fn classify_performance_tier(&self) -> PerformanceTier {
    let cpu = &self.runtime_features.cpu_features;
    let system = &self.runtime_features.system_features;
    let memory_gb = system.total_memory / (1024 * 1024 * 1024);
    let cores = cpu.logical_cores;
    let advanced_simd = cpu.simd.avx2 || cpu.simd.avx512f || cpu.simd.neon;
    match (memory_gb, cores) {
        (m, c) if m >= 32 && c >= 16 && cpu.simd.avx512f => PerformanceTier::Extreme,
        (m, c) if m >= 16 && c >= 8 && advanced_simd => PerformanceTier::High,
        (m, c) if m >= 8 && c >= 4 => PerformanceTier::Medium,
        _ => PerformanceTier::Low,
    }
}
/// Generate human-readable performance notes and upgrade recommendations,
/// appending them to the backend summary.
fn generate_performance_analysis(&mut self) {
    let cpu = &self.runtime_features.cpu_features;
    let system = &self.runtime_features.system_features;
    let mut notes = Vec::new();
    let mut recommendations = Vec::new();
    // Report only the strongest detected SIMD instruction set.
    if cpu.simd.avx512f {
        notes.push("AVX-512 support detected - excellent SIMD performance".to_string());
    } else if cpu.simd.avx2 {
        notes.push("AVX2 support detected - good SIMD performance".to_string());
    } else if cpu.simd.neon {
        notes.push("NEON support detected - good ARM SIMD performance".to_string());
    }
    if system.numa_available {
        notes.push(format!(
            "NUMA topology available with {} nodes",
            system.numa_nodes
        ));
    }
    // Hardware upgrade suggestions for under-provisioned systems.
    if cpu.logical_cores < 4 {
        recommendations.push(
            "Consider upgrading to a CPU with more cores for better parallel performance"
                .to_string(),
        );
    }
    if system.total_memory < 8 * 1024 * 1024 * 1024 {
        recommendations.push("Consider adding more RAM (minimum 8GB recommended)".to_string());
    }
    self.backend_summary.performance_notes.extend(notes);
    self.backend_summary.recommendations.extend(recommendations);
}
/// Get the best available device for a specific workload type.
///
/// Selection strategy per workload:
/// - GeneralCompute: the precomputed recommended device
/// - HighPrecisionMath: highest-priority device with double-precision support
/// - LargeMatrices: most memory, breaking ties by SIMD strength
/// - ParallelWorkloads: most compute units
pub fn best_device_for_workload(&self, workload: WorkloadType) -> Option<&DeviceInfo> {
    let devices = &self.available_devices;
    match workload {
        WorkloadType::GeneralCompute => self.backend_summary.recommended_device.as_ref(),
        WorkloadType::HighPrecisionMath => devices
            .iter()
            .filter(|d| d.capabilities.supports_double_precision())
            .max_by_key(|d| d.priority),
        WorkloadType::LargeMatrices => devices.iter().max_by_key(|d| {
            // Rank SIMD strength: AVX-512 > AVX2 > anything else.
            let simd_rank = if d.capabilities.simd_features().avx512f {
                8
            } else if d.capabilities.simd_features().avx2 {
                4
            } else {
                1
            };
            (d.capabilities.total_memory(), simd_rank)
        }),
        WorkloadType::ParallelWorkloads => {
            devices.iter().max_by_key(|d| d.capabilities.compute_units())
        }
    }
}
/// Check whether a named feature is available on this system.
///
/// Recognized names: "simd", "avx2", "avx512", "neon", "numa",
/// "double_precision", "half_precision". Unknown names return `false`.
pub fn has_feature(&self, feature: &str) -> bool {
    let simd = &self.runtime_features.cpu_features.simd;
    match feature {
        // "simd" means any SIMD instruction set at all.
        "simd" => simd.sse || simd.avx || simd.avx2 || simd.avx512f || simd.neon,
        "avx2" => simd.avx2,
        "avx512" => simd.avx512f,
        "neon" => simd.neon,
        "numa" => self.runtime_features.system_features.numa_available,
        "double_precision" => self
            .available_devices
            .iter()
            .any(|d| d.capabilities.supports_double_precision()),
        "half_precision" => self
            .available_devices
            .iter()
            .any(|d| d.capabilities.supports_half_precision()),
        _ => false,
    }
}
}
impl Default for BackendFeatureDetector {
    /// Run full detection, falling back to an empty detector if it fails.
    fn default() -> Self {
        match Self::new() {
            Ok(detector) => detector,
            Err(_) => Self {
                available_devices: Vec::new(),
                runtime_features: RuntimeFeatures::default(),
                backend_summary: BackendSummary::default(),
            },
        }
    }
}