// oxiz_sat/gpu.rs
1//! GPU Acceleration Module
2//!
3//! This module provides foundational infrastructure for GPU-accelerated SAT solving.
4//! It includes:
5//! - GPU backend enumeration (None, CUDA, OpenCL, Vulkan)
6//! - Configuration structures for GPU operations
7//! - Traits defining GPU-acceleratable operations
8//! - Pure Rust reference implementations (mock/stub for CPU fallback)
9//! - Feature-gated placeholders for future CUDA/OpenCL integration
10//! - Benchmark infrastructure for GPU vs CPU comparison
11//!
12//! # Architecture
13//!
14//! The GPU acceleration is designed around three main operations that could
15//! benefit from parallelization:
16//!
17//! 1. **Unit Propagation Batch Processing**: Process multiple propagation queues
18//!    in parallel across watched literal lists.
19//!
20//! 2. **Conflict Analysis Parallel Data Structures**: Maintain and update conflict
21//!    graphs and analysis data in parallel.
22//!
23//! 3. **Learned Clause Database Management**: Parallel clause scoring, reduction,
24//!    and reorganization.
25//!
26//! # When GPU Would Be Beneficial
27//!
28//! GPU acceleration is most beneficial when:
29//! - Clause database is very large (100k+ clauses)
30//! - Problem has high structural parallelism
31//! - Many independent propagations possible
32//! - Batch operations dominate runtime
33//!
34//! For smaller problems or highly sequential search, CPU may remain faster
35//! due to GPU kernel launch overhead and memory transfer costs.
36//!
37//! # Example
38//!
39//! ```
40//! use oxiz_sat::{GpuBackend, GpuConfig, CpuReferenceAccelerator, GpuSolverAccelerator};
41//!
42//! // Create configuration (defaults to CPU fallback)
43//! let config = GpuConfig::default();
44//! assert_eq!(config.backend, GpuBackend::None);
45//!
46//! // Create CPU reference accelerator
47//! let mut accelerator = CpuReferenceAccelerator::new(config);
48//!
49//! // Prepare batch propagation data
50//! let watched_lists: Vec<Vec<(usize, bool)>> = vec![
51//!     vec![(0, true), (1, false)],
52//!     vec![(2, true)],
53//! ];
54//! let assignments = vec![true, false, true, false];
55//!
56//! // Run batch propagation
57//! let result = accelerator.batch_unit_propagation(&watched_lists, &assignments);
58//! assert!(result.is_ok());
59//! ```
60
61#[allow(unused_imports)]
62use crate::prelude::*;
63use std::time::{Duration, Instant};
64
/// GPU backend selection
///
/// Identifies which compute framework (if any) backs GPU acceleration.
/// Only `None` (the CPU fallback) is fully implemented today; the GPU
/// variants are feature-gated placeholders.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum GpuBackend {
    /// No GPU acceleration - use CPU fallback implementations
    #[default]
    None,
    /// NVIDIA CUDA (requires `cuda` feature, not yet implemented)
    #[cfg(feature = "cuda")]
    Cuda,
    /// OpenCL cross-platform GPU (requires `opencl` feature, not yet implemented)
    #[cfg(feature = "opencl")]
    OpenCL,
    /// Vulkan compute shaders (requires `vulkan` feature, not yet implemented)
    #[cfg(feature = "vulkan")]
    Vulkan,
}

impl GpuBackend {
    /// Whether this backend needs actual GPU hardware.
    ///
    /// Every variant except `None` does.
    #[must_use]
    pub const fn requires_gpu(&self) -> bool {
        !matches!(*self, Self::None)
    }

    /// Human-readable name for the backend.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match *self {
            Self::None => "CPU (No GPU)",
            #[cfg(feature = "cuda")]
            Self::Cuda => "NVIDIA CUDA",
            #[cfg(feature = "opencl")]
            Self::OpenCL => "OpenCL",
            #[cfg(feature = "vulkan")]
            Self::Vulkan => "Vulkan Compute",
        }
    }

    /// Whether this backend can actually run on the current system.
    ///
    /// The CPU fallback is unconditionally available; GPU variants defer
    /// to their runtime-detection stubs (which currently report `false`).
    #[must_use]
    #[allow(dead_code)]
    pub fn is_available(&self) -> bool {
        match *self {
            // The CPU path has no hardware requirement.
            Self::None => true,
            #[cfg(feature = "cuda")]
            Self::Cuda => check_cuda_available(),
            #[cfg(feature = "opencl")]
            Self::OpenCL => check_opencl_available(),
            #[cfg(feature = "vulkan")]
            Self::Vulkan => check_vulkan_available(),
        }
    }
}
122
/// GPU configuration options
///
/// Controls backend selection, device choice, memory limits, and the
/// size thresholds that decide when GPU processing is worthwhile
/// (see [`GpuConfig::should_use_gpu`]).
#[derive(Debug, Clone)]
pub struct GpuConfig {
    /// Selected GPU backend
    pub backend: GpuBackend,
    /// GPU device index (for multi-GPU systems)
    pub device_index: usize,
    /// Minimum batch size for GPU operations (below this, use CPU)
    pub min_batch_size: usize,
    /// Maximum GPU memory usage in bytes (0 = unlimited)
    pub max_memory_bytes: usize,
    /// Enable async GPU operations (overlap compute and transfer)
    pub async_operations: bool,
    /// Number of CUDA streams / OpenCL command queues
    pub num_streams: usize,
    /// Threshold for number of clauses to trigger GPU processing
    pub clause_threshold: usize,
    /// Threshold for propagation queue size to trigger GPU batch processing
    pub propagation_threshold: usize,
}
143
144impl Default for GpuConfig {
145    fn default() -> Self {
146        Self {
147            backend: GpuBackend::None,
148            device_index: 0,
149            min_batch_size: 1024,
150            max_memory_bytes: 0, // Unlimited
151            async_operations: true,
152            num_streams: 4,
153            clause_threshold: 100_000,
154            propagation_threshold: 10_000,
155        }
156    }
157}
158
159impl GpuConfig {
160    /// Create a new GPU configuration
161    #[must_use]
162    pub fn new(backend: GpuBackend) -> Self {
163        Self {
164            backend,
165            ..Default::default()
166        }
167    }
168
169    /// Set the minimum batch size for GPU operations
170    #[must_use]
171    pub const fn with_min_batch_size(mut self, size: usize) -> Self {
172        self.min_batch_size = size;
173        self
174    }
175
176    /// Set the maximum GPU memory limit
177    #[must_use]
178    pub const fn with_max_memory(mut self, bytes: usize) -> Self {
179        self.max_memory_bytes = bytes;
180        self
181    }
182
183    /// Set the clause threshold for GPU processing
184    #[must_use]
185    pub const fn with_clause_threshold(mut self, threshold: usize) -> Self {
186        self.clause_threshold = threshold;
187        self
188    }
189
190    /// Check if GPU should be used for given problem size
191    #[must_use]
192    pub fn should_use_gpu(&self, num_clauses: usize, batch_size: usize) -> bool {
193        self.backend.requires_gpu()
194            && num_clauses >= self.clause_threshold
195            && batch_size >= self.min_batch_size
196    }
197}
198
/// GPU operation statistics
///
/// Plain counters and accumulated timings; all fields are public so
/// accelerator implementations can update them directly.
#[derive(Debug, Clone, Default)]
pub struct GpuStats {
    /// Total GPU kernel invocations
    pub kernel_invocations: u64,
    /// Total time spent in GPU operations
    pub gpu_time: Duration,
    /// Total time spent in CPU fallback operations
    pub cpu_fallback_time: Duration,
    /// Number of times GPU was used
    pub gpu_operations: u64,
    /// Number of times CPU fallback was used
    pub cpu_fallback_operations: u64,
    /// Total data transferred to GPU (bytes)
    pub bytes_to_gpu: u64,
    /// Total data transferred from GPU (bytes)
    pub bytes_from_gpu: u64,
    /// Batch propagation calls
    pub batch_propagation_calls: u64,
    /// Conflict analysis calls
    pub conflict_analysis_calls: u64,
    /// Clause management calls
    pub clause_management_calls: u64,
}

impl GpuStats {
    /// Get GPU utilization ratio (0.0 to 1.0)
    ///
    /// Fraction of all recorded operations (GPU + CPU fallback) that ran
    /// on the GPU; 0.0 when nothing has been recorded yet.
    #[must_use]
    pub fn gpu_utilization(&self) -> f64 {
        // Saturate rather than overflow on pathological counter values.
        let total = self.gpu_operations.saturating_add(self.cpu_fallback_operations);
        if total == 0 {
            0.0
        } else {
            self.gpu_operations as f64 / total as f64
        }
    }

    /// Get average GPU kernel time
    ///
    /// Returns [`Duration::ZERO`] when no kernels have run.
    #[must_use]
    pub fn avg_kernel_time(&self) -> Duration {
        if self.kernel_invocations == 0 {
            Duration::ZERO
        } else {
            // `Duration` division only accepts `u32`. The previous
            // `as u32` cast silently truncated, and a truncated value of 0
            // would panic on division; clamp to u32::MAX instead.
            let divisor = u32::try_from(self.kernel_invocations).unwrap_or(u32::MAX);
            self.gpu_time / divisor
        }
    }

    /// Get total data transfer in bytes
    ///
    /// Saturates at `u64::MAX` instead of overflowing.
    #[must_use]
    pub const fn total_transfer_bytes(&self) -> u64 {
        self.bytes_to_gpu.saturating_add(self.bytes_from_gpu)
    }

    /// Merge statistics from another instance
    ///
    /// Counters saturate rather than overflow; timings use saturating
    /// `Duration` addition.
    pub fn merge(&mut self, other: &Self) {
        self.kernel_invocations = self.kernel_invocations.saturating_add(other.kernel_invocations);
        self.gpu_time = self.gpu_time.saturating_add(other.gpu_time);
        self.cpu_fallback_time = self.cpu_fallback_time.saturating_add(other.cpu_fallback_time);
        self.gpu_operations = self.gpu_operations.saturating_add(other.gpu_operations);
        self.cpu_fallback_operations = self
            .cpu_fallback_operations
            .saturating_add(other.cpu_fallback_operations);
        self.bytes_to_gpu = self.bytes_to_gpu.saturating_add(other.bytes_to_gpu);
        self.bytes_from_gpu = self.bytes_from_gpu.saturating_add(other.bytes_from_gpu);
        self.batch_propagation_calls = self
            .batch_propagation_calls
            .saturating_add(other.batch_propagation_calls);
        self.conflict_analysis_calls = self
            .conflict_analysis_calls
            .saturating_add(other.conflict_analysis_calls);
        self.clause_management_calls = self
            .clause_management_calls
            .saturating_add(other.clause_management_calls);
    }
}
266
/// Result of a batch unit propagation operation
///
/// Produced by [`GpuSolverAccelerator::batch_unit_propagation`].
#[derive(Debug, Clone)]
pub struct BatchPropagationResult {
    /// Literals that became unit (need to be propagated)
    pub unit_literals: Vec<i32>,
    /// Clause indices that became conflicting
    pub conflict_clauses: Vec<usize>,
    /// Number of watched literals updated
    pub watches_updated: usize,
}
277
/// Result of parallel conflict analysis
///
/// Produced by [`GpuSolverAccelerator::parallel_conflict_analysis`].
#[derive(Debug, Clone)]
pub struct ConflictAnalysisResult {
    /// Variables involved in the conflict
    pub conflict_variables: Vec<u32>,
    /// Computed LBD (Literal Block Distance) for learned clause
    pub lbd: usize,
    /// Backtrack level suggestion
    pub backtrack_level: usize,
}
288
/// Result of clause database management operation
///
/// Produced by [`GpuSolverAccelerator::manage_clause_database`].
#[derive(Debug, Clone)]
pub struct ClauseManagementResult {
    /// Indices of clauses to delete
    pub clauses_to_delete: Vec<usize>,
    /// Updated activity scores (clause_index, new_score)
    pub updated_scores: Vec<(usize, f64)>,
    /// Clauses reorganized for better cache locality
    /// (always 0 in the CPU reference implementation).
    pub clauses_reorganized: usize,
}
299
/// Error type for GPU operations
///
/// Covers device availability, memory, kernel, transfer, configuration,
/// and timeout failures.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GpuError {
    /// GPU device not available
    DeviceNotAvailable,
    /// Out of GPU memory
    OutOfMemory,
    /// Kernel compilation failed
    KernelCompilationFailed(String),
    /// Data transfer failed
    TransferFailed(String),
    /// Invalid configuration
    InvalidConfig(String),
    /// Backend not supported
    BackendNotSupported,
    /// Operation timeout
    Timeout,
}

impl core::fmt::Display for GpuError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Payload-carrying variants format their message; the rest are
        // fixed strings written directly.
        match self {
            Self::KernelCompilationFailed(msg) => write!(f, "Kernel compilation failed: {msg}"),
            Self::TransferFailed(msg) => write!(f, "Data transfer failed: {msg}"),
            Self::InvalidConfig(msg) => write!(f, "Invalid GPU configuration: {msg}"),
            Self::DeviceNotAvailable => f.write_str("GPU device not available"),
            Self::OutOfMemory => f.write_str("Out of GPU memory"),
            Self::BackendNotSupported => f.write_str("GPU backend not supported"),
            Self::Timeout => f.write_str("GPU operation timeout"),
        }
    }
}

impl core::error::Error for GpuError {}
334
/// Trait for GPU-accelerated SAT solver operations
///
/// This trait defines operations that could potentially benefit from GPU acceleration.
/// Implementations can either use actual GPU compute or provide CPU fallback.
pub trait GpuSolverAccelerator {
    /// Get the current configuration
    fn config(&self) -> &GpuConfig;

    /// Get operation statistics
    fn stats(&self) -> &GpuStats;

    /// Reset statistics
    fn reset_stats(&mut self);

    /// Batch unit propagation
    ///
    /// Process multiple watched literal lists in parallel to find units and conflicts.
    ///
    /// # Arguments
    /// * `watched_lists` - For each literal, list of (clause_index, blocking_literal_assigned)
    /// * `assignments` - Current variable assignments (true/false for each variable)
    ///
    /// # Returns
    /// Result containing unit literals and conflict clauses
    ///
    /// # Errors
    /// Returns a [`GpuError`] on backend failure; the CPU reference
    /// implementation always succeeds.
    fn batch_unit_propagation(
        &mut self,
        watched_lists: &[Vec<(usize, bool)>],
        assignments: &[bool],
    ) -> Result<BatchPropagationResult, GpuError>;

    /// Parallel conflict analysis data structure update
    ///
    /// Update conflict analysis data structures in parallel after a conflict.
    ///
    /// # Arguments
    /// * `conflict_clause` - The clause that caused the conflict
    /// * `trail` - Current assignment trail (variable, decision_level)
    /// * `reasons` - For each variable, the reason clause index (None if decision)
    ///
    /// # Returns
    /// Analysis result with conflict variables and backtrack suggestion
    ///
    /// # Errors
    /// Returns a [`GpuError`] on backend failure; the CPU reference
    /// implementation always succeeds.
    fn parallel_conflict_analysis(
        &mut self,
        conflict_clause: &[i32],
        trail: &[(u32, usize)],
        reasons: &[Option<usize>],
    ) -> Result<ConflictAnalysisResult, GpuError>;

    /// Manage learned clause database
    ///
    /// Compute clause scores and identify clauses for deletion in parallel.
    ///
    /// # Arguments
    /// * `clause_activities` - Current activity score for each clause
    /// * `clause_lbds` - LBD value for each clause
    /// * `clause_sizes` - Size (number of literals) for each clause
    /// * `reduction_target` - Number of clauses to mark for deletion
    ///
    /// # Returns
    /// Management result with deletion candidates and updated scores
    ///
    /// # Errors
    /// Returns a [`GpuError`] on backend failure; the CPU reference
    /// implementation always succeeds.
    fn manage_clause_database(
        &mut self,
        clause_activities: &[f64],
        clause_lbds: &[usize],
        clause_sizes: &[usize],
        reduction_target: usize,
    ) -> Result<ClauseManagementResult, GpuError>;

    /// Check if GPU is ready for operations
    fn is_ready(&self) -> bool;

    /// Synchronize GPU operations (wait for completion)
    ///
    /// # Errors
    /// Returns a [`GpuError`] if waiting for outstanding GPU work fails;
    /// a no-op `Ok(())` for CPU-only implementations.
    fn synchronize(&mut self) -> Result<(), GpuError>;
}
409
410/// CPU reference implementation of GPU-acceleratable operations
411///
412/// This implementation provides pure Rust versions of all GPU operations,
413/// serving as both a fallback when no GPU is available and a reference
414/// for correctness testing.
415pub struct CpuReferenceAccelerator {
416    config: GpuConfig,
417    stats: GpuStats,
418}
419
420impl CpuReferenceAccelerator {
421    /// Create a new CPU reference accelerator
422    #[must_use]
423    pub fn new(config: GpuConfig) -> Self {
424        Self {
425            config,
426            stats: GpuStats::default(),
427        }
428    }
429}
430
431impl Default for CpuReferenceAccelerator {
432    fn default() -> Self {
433        Self::new(GpuConfig::default())
434    }
435}
436
437impl GpuSolverAccelerator for CpuReferenceAccelerator {
438    fn config(&self) -> &GpuConfig {
439        &self.config
440    }
441
442    fn stats(&self) -> &GpuStats {
443        &self.stats
444    }
445
446    fn reset_stats(&mut self) {
447        self.stats = GpuStats::default();
448    }
449
450    fn batch_unit_propagation(
451        &mut self,
452        watched_lists: &[Vec<(usize, bool)>],
453        assignments: &[bool],
454    ) -> Result<BatchPropagationResult, GpuError> {
455        let start = Instant::now();
456        self.stats.batch_propagation_calls += 1;
457        self.stats.cpu_fallback_operations += 1;
458
459        let mut unit_literals = Vec::new();
460        let mut conflict_clauses = Vec::new();
461        let mut watches_updated = 0;
462
463        // Process each literal's watch list
464        for (lit_idx, watch_list) in watched_lists.iter().enumerate() {
465            for &(clause_idx, blocking_assigned) in watch_list {
466                // If blocking literal is assigned true, no action needed
467                if blocking_assigned {
468                    continue;
469                }
470
471                // Check if this could produce a unit or conflict
472                // This is a simplified simulation - real implementation would
473                // need full clause access
474                let lit_value = if lit_idx < assignments.len() {
475                    assignments[lit_idx]
476                } else {
477                    false
478                };
479
480                if !lit_value {
481                    // Literal is false, might need watch update
482                    watches_updated += 1;
483
484                    // Simulate finding conflict or unit (simplified)
485                    if clause_idx % 7 == 0 {
486                        // Simulated conflict
487                        conflict_clauses.push(clause_idx);
488                    } else if clause_idx % 5 == 0 {
489                        // Simulated unit
490                        unit_literals.push(lit_idx as i32);
491                    }
492                }
493            }
494        }
495
496        self.stats.cpu_fallback_time += start.elapsed();
497
498        Ok(BatchPropagationResult {
499            unit_literals,
500            conflict_clauses,
501            watches_updated,
502        })
503    }
504
505    fn parallel_conflict_analysis(
506        &mut self,
507        conflict_clause: &[i32],
508        trail: &[(u32, usize)],
509        reasons: &[Option<usize>],
510    ) -> Result<ConflictAnalysisResult, GpuError> {
511        let start = Instant::now();
512        self.stats.conflict_analysis_calls += 1;
513        self.stats.cpu_fallback_operations += 1;
514
515        // Collect variables from conflict clause
516        let mut conflict_variables: Vec<u32> = conflict_clause
517            .iter()
518            .map(|lit| lit.unsigned_abs())
519            .collect();
520
521        // Find decision levels for LBD computation
522        let mut decision_levels = crate::prelude::HashSet::new();
523        for &var in &conflict_variables {
524            for &(trail_var, level) in trail {
525                if trail_var == var {
526                    decision_levels.insert(level);
527                    break;
528                }
529            }
530        }
531        let lbd = decision_levels.len();
532
533        // Find backtrack level (second highest decision level)
534        let mut levels: Vec<_> = decision_levels.into_iter().collect();
535        levels.sort_unstable();
536        let backtrack_level = if levels.len() >= 2 {
537            levels[levels.len() - 2]
538        } else {
539            0
540        };
541
542        // Mark reason variables as conflicting
543        for reason in reasons.iter().flatten() {
544            if *reason < conflict_variables.len() {
545                conflict_variables.push(*reason as u32);
546            }
547        }
548        conflict_variables.sort_unstable();
549        conflict_variables.dedup();
550
551        self.stats.cpu_fallback_time += start.elapsed();
552
553        Ok(ConflictAnalysisResult {
554            conflict_variables,
555            lbd,
556            backtrack_level,
557        })
558    }
559
560    fn manage_clause_database(
561        &mut self,
562        clause_activities: &[f64],
563        clause_lbds: &[usize],
564        clause_sizes: &[usize],
565        reduction_target: usize,
566    ) -> Result<ClauseManagementResult, GpuError> {
567        let start = Instant::now();
568        self.stats.clause_management_calls += 1;
569        self.stats.cpu_fallback_operations += 1;
570
571        let n = clause_activities.len();
572        if n == 0 {
573            self.stats.cpu_fallback_time += start.elapsed();
574            return Ok(ClauseManagementResult {
575                clauses_to_delete: Vec::new(),
576                updated_scores: Vec::new(),
577                clauses_reorganized: 0,
578            });
579        }
580
581        // Compute combined scores for each clause
582        // Score = activity * 1000 / (lbd * size)
583        let mut scores: Vec<(usize, f64)> = (0..n)
584            .map(|i| {
585                let lbd = clause_lbds.get(i).copied().unwrap_or(1).max(1);
586                let size = clause_sizes.get(i).copied().unwrap_or(1).max(1);
587                let activity = clause_activities.get(i).copied().unwrap_or(0.0);
588                let score = activity * 1000.0 / (lbd * size) as f64;
589                (i, score)
590            })
591            .collect();
592
593        // Sort by score (ascending - lowest scores are worst)
594        scores.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(core::cmp::Ordering::Equal));
595
596        // Mark lowest scoring clauses for deletion
597        let delete_count = reduction_target.min(n / 2); // Never delete more than half
598        let clauses_to_delete: Vec<usize> =
599            scores.iter().take(delete_count).map(|&(i, _)| i).collect();
600
601        // Return updated scores for remaining clauses
602        let updated_scores: Vec<(usize, f64)> = scores.iter().skip(delete_count).copied().collect();
603
604        self.stats.cpu_fallback_time += start.elapsed();
605
606        Ok(ClauseManagementResult {
607            clauses_to_delete,
608            updated_scores,
609            clauses_reorganized: 0,
610        })
611    }
612
613    fn is_ready(&self) -> bool {
614        true // CPU is always ready
615    }
616
617    fn synchronize(&mut self) -> Result<(), GpuError> {
618        Ok(()) // No-op for CPU
619    }
620}
621
622// ============================================================================
623// Feature-gated GPU backend stubs
624// ============================================================================
625
/// CUDA backend accelerator (stub - requires `cuda` feature)
///
/// Placeholder only: construction succeeds solely if
/// `check_cuda_available()` returns true (it currently never does).
#[cfg(feature = "cuda")]
#[allow(dead_code)]
pub struct CudaAccelerator {
    /// Configuration captured at construction time.
    config: GpuConfig,
    /// Operation statistics (unused until kernels exist).
    stats: GpuStats,
    // Future: CUDA context, streams, etc.
}

#[cfg(feature = "cuda")]
impl CudaAccelerator {
    /// Create a new CUDA accelerator
    ///
    /// # Errors
    /// Returns error if CUDA is not available
    #[allow(dead_code)]
    pub fn new(config: GpuConfig) -> Result<Self, GpuError> {
        if !check_cuda_available() {
            return Err(GpuError::DeviceNotAvailable);
        }
        Ok(Self {
            config,
            stats: GpuStats::default(),
        })
    }
}

#[cfg(feature = "cuda")]
#[allow(dead_code)]
fn check_cuda_available() -> bool {
    // Stub: In real implementation, would check for CUDA runtime
    // (e.g. cudaGetDeviceCount). Always false today, so `new` always errors.
    false
}
659
/// OpenCL backend accelerator (stub - requires `opencl` feature)
///
/// Placeholder only: construction succeeds solely if
/// `check_opencl_available()` returns true (it currently never does).
#[cfg(feature = "opencl")]
#[allow(dead_code)]
pub struct OpenCLAccelerator {
    /// Configuration captured at construction time.
    config: GpuConfig,
    /// Operation statistics (unused until kernels exist).
    stats: GpuStats,
    // Future: OpenCL context, command queues, etc.
}

#[cfg(feature = "opencl")]
impl OpenCLAccelerator {
    /// Create a new OpenCL accelerator
    ///
    /// # Errors
    /// Returns error if OpenCL is not available
    #[allow(dead_code)]
    pub fn new(config: GpuConfig) -> Result<Self, GpuError> {
        if !check_opencl_available() {
            return Err(GpuError::DeviceNotAvailable);
        }
        Ok(Self {
            config,
            stats: GpuStats::default(),
        })
    }
}

#[cfg(feature = "opencl")]
#[allow(dead_code)]
fn check_opencl_available() -> bool {
    // Stub: In real implementation, would check for OpenCL runtime
    // (platform/device enumeration). Always false today, so `new` always errors.
    false
}
693
/// Vulkan compute backend accelerator (stub - requires `vulkan` feature)
///
/// Placeholder only: construction succeeds solely if
/// `check_vulkan_available()` returns true (it currently never does).
#[cfg(feature = "vulkan")]
#[allow(dead_code)]
pub struct VulkanAccelerator {
    /// Configuration captured at construction time.
    config: GpuConfig,
    /// Operation statistics (unused until pipelines exist).
    stats: GpuStats,
    // Future: Vulkan instance, device, compute pipelines, etc.
}

#[cfg(feature = "vulkan")]
impl VulkanAccelerator {
    /// Create a new Vulkan accelerator
    ///
    /// # Errors
    /// Returns error if Vulkan is not available
    #[allow(dead_code)]
    pub fn new(config: GpuConfig) -> Result<Self, GpuError> {
        if !check_vulkan_available() {
            return Err(GpuError::DeviceNotAvailable);
        }
        Ok(Self {
            config,
            stats: GpuStats::default(),
        })
    }
}

#[cfg(feature = "vulkan")]
#[allow(dead_code)]
fn check_vulkan_available() -> bool {
    // Stub: In real implementation, would check for a Vulkan runtime with
    // compute-capable devices. Always false today, so `new` always errors.
    false
}
727
728// ============================================================================
729// Benchmark Infrastructure
730// ============================================================================
731
/// GPU vs CPU benchmark result
#[derive(Debug, Clone)]
pub struct GpuBenchmarkResult {
    /// Operation name
    pub operation: String,
    /// CPU execution time
    pub cpu_time: Duration,
    /// GPU execution time (if available)
    pub gpu_time: Option<Duration>,
    /// Speedup factor (`cpu_time / gpu_time`; > 1.0 means GPU is faster)
    pub speedup: Option<f64>,
    /// Problem size (number of elements processed)
    pub problem_size: usize,
    /// Whether GPU was beneficial
    pub gpu_beneficial: bool,
}

impl GpuBenchmarkResult {
    /// Build a result for a benchmark that only ran on the CPU.
    ///
    /// GPU time and speedup are absent and the run is never marked
    /// GPU-beneficial.
    #[must_use]
    pub fn cpu_only(operation: &str, cpu_time: Duration, problem_size: usize) -> Self {
        Self {
            operation: operation.to_string(),
            cpu_time,
            gpu_time: None,
            speedup: None,
            problem_size,
            gpu_beneficial: false,
        }
    }

    /// Build a result comparing a CPU run against a GPU run.
    ///
    /// Speedup is `cpu_time / gpu_time`; a zero GPU time yields an
    /// infinite speedup. The run counts as GPU-beneficial when the
    /// speedup exceeds 1.0.
    #[must_use]
    pub fn comparison(
        operation: &str,
        cpu_time: Duration,
        gpu_time: Duration,
        problem_size: usize,
    ) -> Self {
        let speedup = if gpu_time.as_nanos() == 0 {
            f64::INFINITY
        } else {
            cpu_time.as_secs_f64() / gpu_time.as_secs_f64()
        };

        Self {
            operation: operation.to_string(),
            cpu_time,
            gpu_time: Some(gpu_time),
            speedup: Some(speedup),
            problem_size,
            gpu_beneficial: speedup > 1.0,
        }
    }
}
787
/// GPU benchmark harness
///
/// Accumulates [`GpuBenchmarkResult`] entries produced by the
/// `benchmark_*` methods and renders them with `summary_report`.
pub struct GpuBenchmark {
    // Results in the order the benchmarks were run.
    results: Vec<GpuBenchmarkResult>,
}
792
impl GpuBenchmark {
    /// Create a new benchmark harness
    #[must_use]
    pub fn new() -> Self {
        Self {
            results: Vec::new(),
        }
    }

    /// Run batch propagation benchmark
    ///
    /// For each requested size, builds synthetic watch lists (10 entries
    /// per literal), runs the CPU reference accelerator 10 times, and
    /// records the mean time as a CPU-only result.
    pub fn benchmark_batch_propagation(&mut self, sizes: &[usize]) {
        for &size in sizes {
            // Generate test data
            let watched_lists: Vec<Vec<(usize, bool)>> = (0..size)
                .map(|i| (0..10).map(|j| ((i * 10 + j) % 1000, j % 2 == 0)).collect())
                .collect();
            let assignments: Vec<bool> = (0..size).map(|i| i % 2 == 0).collect();

            // Run CPU benchmark (mean over 10 iterations)
            let mut cpu_accel = CpuReferenceAccelerator::default();
            let start = Instant::now();
            for _ in 0..10 {
                let _ = cpu_accel.batch_unit_propagation(&watched_lists, &assignments);
            }
            let cpu_time = start.elapsed() / 10;

            self.results.push(GpuBenchmarkResult::cpu_only(
                "batch_propagation",
                cpu_time,
                size,
            ));
        }
    }

    /// Run conflict analysis benchmark
    ///
    /// For each requested size, builds a synthetic conflict clause (capped
    /// at 20 literals), a trail cycling through 10 decision levels, and a
    /// sparse reasons vector; records the mean time over 100 iterations.
    pub fn benchmark_conflict_analysis(&mut self, sizes: &[usize]) {
        for &size in sizes {
            // Generate test data
            let conflict_clause: Vec<i32> = (0..size.min(20) as i32).collect();
            let trail: Vec<(u32, usize)> =
                (0..size as u32).map(|i| (i, (i as usize) % 10)).collect();
            let reasons: Vec<Option<usize>> = (0..size)
                .map(|i| if i % 3 == 0 { Some(i) } else { None })
                .collect();

            // Run CPU benchmark (mean over 100 iterations)
            let mut cpu_accel = CpuReferenceAccelerator::default();
            let start = Instant::now();
            for _ in 0..100 {
                let _ = cpu_accel.parallel_conflict_analysis(&conflict_clause, &trail, &reasons);
            }
            let cpu_time = start.elapsed() / 100;

            self.results.push(GpuBenchmarkResult::cpu_only(
                "conflict_analysis",
                cpu_time,
                size,
            ));
        }
    }

    /// Run clause management benchmark
    ///
    /// For each requested size, builds synthetic activities/LBDs/sizes,
    /// targets deletion of a quarter of the clauses, and records the mean
    /// time over 10 iterations.
    pub fn benchmark_clause_management(&mut self, sizes: &[usize]) {
        for &size in sizes {
            // Generate test data
            let activities: Vec<f64> = (0..size).map(|i| (i as f64) * 0.001).collect();
            let lbds: Vec<usize> = (0..size).map(|i| (i % 10) + 1).collect();
            let clause_sizes: Vec<usize> = (0..size).map(|i| (i % 20) + 2).collect();
            let reduction_target = size / 4;

            // Run CPU benchmark (mean over 10 iterations)
            let mut cpu_accel = CpuReferenceAccelerator::default();
            let start = Instant::now();
            for _ in 0..10 {
                let _ = cpu_accel.manage_clause_database(
                    &activities,
                    &lbds,
                    &clause_sizes,
                    reduction_target,
                );
            }
            let cpu_time = start.elapsed() / 10;

            self.results.push(GpuBenchmarkResult::cpu_only(
                "clause_management",
                cpu_time,
                size,
            ));
        }
    }

    /// Get all benchmark results
    #[must_use]
    pub fn results(&self) -> &[GpuBenchmarkResult] {
        &self.results
    }

    /// Generate a summary report
    ///
    /// Renders a fixed-width text table of all accumulated results,
    /// followed by a note about expected GPU speedups.
    #[must_use]
    pub fn summary_report(&self) -> String {
        let mut report = String::new();

        report.push_str(
            "==============================================================================\n",
        );
        report.push_str(
            "                     GPU ACCELERATION BENCHMARK REPORT                       \n",
        );
        report.push_str(
            "==============================================================================\n\n",
        );

        report.push_str(
            "Operation               | Size      | CPU Time    | GPU Time    | Speedup\n",
        );
        report.push_str(
            "------------------------|-----------|-------------|-------------|----------\n",
        );

        for result in &self.results {
            // GPU columns fall back to "N/A" for CPU-only results.
            let gpu_time_str = result.gpu_time.map_or_else(
                || "N/A".to_string(),
                |t| format!("{:.3}ms", t.as_secs_f64() * 1000.0),
            );
            let speedup_str = result
                .speedup
                .map_or_else(|| "N/A".to_string(), |s| format!("{:.2}x", s));

            report.push_str(&format!(
                "{:<23} | {:<9} | {:>9.3}ms | {:>11} | {}\n",
                result.operation,
                result.problem_size,
                result.cpu_time.as_secs_f64() * 1000.0,
                gpu_time_str,
                speedup_str,
            ));
        }

        report.push('\n');
        report.push_str(
            "==============================================================================\n",
        );
        report.push_str("NOTE: GPU times show N/A - GPU backends are not yet implemented.\n");
        report.push_str("Expected speedups when GPU is available:\n");
        report.push_str(
            "  - Batch propagation:   2-10x for large clause databases (100k+ clauses)\n",
        );
        report.push_str("  - Conflict analysis:   1.5-3x for complex conflicts\n");
        report.push_str("  - Clause management:   5-20x for large learned clause databases\n");
        report.push_str(
            "==============================================================================\n",
        );

        report
    }

    /// Clear all results
    pub fn clear(&mut self) {
        self.results.clear();
    }
}
954
955impl Default for GpuBenchmark {
956    fn default() -> Self {
957        Self::new()
958    }
959}
960
961// ============================================================================
962// Expected Performance Analysis
963// ============================================================================
964
/// Analysis of when GPU acceleration would be beneficial
///
/// Produced by [`GpuBenefitAnalysis::analyze`]. Each `*_benefit` field is a
/// heuristic speedup multiplier: values above 1.0 suggest the GPU would beat
/// the CPU for that operation, values below 1.0 suggest the CPU wins. These
/// are estimates, not measured timings.
#[derive(Debug, Clone)]
pub struct GpuBenefitAnalysis {
    /// Number of clauses
    pub num_clauses: usize,
    /// Number of variables
    pub num_variables: usize,
    /// Average clause size
    ///
    /// Stored for reporting only; the current heuristics in `analyze` do not
    /// use it when computing the benefit multipliers.
    pub avg_clause_size: f64,
    /// Estimated GPU benefit for unit propagation (multiplier)
    pub propagation_benefit: f64,
    /// Estimated GPU benefit for conflict analysis (multiplier)
    pub conflict_benefit: f64,
    /// Estimated GPU benefit for clause management (multiplier)
    pub management_benefit: f64,
    /// Overall recommendation
    pub recommendation: GpuRecommendation,
}
983
/// GPU usage recommendation
///
/// Derived from the arithmetic mean of the three benefit multipliers in
/// [`GpuBenefitAnalysis`]: above 2.0 yields `Recommended`, above 1.0 yields
/// `Marginal`, otherwise `NotRecommended`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuRecommendation {
    /// GPU would likely provide significant speedup
    Recommended,
    /// GPU might help for specific operations
    Marginal,
    /// CPU is likely faster (problem too small or too sequential)
    NotRecommended,
}
994
995impl GpuBenefitAnalysis {
996    /// Analyze a problem to determine GPU benefit
997    #[must_use]
998    pub fn analyze(num_clauses: usize, num_variables: usize, avg_clause_size: f64) -> Self {
999        // Heuristic estimates based on typical GPU characteristics:
1000        // - GPU excels at parallel data-parallel operations
1001        // - Memory transfer overhead is significant for small problems
1002        // - Cache locality matters more for CPU
1003
1004        // Propagation benefit increases with clause database size
1005        let propagation_benefit = if num_clauses > 100_000 {
1006            5.0 + (num_clauses as f64 / 100_000.0).ln()
1007        } else if num_clauses > 10_000 {
1008            2.0
1009        } else {
1010            0.5 // CPU likely faster
1011        };
1012
1013        // Conflict analysis benefit depends on trail size (proportional to variables)
1014        let conflict_benefit = if num_variables > 100_000 {
1015            2.0
1016        } else if num_variables > 10_000 {
1017            1.2
1018        } else {
1019            0.8
1020        };
1021
1022        // Clause management benefit scales well with learned clause count
1023        let management_benefit = if num_clauses > 500_000 {
1024            10.0
1025        } else if num_clauses > 100_000 {
1026            5.0
1027        } else if num_clauses > 10_000 {
1028            2.0
1029        } else {
1030            0.7
1031        };
1032
1033        // Overall recommendation
1034        let avg_benefit = (propagation_benefit + conflict_benefit + management_benefit) / 3.0;
1035        let recommendation = if avg_benefit > 2.0 {
1036            GpuRecommendation::Recommended
1037        } else if avg_benefit > 1.0 {
1038            GpuRecommendation::Marginal
1039        } else {
1040            GpuRecommendation::NotRecommended
1041        };
1042
1043        Self {
1044            num_clauses,
1045            num_variables,
1046            avg_clause_size,
1047            propagation_benefit,
1048            conflict_benefit,
1049            management_benefit,
1050            recommendation,
1051        }
1052    }
1053
1054    /// Get a human-readable summary
1055    #[must_use]
1056    pub fn summary(&self) -> String {
1057        format!(
1058            "GPU Analysis for {} clauses, {} variables:\n\
1059             - Propagation speedup: {:.1}x\n\
1060             - Conflict analysis speedup: {:.1}x\n\
1061             - Clause management speedup: {:.1}x\n\
1062             - Recommendation: {:?}",
1063            self.num_clauses,
1064            self.num_variables,
1065            self.propagation_benefit,
1066            self.conflict_benefit,
1067            self.management_benefit,
1068            self.recommendation
1069        )
1070    }
1071}
1072
1073// ============================================================================
1074// Tests
1075// ============================================================================
1076
#[cfg(test)]
mod tests {
    //! Unit tests for the GPU acceleration scaffolding: backend/config
    //! defaults, statistics, the CPU reference accelerator, the benchmark
    //! harness, and the benefit-analysis heuristics.

    use super::*;

    #[test]
    fn test_gpu_backend_default() {
        let backend = GpuBackend::default();
        assert_eq!(backend, GpuBackend::None);
        assert!(!backend.requires_gpu());
    }

    #[test]
    fn test_gpu_backend_name() {
        assert_eq!(GpuBackend::None.name(), "CPU (No GPU)");
    }

    #[test]
    fn test_gpu_config_default() {
        let config = GpuConfig::default();
        assert_eq!(config.backend, GpuBackend::None);
        assert_eq!(config.device_index, 0);
        assert!(config.min_batch_size > 0);
    }

    #[test]
    fn test_gpu_config_builder() {
        let config = GpuConfig::new(GpuBackend::None)
            .with_min_batch_size(2048)
            .with_max_memory(1024 * 1024 * 1024)
            .with_clause_threshold(50_000);

        assert_eq!(config.min_batch_size, 2048);
        assert_eq!(config.max_memory_bytes, 1024 * 1024 * 1024);
        assert_eq!(config.clause_threshold, 50_000);
    }

    #[test]
    fn test_should_use_gpu() {
        let config = GpuConfig::default();
        // With GpuBackend::None, should never use GPU
        assert!(!config.should_use_gpu(1_000_000, 10_000));
    }

    #[test]
    fn test_gpu_stats_default() {
        let stats = GpuStats::default();
        assert_eq!(stats.kernel_invocations, 0);
        assert_eq!(stats.gpu_utilization(), 0.0);
    }

    #[test]
    fn test_gpu_stats_utilization() {
        // 3 GPU ops out of 10 total -> 30% utilization.
        let stats = GpuStats {
            gpu_operations: 3,
            cpu_fallback_operations: 7,
            ..Default::default()
        };
        assert!((stats.gpu_utilization() - 0.3).abs() < 0.001);
    }

    #[test]
    fn test_gpu_stats_merge() {
        let mut stats1 = GpuStats {
            kernel_invocations: 5,
            gpu_operations: 3,
            ..Default::default()
        };

        let stats2 = GpuStats {
            kernel_invocations: 10,
            gpu_operations: 7,
            ..Default::default()
        };

        stats1.merge(&stats2);
        assert_eq!(stats1.kernel_invocations, 15);
        assert_eq!(stats1.gpu_operations, 10);
    }

    #[test]
    fn test_cpu_reference_accelerator_creation() {
        let accel = CpuReferenceAccelerator::default();
        assert!(accel.is_ready());
        assert_eq!(accel.config().backend, GpuBackend::None);
    }

    #[test]
    fn test_batch_unit_propagation() {
        let mut accel = CpuReferenceAccelerator::default();

        let watched_lists = vec![
            vec![(0, true), (1, false), (2, false)],
            vec![(3, false), (4, true)],
        ];
        let assignments = vec![true, false, true];

        let result = accel.batch_unit_propagation(&watched_lists, &assignments);
        assert!(result.is_ok());

        // The CPU reference implementation records every call as a fallback.
        let stats = accel.stats();
        assert_eq!(stats.batch_propagation_calls, 1);
        assert_eq!(stats.cpu_fallback_operations, 1);
    }

    #[test]
    fn test_parallel_conflict_analysis() {
        let mut accel = CpuReferenceAccelerator::default();

        let conflict_clause = vec![1, -2, 3];
        let trail = vec![(1, 0), (2, 1), (3, 2)];
        let reasons = vec![None, Some(0), Some(1)];

        let result = accel.parallel_conflict_analysis(&conflict_clause, &trail, &reasons);
        assert!(result.is_ok());

        let analysis = result.expect("Conflict analysis must succeed");
        assert!(!analysis.conflict_variables.is_empty());
        assert!(analysis.lbd > 0);
    }

    #[test]
    fn test_manage_clause_database() {
        let mut accel = CpuReferenceAccelerator::default();

        let activities = vec![1.0, 0.5, 2.0, 0.1, 1.5];
        let lbds = vec![2, 3, 1, 5, 2];
        let sizes = vec![3, 4, 2, 6, 3];
        let reduction_target = 2;

        let result = accel.manage_clause_database(&activities, &lbds, &sizes, reduction_target);
        assert!(result.is_ok());

        // Exactly `reduction_target` clauses should be selected for deletion.
        let mgmt = result.expect("Clause database management must succeed");
        assert_eq!(mgmt.clauses_to_delete.len(), 2);
    }

    #[test]
    fn test_manage_empty_database() {
        let mut accel = CpuReferenceAccelerator::default();

        let result = accel.manage_clause_database(&[], &[], &[], 10);
        assert!(result.is_ok());
        assert!(
            result
                .expect("Empty database management must succeed")
                .clauses_to_delete
                .is_empty()
        );
    }

    #[test]
    fn test_synchronize() {
        let mut accel = CpuReferenceAccelerator::default();
        assert!(accel.synchronize().is_ok());
    }

    #[test]
    fn test_reset_stats() {
        let mut accel = CpuReferenceAccelerator::default();
        let _ = accel.batch_unit_propagation(&[], &[]);
        assert!(accel.stats().batch_propagation_calls > 0);

        accel.reset_stats();
        assert_eq!(accel.stats().batch_propagation_calls, 0);
    }

    #[test]
    fn test_gpu_error_display() {
        // Exercise the Display impl via the blanket ToString.
        let err = GpuError::DeviceNotAvailable;
        assert_eq!(err.to_string(), "GPU device not available");

        let err = GpuError::KernelCompilationFailed("test".to_string());
        assert!(err.to_string().contains("test"));
    }

    #[test]
    fn test_gpu_benchmark_creation() {
        let benchmark = GpuBenchmark::new();
        assert!(benchmark.results().is_empty());
    }

    #[test]
    fn test_benchmark_batch_propagation() {
        let mut benchmark = GpuBenchmark::new();
        benchmark.benchmark_batch_propagation(&[100, 1000]);
        assert_eq!(benchmark.results().len(), 2);
    }

    #[test]
    fn test_benchmark_conflict_analysis() {
        let mut benchmark = GpuBenchmark::new();
        benchmark.benchmark_conflict_analysis(&[100, 1000]);
        assert_eq!(benchmark.results().len(), 2);
    }

    #[test]
    fn test_benchmark_clause_management() {
        let mut benchmark = GpuBenchmark::new();
        benchmark.benchmark_clause_management(&[100, 1000]);
        assert_eq!(benchmark.results().len(), 2);
    }

    #[test]
    fn test_benchmark_summary_report() {
        let mut benchmark = GpuBenchmark::new();
        benchmark.benchmark_batch_propagation(&[100]);
        let report = benchmark.summary_report();
        assert!(report.contains("GPU ACCELERATION BENCHMARK"));
        assert!(report.contains("batch_propagation"));
    }

    #[test]
    fn test_benchmark_result_cpu_only() {
        let result = GpuBenchmarkResult::cpu_only("test", Duration::from_millis(10), 1000);
        assert!(!result.gpu_beneficial);
        assert!(result.gpu_time.is_none());
    }

    #[test]
    fn test_benchmark_result_comparison() {
        // CPU 100ms vs GPU 50ms -> 2.0x speedup.
        let result = GpuBenchmarkResult::comparison(
            "test",
            Duration::from_millis(100),
            Duration::from_millis(50),
            1000,
        );
        assert!(result.gpu_beneficial);
        assert!(
            (result
                .speedup
                .expect("Speedup must be calculated for comparison")
                - 2.0)
                .abs()
                < 0.01
        );
    }

    #[test]
    fn test_gpu_benefit_analysis_small() {
        let analysis = GpuBenefitAnalysis::analyze(1000, 500, 3.0);
        assert_eq!(analysis.recommendation, GpuRecommendation::NotRecommended);
    }

    #[test]
    fn test_gpu_benefit_analysis_large() {
        let analysis = GpuBenefitAnalysis::analyze(500_000, 100_000, 5.0);
        assert_eq!(analysis.recommendation, GpuRecommendation::Recommended);
    }

    #[test]
    fn test_gpu_benefit_analysis_marginal() {
        let analysis = GpuBenefitAnalysis::analyze(50_000, 10_000, 4.0);
        // Could be Marginal or Recommended depending on exact thresholds
        assert!(matches!(
            analysis.recommendation,
            GpuRecommendation::Marginal | GpuRecommendation::Recommended
        ));
    }

    #[test]
    fn test_gpu_benefit_summary() {
        let analysis = GpuBenefitAnalysis::analyze(100_000, 50_000, 4.0);
        let summary = analysis.summary();
        assert!(summary.contains("100000 clauses"));
        assert!(summary.contains("50000 variables"));
    }

    #[test]
    fn test_benchmark_clear() {
        let mut benchmark = GpuBenchmark::new();
        benchmark.benchmark_batch_propagation(&[100]);
        assert!(!benchmark.results().is_empty());

        benchmark.clear();
        assert!(benchmark.results().is_empty());
    }
}