1#![deny(missing_docs)]
2
3use std::{
13 borrow::Cow,
14 num::NonZeroU64,
15 sync::{Arc, Mutex, MutexGuard},
16 time::Instant,
17};
18
19use futures::channel::oneshot;
20
21use bytemuck::{cast_slice, pod_read_unaligned, Pod, Zeroable};
22use thiserror::Error;
23use wgpu::util::DeviceExt;
24use zeldhash_miner_core::encode_nonce;
25
// Threads per compute workgroup; presumably matches the shader's
// `@workgroup_size` — confirm against shader.wgsl.
#[cfg_attr(test, allow(dead_code))]
const WORKGROUP_SIZE: u32 = 256;
// Maximum matches recorded per dispatch (capacity of `ResultBuffer.results`).
const MAX_RESULTS: usize = 8;

// WGSL compute shader source, embedded at build time and compiled at runtime
// by `create_shader_module`.
const SHADER_WGSL: &str = include_str!("shader.wgsl");
31
/// Human-readable summary of the GPU adapter selected for mining.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AdapterSummary {
    /// Adapter name as reported by the driver.
    pub name: String,
    /// Graphics backend in use (e.g. `Vulkan`, `Metal`), `Debug`-formatted.
    pub backend: String,
    /// Device category (e.g. `DiscreteGpu`, `IntegratedGpu`), `Debug`-formatted.
    pub device_type: String,
}
42
43impl From<wgpu::AdapterInfo> for AdapterSummary {
44 fn from(info: wgpu::AdapterInfo) -> Self {
45 Self {
46 name: info.name,
47 backend: format!("{:?}", info.backend),
48 device_type: format!("{:?}", info.device_type),
49 }
50 }
51}
52
/// A nonce whose resulting transaction hash satisfied the difficulty target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MineResult {
    /// The winning nonce value.
    pub nonce: u64,
    /// The 32-byte transaction id produced with that nonce.
    pub txid: [u8; 32],
}
61
/// Errors surfaced by the GPU mining backend.
#[derive(Debug, Error)]
pub enum GpuError {
    /// No usable WebGPU adapter or device could be acquired.
    #[error("WebGPU not available: {0}")]
    Unavailable(String),
    /// A runtime failure inside the GPU pipeline (buffers, locks, mapping).
    #[error("GPU error: {0}")]
    Internal(String),
}
72
/// Shared handle to the WebGPU device plus lazily-built caches.
///
/// All fields are `Arc`s, so cloning is cheap and clones share the same
/// device, queue, and caches.
#[derive(Clone)]
#[cfg_attr(test, allow(dead_code))]
pub struct GpuContext {
    device: Arc<wgpu::Device>,
    queue: Arc<wgpu::Queue>,
    #[allow(dead_code)]
    adapter_info: wgpu::AdapterInfo,
    // Calibrated batch size, populated by `calibrate_batch_size`.
    batch_size_cache: Arc<Mutex<Option<u32>>>,
    // Compiled compute pipeline, built on first use.
    pipeline_cache: Arc<Mutex<Option<Arc<GpuPipeline>>>>,
    // Fixed-size result + readback staging buffers.
    fixed_buffers: Arc<Mutex<Option<Arc<FixedBuffers>>>>,
    // Grow-on-demand prefix/suffix/params input buffers.
    io_buffers: Arc<Mutex<Option<IoBuffers>>>,
}
86
87impl GpuContext {
88 pub async fn init() -> Result<Self, GpuError> {
90 let instance = if cfg!(target_arch = "wasm32") {
91 wgpu::Instance::new(wgpu::InstanceDescriptor {
92 backends: wgpu::Backends::BROWSER_WEBGPU,
93 dx12_shader_compiler: wgpu::Dx12Compiler::Fxc,
94 flags: wgpu::InstanceFlags::default(),
95 gles_minor_version: wgpu::Gles3MinorVersion::Automatic,
96 })
97 } else {
98 wgpu::Instance::new(wgpu::InstanceDescriptor {
99 backends: wgpu::Backends::PRIMARY,
100 dx12_shader_compiler: wgpu::Dx12Compiler::Fxc,
101 flags: wgpu::InstanceFlags::default(),
102 gles_minor_version: wgpu::Gles3MinorVersion::Automatic,
103 })
104 };
105 let adapter = instance
106 .request_adapter(&wgpu::RequestAdapterOptions {
107 power_preference: wgpu::PowerPreference::HighPerformance,
108 compatible_surface: None,
109 force_fallback_adapter: false,
110 })
111 .await
112 .ok_or_else(|| GpuError::Unavailable("no suitable adapter found".into()))?;
113
114 let adapter_info = adapter.get_info();
115 let required_features = wgpu::Features::empty();
116
117 let required_limits = if cfg!(target_arch = "wasm32") {
127 let mut limits = wgpu::Limits::downlevel_webgl2_defaults();
128 limits.max_inter_stage_shader_components = 0;
130 limits
131 } else {
132 adapter.limits()
133 };
134
135 let (device, queue) = adapter
136 .request_device(
137 &wgpu::DeviceDescriptor {
138 label: Some("zeldhash-miner-gpu-device"),
139 required_features,
140 required_limits,
141 },
142 None,
143 )
144 .await
145 .map_err(|e| GpuError::Unavailable(format!("request_device failed: {e}")))?;
146
147 Ok(Self {
148 device: Arc::new(device),
149 queue: Arc::new(queue),
150 adapter_info,
151 batch_size_cache: Arc::new(Mutex::new(None)),
152 pipeline_cache: Arc::new(Mutex::new(None)),
153 fixed_buffers: Arc::new(Mutex::new(None)),
154 io_buffers: Arc::new(Mutex::new(None)),
155 })
156 }
157
158 pub fn adapter_summary(&self) -> AdapterSummary {
160 AdapterSummary::from(self.adapter_info.clone())
161 }
162}
163
/// One unit of mining work: a transaction template split around the nonce.
///
/// The GPU hashes `tx_prefix ++ encoded(nonce) ++ tx_suffix` for every nonce
/// in `[start_nonce, start_nonce + batch_size)` (see the CPU reference
/// implementation in the tests module).
#[derive(Debug, Clone)]
pub struct MiningBatch<'a> {
    /// Serialized transaction bytes preceding the nonce.
    pub tx_prefix: &'a [u8],
    /// Serialized transaction bytes following the nonce.
    pub tx_suffix: &'a [u8],
    /// First nonce of the batch.
    pub start_nonce: u64,
    /// Number of consecutive nonces to try.
    pub batch_size: u32,
    /// Difficulty parameter forwarded to `hash_meets_target` (leading zeros;
    /// exact unit is defined by the core crate).
    pub target_zeros: u8,
    /// Encode the nonce as a CBOR unsigned integer instead of fixed-width
    /// big-endian bytes.
    pub use_cbor_nonce: bool,
}
180
/// Uniform-buffer parameters handed to the compute shader.
///
/// The `_pad*` fields pad the struct out to a 16-byte-multiple size;
/// presumably this mirrors the shader-side struct layout — confirm against
/// shader.wgsl.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct MiningParams {
    start_nonce_lo: u32, // low 32 bits of the 64-bit start nonce
    start_nonce_hi: u32, // high 32 bits
    batch_size: u32,
    target_zeros: u32,
    prefix_len: u32, // original byte length (the word buffers are zero-padded)
    suffix_len: u32, // original byte length
    nonce_len: u32,  // encoded nonce length in bytes, fixed for the batch
    use_cbor_nonce: u32, _pad2: u32, _pad3: u32,
    _pad4: u32,
    _pad5: u32,
}
197
/// One found result as written by the shader; sized/asserted to
/// `RESULT_ENTRY_SIZE` (48 bytes) below.
#[repr(C, align(16))]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ResultEntry {
    nonce_lo: u32,
    nonce_hi: u32,
    txid: [u32; 8],      // hash words; expanded big-endian by `to_u8_bytes`
    _tail_pad: [u32; 2], // pads the entry to the 16-byte-aligned size
}
206
/// GPU-side result buffer: an atomic-style counter followed by up to
/// `MAX_RESULTS` entries. Sized/asserted to `RESULT_BUFFER_SIZE` below.
#[repr(C, align(16))]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ResultBuffer {
    // Number of matches the shader found; may exceed MAX_RESULTS (see
    // `parse_results`, which clamps it before reading entries).
    found_count: u32,
    _pad: u32, _align_pad: [u32; 2],
    results: [ResultEntry; MAX_RESULTS],
    _tail_pad: [u32; 2],
    _final_pad: [u32; 2],
}
218
// Expected GPU-side byte sizes. The anonymous `const _` items at the bottom
// are compile-time assertions: if the Rust structs ever drift from these
// counts, the build fails instead of silently corrupting result parsing.
#[allow(dead_code)]
const RESULT_ENTRY_SIZE: usize = 48;
#[allow(dead_code)]
const RESULT_BUFFER_HEADER: usize = 16; #[allow(dead_code)]
const RESULT_BUFFER_TAIL: usize = 16; #[allow(dead_code)]
// Header + entries + tail, rounded up to a multiple of 16 bytes.
const RESULT_BUFFER_SIZE: usize =
    ((RESULT_BUFFER_HEADER + (MAX_RESULTS * RESULT_ENTRY_SIZE) + RESULT_BUFFER_TAIL + 15) / 16)
        * 16;
const _: [(); RESULT_ENTRY_SIZE] = [(); std::mem::size_of::<ResultEntry>()];
const _: [(); RESULT_BUFFER_SIZE] = [(); std::mem::size_of::<ResultBuffer>()];
232
// Compiled compute pipeline plus the bind-group layout needed to bind
// per-dispatch buffers against it.
#[cfg_attr(test, allow(dead_code))]
struct GpuPipeline {
    pipeline: wgpu::ComputePipeline,
    layout: wgpu::BindGroupLayout,
}
238
// Fixed-size buffers reused across dispatches: the shader-visible result
// buffer and a MAP_READ staging buffer for reading results back to the host.
struct FixedBuffers {
    result: wgpu::Buffer,
    staging: wgpu::Buffer,
}
243
// Pooled input buffers that grow on demand; each buffer's current byte
// capacity is tracked alongside it so `ensure_capacity` can decide whether a
// reallocation is needed.
struct IoBuffers {
    prefix: wgpu::Buffer,
    prefix_capacity: u64,
    suffix: wgpu::Buffer,
    suffix_capacity: u64,
    params: wgpu::Buffer,
    params_capacity: u64,
}
252
253type IoBuffersCacheGuard<'a> = MutexGuard<'a, Option<IoBuffers>>;
254
/// Rounds a requested byte size up to a power of two, with a 16-byte floor,
/// so repeated small growths do not cause constant reallocation.
fn min_capacity(size: u64) -> u64 {
    let floored = if size < 16 { 16 } else { size };
    floored.next_power_of_two()
}
259
260fn create_buffer(
261 device: &wgpu::Device,
262 label: &str,
263 size: u64,
264 usage: wgpu::BufferUsages,
265) -> wgpu::Buffer {
266 device.create_buffer(&wgpu::BufferDescriptor {
267 label: Some(label),
268 size,
269 usage,
270 mapped_at_creation: false,
271 })
272}
273
274impl IoBuffers {
275 fn new(device: &wgpu::Device, prefix: u64, suffix: u64, params: u64) -> Self {
276 let prefix_capacity = min_capacity(prefix);
277 let suffix_capacity = min_capacity(suffix);
278 let params_capacity = min_capacity(params);
279
280 Self {
281 prefix: create_buffer(
282 device,
283 "zeldhash-miner-gpu-prefix-pooled",
284 prefix_capacity,
285 wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
286 ),
287 prefix_capacity,
288 suffix: create_buffer(
289 device,
290 "zeldhash-miner-gpu-suffix-pooled",
291 suffix_capacity,
292 wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
293 ),
294 suffix_capacity,
295 params: create_buffer(
296 device,
297 "zeldhash-miner-gpu-params-pooled",
298 params_capacity,
299 wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
300 ),
301 params_capacity,
302 }
303 }
304
305 fn ensure_capacity(
306 &mut self,
307 device: &wgpu::Device,
308 prefix: u64,
309 suffix: u64,
310 params: u64,
311 limits: &wgpu::Limits,
312 ) -> Result<(), GpuError> {
313 let max_storage: u64 = limits.max_storage_buffer_binding_size.into();
314 let max_uniform: u64 = limits.max_uniform_buffer_binding_size.into();
315
316 if prefix > max_storage {
317 return Err(GpuError::Internal(format!(
318 "prefix buffer exceeds max storage binding size ({} > {})",
319 prefix, max_storage
320 )));
321 }
322 if suffix > max_storage {
323 return Err(GpuError::Internal(format!(
324 "suffix buffer exceeds max storage binding size ({} > {})",
325 suffix, max_storage
326 )));
327 }
328 if params > max_uniform {
329 return Err(GpuError::Internal(format!(
330 "params buffer exceeds max uniform binding size ({} > {})",
331 params, max_uniform
332 )));
333 }
334
335 let needed_prefix = min_capacity(prefix).min(max_storage);
336 if needed_prefix > self.prefix_capacity {
337 self.prefix = create_buffer(
338 device,
339 "zeldhash-miner-gpu-prefix-pooled",
340 needed_prefix,
341 wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
342 );
343 self.prefix_capacity = needed_prefix;
344 }
345
346 let needed_suffix = min_capacity(suffix).min(max_storage);
347 if needed_suffix > self.suffix_capacity {
348 self.suffix = create_buffer(
349 device,
350 "zeldhash-miner-gpu-suffix-pooled",
351 needed_suffix,
352 wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
353 );
354 self.suffix_capacity = needed_suffix;
355 }
356
357 let needed_params = min_capacity(params).min(max_uniform);
358 if needed_params > self.params_capacity {
359 self.params = create_buffer(
360 device,
361 "zeldhash-miner-gpu-params-pooled",
362 needed_params,
363 wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
364 );
365 self.params_capacity = needed_params;
366 }
367
368 Ok(())
369 }
370}
371
// Returns the locked pooled IO buffers, creating them on first use and
// growing them to fit the requested byte sizes.
//
// The `MutexGuard` itself is returned so the caller keeps the buffers locked
// while binding and writing them.
//
// Fails when the cache mutex is poisoned or a requested size exceeds the
// device's binding limits.
fn get_or_create_io_buffers(
    ctx: &GpuContext,
    prefix: u64,
    suffix: u64,
    params: u64,
) -> Result<IoBuffersCacheGuard<'_>, GpuError> {
    let limits = ctx.device.limits();
    let mut guard = ctx
        .io_buffers
        .lock()
        .map_err(|_| GpuError::Internal("buffer cache poisoned".into()))?;

    // First call populates the cache; later calls only grow as needed.
    let buffers = guard.get_or_insert_with(|| IoBuffers::new(&ctx.device, prefix, suffix, params));
    buffers.ensure_capacity(&ctx.device, prefix, suffix, params, &limits)?;

    Ok(guard)
}
389
// Heuristic batch size keyed off the adapter's device class; used by
// `calibrate_batch_size` when timed calibration produced no usable signal.
fn fallback_batch_size(info: &wgpu::AdapterInfo) -> u32 {
    match info.device_type {
        wgpu::DeviceType::IntegratedGpu => 100_000,
        wgpu::DeviceType::DiscreteGpu => 1_000_000,
        wgpu::DeviceType::VirtualGpu => 200_000,
        wgpu::DeviceType::Cpu => 25_000,
        // Unknown/other device types get a middle-of-the-road default.
        _ => 150_000,
    }
}
401
/// Byte length of the CBOR unsigned-integer encoding of `value`: one byte up
/// to 23, then 2/3/5/9 bytes as the value requires a u8/u16/u32/u64 argument.
fn cbor_nonce_len(value: u64) -> u32 {
    if value <= 23 {
        1
    } else if value <= 0xFF {
        2
    } else if value <= 0xFFFF {
        3
    } else if value <= 0xFFFF_FFFF {
        5
    } else {
        9
    }
}
411
412fn nonce_len_for_range(
413 start_nonce: u64,
414 batch_size: u32,
415 use_cbor_nonce: bool,
416) -> Result<u32, GpuError> {
417 if batch_size == 0 {
418 return Err(GpuError::Internal("batch_size must be positive".into()));
419 }
420 let last = start_nonce
421 .checked_add(batch_size as u64 - 1)
422 .ok_or_else(|| GpuError::Internal("nonce range overflow".into()))?;
423
424 let (start_len, last_len) = if use_cbor_nonce {
425 (cbor_nonce_len(start_nonce), cbor_nonce_len(last))
426 } else {
427 (
428 encode_nonce(start_nonce).len() as u32,
429 encode_nonce(last).len() as u32,
430 )
431 };
432
433 if start_len != last_len {
434 return Err(GpuError::Internal(
435 "nonce range crosses byte-length boundary; split batch".into(),
436 ));
437 }
438 Ok(start_len)
439}
440
/// Packs bytes into little-endian `u32` words, zero-padding the final partial
/// word. Empty input yields an empty vector.
fn pad_bytes_to_words(bytes: &[u8]) -> Vec<u32> {
    // Pack directly from up-to-4-byte chunks instead of first building a
    // zero-padded copy of the whole input; the remainder chunk is
    // zero-extended into a full word.
    bytes
        .chunks(4)
        .map(|chunk| {
            let mut word = [0u8; 4];
            word[..chunk.len()].copy_from_slice(chunk);
            u32::from_le_bytes(word)
        })
        .collect()
}
451
// Expands eight 32-bit hash words into a 32-byte digest, emitting each word
// in big-endian byte order.
fn to_u8_bytes(words: &[u32; 8]) -> [u8; 32] {
    let mut bytes = [0u8; 32];
    for (dst, word) in bytes.chunks_exact_mut(4).zip(words.iter()) {
        dst.copy_from_slice(&word.to_be_bytes());
    }
    bytes
}
459
// Compiles the embedded WGSL mining shader into a shader module on the
// context's device.
#[cfg_attr(test, allow(dead_code))]
fn create_shader_module(ctx: &GpuContext) -> wgpu::ShaderModule {
    ctx.device
        .create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("zeldhash-miner-gpu-miner-shader-wgsl"),
            source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(SHADER_WGSL)),
        })
}
468
469fn build_pipeline(ctx: &GpuContext) -> Result<GpuPipeline, GpuError> {
470 let shader = create_shader_module(ctx);
471
472 let layout = ctx
473 .device
474 .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
475 label: Some("zeldhash-miner-gpu-bind-layout"),
476 entries: &[
477 wgpu::BindGroupLayoutEntry {
478 binding: 0,
479 visibility: wgpu::ShaderStages::COMPUTE,
480 ty: wgpu::BindingType::Buffer {
481 ty: wgpu::BufferBindingType::Storage { read_only: true },
482 has_dynamic_offset: false,
483 min_binding_size: None,
484 },
485 count: None,
486 },
487 wgpu::BindGroupLayoutEntry {
488 binding: 1,
489 visibility: wgpu::ShaderStages::COMPUTE,
490 ty: wgpu::BindingType::Buffer {
491 ty: wgpu::BufferBindingType::Storage { read_only: true },
492 has_dynamic_offset: false,
493 min_binding_size: None,
494 },
495 count: None,
496 },
497 wgpu::BindGroupLayoutEntry {
498 binding: 2,
499 visibility: wgpu::ShaderStages::COMPUTE,
500 ty: wgpu::BindingType::Buffer {
501 ty: wgpu::BufferBindingType::Uniform,
502 has_dynamic_offset: false,
503 min_binding_size: NonZeroU64::new(
504 std::mem::size_of::<MiningParams>() as u64
505 ),
506 },
507 count: None,
508 },
509 wgpu::BindGroupLayoutEntry {
510 binding: 3,
511 visibility: wgpu::ShaderStages::COMPUTE,
512 ty: wgpu::BindingType::Buffer {
513 ty: wgpu::BufferBindingType::Storage { read_only: false },
514 has_dynamic_offset: false,
515 min_binding_size: NonZeroU64::new(
516 std::mem::size_of::<ResultBuffer>() as u64
517 ),
518 },
519 count: None,
520 },
521 ],
522 });
523
524 let pipeline_layout = ctx
525 .device
526 .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
527 label: Some("zeldhash-miner-gpu-pipeline-layout"),
528 bind_group_layouts: &[&layout],
529 push_constant_ranges: &[],
530 });
531
532 let pipeline = ctx
533 .device
534 .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
535 label: Some("zeldhash-miner-gpu-miner"),
536 layout: Some(&pipeline_layout),
537 module: &shader,
538 entry_point: "main",
539 });
540
541 Ok(GpuPipeline { pipeline, layout })
542}
543
544fn get_or_create_pipeline(ctx: &GpuContext) -> Result<Arc<GpuPipeline>, GpuError> {
545 if let Ok(mut cache) = ctx.pipeline_cache.lock() {
546 if let Some(p) = cache.as_ref() {
547 return Ok(p.clone());
548 }
549 let built = Arc::new(build_pipeline(ctx)?);
550 *cache = Some(built.clone());
551 return Ok(built);
552 }
553
554 Ok(Arc::new(build_pipeline(ctx)?))
556}
557
558fn get_or_create_fixed_buffers(ctx: &GpuContext) -> Result<Arc<FixedBuffers>, GpuError> {
559 let size = std::mem::size_of::<ResultBuffer>() as u64;
560
561 if let Ok(mut cache) = ctx.fixed_buffers.lock() {
562 if let Some(bufs) = cache.as_ref() {
563 return Ok(bufs.clone());
564 }
565
566 let result = ctx.device.create_buffer(&wgpu::BufferDescriptor {
567 label: Some("zeldhash-miner-gpu-results"),
568 size,
569 usage: wgpu::BufferUsages::STORAGE
570 | wgpu::BufferUsages::COPY_SRC
571 | wgpu::BufferUsages::COPY_DST,
572 mapped_at_creation: false,
573 });
574
575 let staging = ctx.device.create_buffer(&wgpu::BufferDescriptor {
576 label: Some("zeldhash-miner-gpu-result-staging"),
577 size,
578 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
579 mapped_at_creation: false,
580 });
581
582 let fixed = Arc::new(FixedBuffers { result, staging });
583 *cache = Some(fixed.clone());
584 return Ok(fixed);
585 }
586
587 let result = ctx.device.create_buffer(&wgpu::BufferDescriptor {
589 label: Some("zeldhash-miner-gpu-results"),
590 size,
591 usage: wgpu::BufferUsages::STORAGE
592 | wgpu::BufferUsages::COPY_SRC
593 | wgpu::BufferUsages::COPY_DST,
594 mapped_at_creation: false,
595 });
596 let staging = ctx.device.create_buffer(&wgpu::BufferDescriptor {
597 label: Some("zeldhash-miner-gpu-result-staging"),
598 size,
599 usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
600 mapped_at_creation: false,
601 });
602 Ok(Arc::new(FixedBuffers { result, staging }))
603}
604
// Uploads the batch's prefix/suffix/params into the pooled buffers and builds
// the bind group for one dispatch. `result_buf` is bound at binding 3 but
// owned by the caller so it can be zeroed and copied out around the dispatch.
//
// Fails when the pooled buffers cannot be created or resized.
#[cfg_attr(test, allow(dead_code))]
fn create_buffers(
    ctx: &GpuContext,
    pipeline: &GpuPipeline,
    batch: &MiningBatch<'_>,
    nonce_len: u32,
    result_buf: &wgpu::Buffer,
) -> Result<wgpu::BindGroup, GpuError> {
    // Byte payloads are packed into little-endian u32 words for the shader.
    let prefix_words = pad_bytes_to_words(batch.tx_prefix);
    let suffix_words = pad_bytes_to_words(batch.tx_suffix);

    let prefix_size = (prefix_words.len() * std::mem::size_of::<u32>()) as u64;
    let suffix_size = (suffix_words.len() * std::mem::size_of::<u32>()) as u64;
    let params_size = std::mem::size_of::<MiningParams>() as u64;

    // The guard stays alive for the rest of the function, keeping the pooled
    // buffers locked while they are written and bound.
    let buffers_guard = get_or_create_io_buffers(ctx, prefix_size, suffix_size, params_size)?;
    let buffers = buffers_guard
        .as_ref()
        // get_or_create_io_buffers always populates the cache before returning.
        .expect("io buffers must be initialized before use");

    if !prefix_words.is_empty() {
        ctx.queue
            .write_buffer(&buffers.prefix, 0, cast_slice(&prefix_words));
    }
    if !suffix_words.is_empty() {
        ctx.queue
            .write_buffer(&buffers.suffix, 0, cast_slice(&suffix_words));
    }

    let params = MiningParams {
        // 64-bit start nonce split into two u32 lanes.
        start_nonce_lo: batch.start_nonce as u32,
        start_nonce_hi: (batch.start_nonce >> 32) as u32,
        batch_size: batch.batch_size,
        target_zeros: batch.target_zeros as u32,
        // Original (unpadded) byte lengths — presumably used by the shader to
        // ignore the word-padding; confirm against shader.wgsl.
        prefix_len: batch.tx_prefix.len() as u32,
        suffix_len: batch.tx_suffix.len() as u32,
        nonce_len,
        use_cbor_nonce: batch.use_cbor_nonce as u32,
        _pad2: 0,
        _pad3: 0,
        _pad4: 0,
        _pad5: 0,
    };
    ctx.queue.write_buffer(
        &buffers.params,
        0,
        cast_slice(std::slice::from_ref(&params)),
    );

    let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: Some("zeldhash-miner-gpu-bind-group"),
        layout: &pipeline.layout,
        entries: &[
            wgpu::BindGroupEntry {
                binding: 0,
                resource: buffers.prefix.as_entire_binding(),
            },
            wgpu::BindGroupEntry {
                binding: 1,
                resource: buffers.suffix.as_entire_binding(),
            },
            wgpu::BindGroupEntry {
                binding: 2,
                resource: buffers.params.as_entire_binding(),
            },
            wgpu::BindGroupEntry {
                binding: 3,
                resource: result_buf.as_entire_binding(),
            },
        ],
    });

    Ok(bind_group)
}
679
680#[cfg_attr(test, allow(dead_code))]
681fn parse_results(mapped: &[u8]) -> Vec<MineResult> {
682 let required = std::mem::size_of::<ResultBuffer>();
683 if mapped.len() < required {
684 return Vec::new();
685 }
686
687 let buffer: ResultBuffer = pod_read_unaligned(mapped);
691 let found = buffer.found_count as usize;
692 let take = found.min(MAX_RESULTS);
693
694 let mut out = Vec::with_capacity(take);
695 for entry in buffer.results.iter().take(take) {
696 let nonce = ((entry.nonce_hi as u64) << 32) | entry.nonce_lo as u64;
697 out.push(MineResult {
698 nonce,
699 txid: to_u8_bytes(&entry.txid),
700 });
701 }
702 out
703}
704
// Runs one mining dispatch on the GPU and reads back the results.
//
// Steps: zero the result buffer, upload inputs and build the bind group,
// record a compute pass of ceil(batch_size / WORKGROUP_SIZE) workgroups, copy
// the results into the staging buffer, then map the staging buffer and parse
// it on the host.
//
// Fails when buffers cannot be prepared or mapping the staging buffer fails.
async fn dispatch_gpu(
    ctx: &GpuContext,
    batch: &MiningBatch<'_>,
    nonce_len: u32,
) -> Result<Vec<MineResult>, GpuError> {
    if batch.batch_size == 0 {
        return Ok(Vec::new());
    }

    let pipeline = get_or_create_pipeline(ctx)?;
    let fixed = get_or_create_fixed_buffers(ctx)?;

    // Clear stale results (including found_count) from the previous dispatch.
    let zero_template = vec![0u8; std::mem::size_of::<ResultBuffer>()];
    ctx.queue.write_buffer(&fixed.result, 0, &zero_template);

    let bind_group = create_buffers(ctx, &pipeline, batch, nonce_len, &fixed.result)?;

    let mut encoder = ctx
        .device
        .create_command_encoder(&wgpu::CommandEncoderDescriptor {
            label: Some("zeldhash-miner-gpu-encoder"),
        });

    {
        let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
            label: Some("zeldhash-miner-gpu-compute-pass"),
            timestamp_writes: None,
        });
        cpass.set_pipeline(&pipeline.pipeline);
        cpass.set_bind_group(0, &bind_group, &[]);
        // Ceiling division so a final partial workgroup still covers the tail.
        let groups = (batch.batch_size + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE;
        cpass.dispatch_workgroups(groups, 1, 1);
    }

    encoder.copy_buffer_to_buffer(
        &fixed.result,
        0,
        &fixed.staging,
        0,
        std::mem::size_of::<ResultBuffer>() as u64,
    );

    ctx.queue.submit(Some(encoder.finish()));

    // map_async completes via callback; forward the outcome over a oneshot
    // channel and block the device until the submitted work (and the map)
    // finish.
    let (sender, receiver) = oneshot::channel();
    fixed
        .staging
        .slice(..)
        .map_async(wgpu::MapMode::Read, move |res| {
            let _ = sender.send(res);
        });

    ctx.device.poll(wgpu::Maintain::Wait);

    receiver
        .await
        .map_err(|e| GpuError::Internal(format!("failed to receive map result: {e}")))?
        .map_err(|e| GpuError::Internal(format!("failed to map results: {e:?}")))?;

    let data = fixed.staging.slice(..).get_mapped_range();
    let parsed = parse_results(&data);
    // Drop the mapped view before unmap(); wgpu requires no live views when
    // unmapping.
    drop(data);
    fixed.staging.unmap();
    Ok(parsed)
}
771
/// Validates the batch's nonce range and runs one GPU mining dispatch,
/// returning any nonces whose txid met `target_zeros`.
///
/// # Errors
/// Fails when the nonce range is empty/overflowing, crosses an encoded-length
/// boundary, or the GPU dispatch itself fails.
pub async fn dispatch_mining_batch(
    ctx: &GpuContext,
    batch: &MiningBatch<'_>,
) -> Result<Vec<MineResult>, GpuError> {
    let nonce_len = nonce_len_for_range(batch.start_nonce, batch.batch_size, batch.use_cbor_nonce)?;
    dispatch_gpu(ctx, batch, nonce_len).await
}
780
/// Picks a mining batch size by timing a small calibration dispatch at
/// several candidate sizes and keeping the one with the highest hashes/sec.
///
/// The chosen value is cached on the context, so subsequent calls return it
/// immediately. If no candidate produced a measurable rate, a device-class
/// default from `fallback_batch_size` is used instead.
///
/// # Errors
/// Fails when the pipeline cannot be built or result mapping fails.
pub async fn calibrate_batch_size(ctx: &GpuContext) -> Result<u32, GpuError> {
    if let Ok(cache) = ctx.batch_size_cache.lock() {
        if let Some(value) = *cache {
            return Ok(value);
        }
    }

    let candidates = [1_000u32, 10_000, 100_000, 1_000_000];
    let mut best = 100_000u32;
    let mut best_hps = 0.0f64;

    // One-byte dummy prefix/suffix; target_zeros is set to 64 below —
    // presumably unreachable so the result buffer stays empty during timing;
    // confirm against `hash_meets_target`'s semantics.
    const DUMMY: &[u8] = &[0u8];
    let pipeline = get_or_create_pipeline(ctx)?;

    let prefix_words = pad_bytes_to_words(DUMMY);
    let suffix_words = pad_bytes_to_words(DUMMY);
    let params_template = MiningParams {
        start_nonce_lo: 0,
        start_nonce_hi: 0,
        batch_size: 1,
        target_zeros: 64, prefix_len: DUMMY.len() as u32,
        suffix_len: DUMMY.len() as u32,
        nonce_len: 1,
        use_cbor_nonce: 0,
        _pad2: 0,
        _pad3: 0,
        _pad4: 0,
        _pad5: 0,
    };

    // Calibration uses throwaway buffers rather than the pooled ones so it
    // cannot disturb an in-flight mining dispatch's cache.
    let prefix_buf = ctx
        .device
        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("zeldhash-miner-gpu-prefix-calibration"),
            contents: cast_slice(&prefix_words),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });
    let suffix_buf = ctx
        .device
        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("zeldhash-miner-gpu-suffix-calibration"),
            contents: cast_slice(&suffix_words),
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
        });
    let params_buf = ctx
        .device
        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
            label: Some("zeldhash-miner-gpu-params-calibration"),
            contents: cast_slice(std::slice::from_ref(&params_template)),
            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
        });
    let result_buf = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("zeldhash-miner-gpu-results-calibration"),
        size: std::mem::size_of::<ResultBuffer>() as u64,
        usage: wgpu::BufferUsages::STORAGE
            | wgpu::BufferUsages::COPY_SRC
            | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });
    let staging = ctx.device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("zeldhash-miner-gpu-result-staging-calibration"),
        size: std::mem::size_of::<ResultBuffer>() as u64,
        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });

    let bind_group = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: Some("zeldhash-miner-gpu-bind-group-calibration"),
        layout: &pipeline.layout,
        entries: &[
            wgpu::BindGroupEntry {
                binding: 0,
                resource: prefix_buf.as_entire_binding(),
            },
            wgpu::BindGroupEntry {
                binding: 1,
                resource: suffix_buf.as_entire_binding(),
            },
            wgpu::BindGroupEntry {
                binding: 2,
                resource: params_buf.as_entire_binding(),
            },
            wgpu::BindGroupEntry {
                binding: 3,
                resource: result_buf.as_entire_binding(),
            },
        ],
    });

    let zero_template = vec![0u8; std::mem::size_of::<ResultBuffer>()];

    // Time one dispatch per candidate size; the timed window spans encoding,
    // submission, and readback-map completion.
    for &size in &candidates {
        let mut params = params_template;
        params.batch_size = size;

        ctx.queue
            .write_buffer(&params_buf, 0, cast_slice(std::slice::from_ref(&params)));
        ctx.queue.write_buffer(&result_buf, 0, &zero_template);

        let start = Instant::now();
        let mut encoder = ctx
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("zeldhash-miner-gpu-calibration-encoder"),
            });
        {
            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("zeldhash-miner-gpu-calibration-pass"),
                timestamp_writes: None,
            });
            cpass.set_pipeline(&pipeline.pipeline);
            cpass.set_bind_group(0, &bind_group, &[]);
            let groups = (size + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE;
            cpass.dispatch_workgroups(groups, 1, 1);
        }

        encoder.copy_buffer_to_buffer(
            &result_buf,
            0,
            &staging,
            0,
            std::mem::size_of::<ResultBuffer>() as u64,
        );

        ctx.queue.submit(Some(encoder.finish()));
        let (sender, receiver) = oneshot::channel();
        staging
            .slice(..)
            .map_async(wgpu::MapMode::Read, move |res| {
                let _ = sender.send(res);
            });

        ctx.device.poll(wgpu::Maintain::Wait);

        receiver
            .await
            .map_err(|e| GpuError::Internal(format!("failed to receive map result: {e}")))?
            .map_err(|e| GpuError::Internal(format!("failed to map results: {e:?}")))?;

        // Calibration only needs the timing; the mapped contents are
        // discarded without being read.
        staging.unmap();

        let elapsed = start.elapsed().as_secs_f64();
        if elapsed == 0.0 {
            continue;
        }
        let hps = size as f64 / elapsed;
        if hps > best_hps {
            best_hps = hps;
            best = size;
        }
    }

    // No measurable candidate (e.g. all elapsed == 0): fall back to a
    // device-class heuristic.
    let best_final = if best_hps == 0.0 {
        fallback_batch_size(&ctx.adapter_info)
    } else {
        best
    };

    if let Ok(mut cache) = ctx.batch_size_cache.lock() {
        *cache = Some(best_final);
    }

    Ok(best_final)
}
951
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    // CPU reference miner: hashes prefix ++ encoded-nonce ++ suffix for every
    // nonce in the batch and collects those that meet the target. Used to
    // cross-check GPU output.
    fn cpu_mine(batch: &MiningBatch<'_>) -> Vec<MineResult> {
        let nonce_len =
            nonce_len_for_range(batch.start_nonce, batch.batch_size, batch.use_cbor_nonce)
                .expect("valid nonce range");
        let mut buf = Vec::new();
        let mut out = Vec::new();
        for offset in 0..batch.batch_size {
            if let Some(nonce) = batch.start_nonce.checked_add(offset as u64) {
                buf.clear();
                buf.extend_from_slice(batch.tx_prefix);
                if batch.use_cbor_nonce {
                    let encoded = zeldhash_miner_core::cbor::encode_cbor_uint(nonce);
                    // The whole batch must share one encoded length.
                    assert_eq!(encoded.len(), nonce_len as usize);
                    buf.extend_from_slice(&encoded);
                } else {
                    // Fixed-width big-endian encoding: keep the low
                    // `nonce_len` bytes of the 8-byte representation.
                    let be = nonce.to_be_bytes();
                    let start = 8 - nonce_len as usize;
                    buf.extend_from_slice(&be[start..]);
                }
                buf.extend_from_slice(batch.tx_suffix);
                let hash = zeldhash_miner_core::double_sha256(&buf);
                if zeldhash_miner_core::hash_meets_target(&hash, batch.target_zeros) {
                    out.push(MineResult { nonce, txid: hash });
                }
            }
        }
        out
    }

    #[test]
    fn pads_bytes_to_words() {
        // 3 bytes pack little-endian into one zero-padded word.
        let words = pad_bytes_to_words(&[0x01, 0x02, 0x03]);
        assert_eq!(words.len(), 1);
        assert_eq!(words[0], 0x030201);
    }

    #[test]
    fn converts_words_to_bytes() {
        // Each word is emitted big-endian.
        let words = [0x11223344u32; 8];
        let bytes = to_u8_bytes(&words);
        assert_eq!(bytes[0], 0x11);
        assert_eq!(bytes[1], 0x22);
        assert_eq!(bytes[2], 0x33);
        assert_eq!(bytes[3], 0x44);
    }

    #[test]
    fn gpu_matches_cpu_when_available() {
        let ctx = pollster::block_on(GpuContext::init());
        // Skip silently when no GPU/WebGPU device exists in the environment.
        let ctx = match ctx {
            Ok(c) => c,
            Err(_) => return, };

        let batch = MiningBatch {
            tx_prefix: b"hello",
            tx_suffix: b"world",
            start_nonce: 0,
            batch_size: 64,
            target_zeros: 1,
            use_cbor_nonce: false,
        };

        let mut cpu = cpu_mine(&batch);
        let mut gpu = pollster::block_on(dispatch_mining_batch(&ctx, &batch)).unwrap();

        // Order is not guaranteed by the GPU, so compare sorted.
        cpu.sort_by_key(|r| r.nonce);
        gpu.sort_by_key(|r| r.nonce);
        assert_eq!(cpu, gpu);
    }

    #[test]
    fn gpu_collects_multiple_results_up_to_max_when_available() {
        let ctx = pollster::block_on(GpuContext::init());
        // Skip silently when no GPU/WebGPU device exists in the environment.
        let ctx = match ctx {
            Ok(c) => c,
            Err(_) => return, };

        // target_zeros = 0 makes every nonce a match, so the result count
        // must clamp at MAX_RESULTS.
        let batch = MiningBatch {
            tx_prefix: b"a",
            tx_suffix: b"b",
            start_nonce: 0,
            batch_size: (MAX_RESULTS as u32) + 2,
            target_zeros: 0, use_cbor_nonce: false,
        };

        let gpu_results =
            pollster::block_on(dispatch_mining_batch(&ctx, &batch)).expect("gpu dispatch failed");
        assert_eq!(
            gpu_results.len(),
            MAX_RESULTS.min(batch.batch_size as usize)
        );
    }

    #[test]
    fn integrated_gpu_target_hash_rate_calculation() {
        let integrated = fallback_batch_size(&wgpu::AdapterInfo {
            name: String::from("test-integrated"),
            vendor: 0,
            device: 0,
            device_type: wgpu::DeviceType::IntegratedGpu,
            backend: wgpu::Backend::Vulkan,
            driver: String::new(),
            driver_info: String::new(),
        });

        assert_eq!(integrated, 100_000);

        let discrete = fallback_batch_size(&wgpu::AdapterInfo {
            name: String::from("test-discrete"),
            vendor: 0,
            device: 0,
            device_type: wgpu::DeviceType::DiscreteGpu,
            backend: wgpu::Backend::Vulkan,
            driver: String::new(),
            driver_info: String::new(),
        });
        assert!(discrete > integrated);

        // Sanity-check the implied hash rate at a plausible dispatch latency.
        let elapsed = Duration::from_millis(5); let rate = integrated as f64 / elapsed.as_secs_f64();
        assert!(rate >= 10_000_000.0, "expected >= 10 MH/s, got {rate} H/s");
    }
}
1085}