1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
use crate::{
    channel::ComputeChannel,
    server::{ComputeServer, Handle},
    tune::{AutotuneOperationSet, Tuner},
};
use alloc::vec::Vec;
use alloc::{boxed::Box, sync::Arc};
use burn_common::reader::Reader;
use burn_common::stub::RwLock;

/// Client-side entry point for requesting work from a [`ComputeServer`].
///
/// A client pairs the channel used to reach the server with a shared,
/// lock-protected [`Tuner`]. Clients are meant to be obtained for a specific
/// device through the `Compute` struct.
#[derive(Debug)]
pub struct ComputeClient<Server, Channel>
where
    Server: ComputeServer,
{
    /// Channel every server request is forwarded through.
    channel: Channel,
    /// Autotuner state, shared between all clones of this client.
    tuner: Arc<RwLock<Tuner<Server, Channel>>>,
}

impl<S, C> Clone for ComputeClient<S, C>
where
    S: ComputeServer,
    C: ComputeChannel<S>,
{
    fn clone(&self) -> Self {
        Self {
            channel: self.channel.clone(),
            tuner: self.tuner.clone(),
        }
    }
}

impl<Server, Channel> ComputeClient<Server, Channel>
where
    Server: ComputeServer,
    Channel: ComputeChannel<Server>,
{
    /// Create a new client.
    pub fn new(channel: Channel, tuner: Arc<RwLock<Tuner<Server, Channel>>>) -> Self {
        Self { channel, tuner }
    }

    /// Given a handle, returns owned resource as bytes.
    pub fn read(&self, handle: &Handle<Server>) -> Reader<Vec<u8>> {
        self.channel.read(handle)
    }

    /// Given a resource, stores it and returns the resource handle.
    pub fn create(&self, data: &[u8]) -> Handle<Server> {
        self.channel.create(data)
    }

    /// Reserves `size` bytes in the storage, and returns a handle over them.
    pub fn empty(&self, size: usize) -> Handle<Server> {
        self.channel.empty(size)
    }

    /// Executes the `kernel` over the given `handles`.
    pub fn execute(&self, kernel: Server::Kernel, handles: &[&Handle<Server>]) {
        self.channel.execute(kernel, handles)
    }

    /// Wait for the completion of every task in the server.
    pub fn sync(&self) {
        self.channel.sync()
    }

    /// Executes the fastest kernel in the autotune operation, using (cached) runtime benchmarks
    pub fn autotune_execute(
        &self,
        autotune_operation_set: Box<dyn AutotuneOperationSet<Server::AutotuneKey>>,
    ) {
        self.tuner
            .write()
            .unwrap()
            .execute_autotune(autotune_operation_set, self);
    }

    /// Get the fastest kernel for the given autotune key if it exists.
    pub fn autotune_result(&self, key: &Server::AutotuneKey) -> Option<usize> {
        self.tuner.read().unwrap().autotune_fastest(key)
    }
}