1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// Declares the id and name ("ml-v2") of this API module through the crate's `define_api_id!` macro.
// NOTE(review): the macro expansion is not visible from this file; presumably the hex value is the
// unique identifier the host uses to recognise this API version and must stay stable — confirm.
define_api_id!(0xaed6_0109_4f07_3c81, "ml-v2");
pub use super::ml_v1::FutureHandle;
pub use super::ml_v1::PollSimple;
pub use super::ml_v1::TrainingHandle;
use crate::FFIResult;
use bytemuck::Pod;
use bytemuck::Zeroable;
/// Plain-data experiment configuration that is written across the FFI boundary by
/// `take_future_ark_experiment_config`.
///
/// The struct is `#[repr(C)]` and derives `Pod`/`Zeroable`, so its layout is exactly two
/// consecutive `u32` fields; field order and types must not be changed.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)]
pub struct ArkExperimentConfig {
/// Number of features of the experiment. Presumably the same quantity as the `num_features`
/// argument of `start_training` — TODO confirm against the host implementation.
pub num_features: u32,
/// Number of actions of the experiment. Presumably the same quantity as the `num_actions`
/// argument of `start_training` — TODO confirm against the host implementation.
pub num_actions: u32,
}
/// Pair of future handles returned by `experiment_config` and `experiment_config_from_registry`.
///
/// The two parts of the configuration are delivered through separate futures. The `ark_config`
/// handle is consumed with `poll_future_ark_experiment_config` and
/// `take_future_ark_experiment_config`.
/// NOTE(review): this file declares no poll/take function for `module_config`; presumably the
/// generic future functions of `ml_v1` are used for it — confirm.
///
/// The struct is `#[repr(C)]` and `Pod`, so field order and types are part of the ABI.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)]
pub struct ExperimentConfigFuture {
/// Future which resolves to an `ArkExperimentConfig`.
pub ark_config: FutureHandle,
/// Future which resolves to a `Vec<u8>`.
pub module_config: FutureHandle,
}
// Host imports for the "ark-ml-v2" API, declared as `extern "C"` functions and processed by the
// `ark_bindgen` attribute macro.
//
// `clippy::too_many_arguments` is allowed because `start_training` (8 parameters) and
// `start_training_from_checkpoint` (9 parameters) exceed Clippy's default argument limit.
//
// NOTE(review): the macro expansion is not visible here; the declaration order and signatures
// below are presumably significant to the generated bindings, so do not reorder them casually.
#[allow(clippy::too_many_arguments)]
#[ark_api_macros::ark_bindgen(imports = "ark-ml-v2")]
mod ml {
use super::*;
extern "C" {
/// Set the cid of the module that will be started when training in the cloud with remote workers.
///
/// Hacky workaround for not being able to get info about the current module in the api host.
/// Should be deprecated as soon as a better alternative comes along.
pub fn set_worker_module_link(cid: &str) -> FFIResult<()>;
/// Connects to an existing experiment.
///
/// The returned future resolves to a `TrainingHandle`.
pub fn connect_to_experiment(
hive_url: &str,
hive_port: u32,
run_id: &str,
) -> FFIResult<FutureHandle>;
/// Starts training.
///
/// The returned future resolves to a `TrainingHandle`.
///
/// NOTE(review): `config` is presumably the user specified configuration blob that
/// `experiment_config` later returns — confirm.
pub fn start_training(
hive_url: &str,
hive_port: u32,
game_name: &str,
experiment_name: &str,
num_features: u32,
num_actions: u32,
num_remote_workers: u32,
config: &str,
) -> FFIResult<FutureHandle>;
/// Starts training from a checkpoint.
///
/// Takes the same parameters as `start_training` plus `checkpoint`.
///
/// The returned future resolves to a `TrainingHandle`.
pub fn start_training_from_checkpoint(
hive_url: &str,
hive_port: u32,
game_name: &str,
experiment_name: &str,
num_features: u32,
num_actions: u32,
num_remote_workers: u32,
config: &str,
checkpoint: &str,
) -> FFIResult<FutureHandle>;
/// Stops all workers associated with an experiment handle.
///
/// If the experiment has remote workers it will also stop those.
pub fn stop_experiment(context: TrainingHandle) -> FFIResult<()>;
/// Retrieves the configuration for the experiment.
///
/// The result is an `ExperimentConfigFuture`, i.e. a pair of future handles rather than the
/// configuration itself.
///
/// The user specified configuration is usually a json blob specified when creating the experiment.
/// This may require it to be downloaded from the hive server.
pub fn experiment_config(context: TrainingHandle) -> FFIResult<ExperimentConfigFuture>;
/// Retrieves the configuration for an experiment without connecting to it.
///
/// This is useful for downloading snapshots from experiments that are no longer running on the server
/// and thus it is not possible to connect to them.
///
/// The user specified configuration is usually a json blob specified when creating the experiment.
/// This may require it to be downloaded from the hive server.
pub fn experiment_config_from_registry(
hive_url: &str,
hive_port: u32,
run_id: &str,
) -> FFIResult<ExperimentConfigFuture>;
/// Retrieves the latest snapshot from an experiment without connecting to it.
///
/// This is useful for downloading snapshots from experiments that are no longer running on the server
/// and thus it is not possible to connect to them.
///
/// The returned future resolves to a `Vec<u8>`.
pub fn download_snapshot_from_registry(
hive_url: &str,
hive_port: u32,
run_id: &str,
) -> FFIResult<FutureHandle>;
/// Polls a future that resolves to an `ArkExperimentConfig`, such as the `ark_config` handle
/// of an `ExperimentConfigFuture`.
///
/// NOTE(review): the meaning of the returned `PollSimple` is defined in `ml_v1` and is not
/// visible here; presumably it reports whether the value is ready to be taken — confirm.
pub fn poll_future_ark_experiment_config(handle: FutureHandle) -> FFIResult<PollSimple>;
/// Takes the result of a future that resolves to an `ArkExperimentConfig` and writes it
/// into `data`.
///
/// NOTE(review): presumably this must only be called after
/// `poll_future_ark_experiment_config` has reported the future as ready, and presumably
/// consumes the handle — confirm against the host implementation.
pub fn take_future_ark_experiment_config(
handle: FutureHandle,
data: &mut ArkExperimentConfig,
) -> FFIResult<()>;
}
}
// Re-export every public item of the private `ml` module so callers can use the bindings
// directly from this module's path.
pub use ml::*;