relayrl_framework 0.4.52

A system-oriented, distributed reinforcement learning framework using a Rust backend with Python interfaces.
//! This module implements the gRPC-based RelayRL agent interface for communicating with the training server.
//! It defines a trait for asynchronous agent operations and provides an implementation that manages the
//! model, handles action requests, collects trajectories, and updates the model when new versions are available.

use crate::proto::relay_rl_route_client::RelayRlRouteClient as RelayRLRouteClient;
use crate::proto::{
    ActionResponse, RelayRlAction as grpc_RelayRLAction, RelayRlModel as RelayRLModel, RequestModel,
    Trajectory as grpc_Trajectory,
};
use crate::sys_utils::config_loader::{DEFAULT_CONFIG_CONTENT, DEFAULT_CONFIG_PATH};

use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
use std::time::Duration;
use tch::{CModule, Device, IValue, Kind, Tensor, no_grad};
use tokio::fs;
use tokio::sync::{RwLock as TokioRwLock, RwLockReadGuard, RwLockWriteGuard};
use tonic::Request;
use tonic::transport::{Channel, Endpoint, Error};

use crate::network::client::agent_wrapper::{convert_generic_dict, validate_model};
use crate::sys_utils::grpc_utils::{deserialize_model, serialize_action};
use crate::sys_utils::config_loader::ConfigLoader;
use crate::types::action::{RelayRLAction, RelayRLData, TensorData};
use crate::types::trajectory::{RelayRLTrajectory, RelayRLTrajectoryTrait};

#[cfg(all(feature = "console-subscriber", not(feature = "python_bindings")))]
use crate::orchestration::tokio::utils::get_or_init_console_subscriber;
#[cfg(not(feature = "python_bindings"))]
use crate::orchestration::tokio::utils::get_or_init_tokio_runtime;
#[cfg(not(feature = "python_bindings"))]
use tokio::runtime::Runtime as TokioRuntime;

use crate::{get_or_create_config_json_path, resolve_config_json_path};

/// Trait defining the asynchronous gRPC agent interface for the RelayRL framework.
///
/// This trait includes methods for:
/// - Performing an initial model handshake,
/// - Requesting actions from the current model,
/// - Flagging the final action of a trajectory,
/// - Sending trajectories to the training server,
/// - And polling the server for model updates.
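///
/// # Example
///
/// A minimal episode sketch driven through this trait (marked `no_run`; the
/// import path, tensor shapes, and reward values are illustrative assumptions,
/// not guarantees of this crate's API surface):
///
/// ```no_run
/// # use relayrl_framework::network::client::agent_grpc::RelayRLAgentGrpcTrait;
/// # use tch::{Device, Kind, Tensor};
/// # async fn episode(agent: &mut impl RelayRLAgentGrpcTrait) {
/// for _step in 0..100 {
///     // Placeholder observation and action mask; real shapes depend on the model.
///     let obs = Tensor::zeros([4], (Kind::Float, Device::Cpu));
///     let mask = Tensor::ones([2], (Kind::Float, Device::Cpu));
///     match agent.request_for_action(obs, mask, 0.0).await {
///         Ok(_action) => { /* apply the action in the environment */ }
///         Err(e) => eprintln!("no action available: {e}"),
///     }
/// }
/// // A terminal reward closes the trajectory, sends it to the server,
/// // and polls for an updated model.
/// agent.flag_last_action(1.0).await;
/// # }
/// ```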
#[tonic::async_trait]
pub trait RelayRLAgentGrpcTrait {
    /// Initiates a one-time handshake with the training server to retrieve the initial model.
    async fn initial_model_handshake(&mut self);

    /// Requests an action from the model given the current observation and mask.
    ///
    /// # Arguments
    /// * `obs` - The observation tensor.
    /// * `mask` - The mask tensor.
    /// * `reward` - The immediate reward.
    ///
    /// # Returns
    /// * `Result<Arc<RelayRLAction>, String>` - On success, returns the generated action (wrapped in an Arc);
    ///   on failure, returns an error string.
    async fn request_for_action(
        &mut self,
        obs: Tensor,
        mask: Tensor,
        reward: f32,
    ) -> Result<Arc<RelayRLAction>, String>;

    /// Finalizes the current trajectory by appending a terminal action, sends the trajectory to the server,
    /// polls for any updated model, and then resets the local trajectory buffer.
    ///
    /// # Arguments
    /// * `reward` - The final reward associated with the terminal action.
    async fn flag_last_action(&mut self, reward: f32);

    /// Serializes and sends the current trajectory to the training server.
    ///
    /// # Returns
    /// * `i32` - Returns 1 if the server successfully accepts the trajectory, or 0 otherwise.
    async fn send_trajectory_to_server(&mut self) -> i32;

    /// Polls the training server for an updated model.
    ///
    /// If a new model is available (i.e. non-empty model bytes are received and the version is updated),
    /// the new model is deserialized, loaded, and the local version is updated.
    async fn poll_for_model_update(&mut self);
}

/// gRPC-based RelayRL Agent implementation.
///
/// This struct implements the RelayRLAgentGrpcTrait and provides a gRPC interface
/// for communicating with the training server. It manages the model, handles action requests,
/// collects trajectories, and updates the model as new versions become available.
pub struct RelayRLAgentGrpc {
    /// Status flag indicating whether the agent is active.
    active: Arc<AtomicBool>,
    /// The current model, stored as a TorchScript module.
    /// Wrapped in an Arc<TokioRwLock<>> for thread-safe shared access.
    model: Arc<TokioRwLock<Option<CModule>>>,
    /// File path for saving and loading the model.
    client_model_path: PathBuf,
    /// gRPC client stub used for communication with the training server.
    stub: Option<RelayRLRouteClient<Channel>>,
    /// Local model version number, used to check for updates.
    local_version: AtomicI64,
    /// Buffer holding the sequence of actions (trajectory) collected by the agent.
    current_traj: RelayRLTrajectory,
    /// Tokio runtime handle (present only when Python bindings are not compiled).
    #[cfg(not(feature = "python_bindings"))]
    tokio_runtime: Arc<TokioRuntime>,
}

impl RelayRLAgentGrpc {
    /// Creates a new instance of the gRPC-based RelayRL agent.
    ///
    /// This function loads configuration settings, establishes a gRPC channel to the training server,
    /// and initializes the agent's state including the model (if provided) and trajectory buffer.
    ///
    /// # Arguments
    /// * `model` - An optional initial TorchScript model.
    /// * `config_path` - An optional configuration file path.
    /// * `training_server_address` - An optional address for the training server (defaults to "http://127.0.0.1:50051").
    ///
    /// # Returns
    /// * A new instance of RelayRLAgentGrpc.
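    ///
    /// # Example
    ///
    /// A construction sketch (marked `no_run`; the import path, model file, and
    /// server address are illustrative assumptions):
    ///
    /// ```no_run
    /// # use relayrl_framework::network::client::agent_grpc::RelayRLAgentGrpc;
    /// # use tch::CModule;
    /// # async fn build() {
    /// // Load a local TorchScript model if one exists; otherwise the agent
    /// // performs the initial handshake to fetch one from the server.
    /// let model = CModule::load("model.pt").ok();
    /// let _agent = RelayRLAgentGrpc::init_agent(
    ///     model,
    ///     None, // fall back to the default config path
    ///     Some("http://127.0.0.1:50051".to_string()),
    /// )
    /// .await;
    /// # }
    /// ```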
    pub async fn init_agent(
        model: Option<CModule>,
        config_path: Option<PathBuf>,
        training_server_address: Option<String>,
    ) -> Self {
        #[cfg(all(feature = "console-subscriber", not(feature = "python_bindings")))]
        get_or_init_console_subscriber();
        #[cfg(not(feature = "python_bindings"))]
        let tokio_runtime: Arc<TokioRuntime> = get_or_init_tokio_runtime();

        // Load configuration parameters from the config loader.
        let client_model_path: PathBuf;
        let max_traj_length: u32;
        {
            let config_path = resolve_config_json_path!(config_path);
            let config: ConfigLoader = ConfigLoader::new(None, config_path);
            client_model_path = config.client_model_path;
            max_traj_length = config.max_traj_length;
        }

        // Create and connect the gRPC channel using the specified server address.
        let server_addr: String = training_server_address
            .unwrap_or_else(|| "http://127.0.0.1:50051".to_string());
        let grpc_endpoint: Endpoint =
            Channel::from_shared(server_addr).expect("Invalid server address");
        let grpc_channel: Option<Channel> = match grpc_endpoint.connect().await {
            Ok(channel) => Some(channel),
            Err(_) => {
                // Retry up to 60 times, sleeping 2 seconds between attempts.
                let mut channel: Option<Channel> = None;
                for _ in 0..60 {
                    tokio::time::sleep(Duration::from_secs(2)).await;
                    if let Ok(connected) = grpc_endpoint.connect().await {
                        channel = Some(connected);
                        break;
                    }
                }
                channel
            }
        };

        let stub: Option<RelayRLRouteClient<Channel>> = match grpc_channel {
            Some(channel) => Some(RelayRLRouteClient::new(channel)),
            None => panic!("Failed to establish a gRPC channel to the training server"),
        };

        // Wrap the model in a lock for thread-safe shared access.
        let model_arc: Arc<TokioRwLock<Option<CModule>>> = Arc::new(TokioRwLock::new(model));

        // Initialize the agent's state.
        let mut agent: RelayRLAgentGrpc = Self {
            active: Arc::new(AtomicBool::new(true)),
            model: Arc::clone(&model_arc),
            client_model_path,
            current_traj: RelayRLTrajectory::new(Some(max_traj_length), None),
            stub,
            local_version: AtomicI64::new(0),
            #[cfg(not(feature = "python_bindings"))]
            tokio_runtime,
        };

        let model_is_none = agent.model.read().await.is_none();

        // Handle initial model handshake with server
        if model_is_none {
            agent.initial_model_handshake().await;
        } else {
            validate_model(
                agent
                    .model
                    .read()
                    .await
                    .as_ref()
                    .expect("Failed to read runtime model"),
            );
        }

        agent
    }

    /// Restarts the current instance of the gRPC agent.
    ///
    /// This is a convenience wrapper that performs, in order:
    ///
    /// 1. `disable_agent()`
    /// 2. `enable_agent()`
    ///
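    /// # Example
    ///
    /// A usage sketch (marked `no_run`; the import path and server address are
    /// illustrative assumptions):
    ///
    /// ```no_run
    /// # use relayrl_framework::network::client::agent_grpc::RelayRLAgentGrpc;
    /// # async fn demo(agent: &mut RelayRLAgentGrpc) {
    /// let results = agent
    ///     .restart_agent(Some("http://127.0.0.1:50051".to_string()))
    ///     .await;
    /// for result in results {
    ///     if let Err(e) = result {
    ///         eprintln!("restart step failed: {e}");
    ///     }
    /// }
    /// # }
    /// ```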
    pub async fn restart_agent(
        &mut self,
        training_server_address: Option<String>,
    ) -> Vec<Result<(), Box<dyn std::error::Error>>> {
        let disable_result: Result<(), Box<dyn std::error::Error>> = self.disable_agent().await;
        let enable_result: Result<(), Box<dyn std::error::Error>> =
            self.enable_agent(training_server_address).await;
        vec![disable_result, enable_result]
    }

    /// Disables networking operations for the gRPC agent.
    pub async fn disable_agent(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        if self.active.load(Ordering::SeqCst) {
            self.stub.take().ok_or_else(|| {
                Box::new(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    "gRPC stub not found",
                )) as Box<dyn std::error::Error>
            })?;
            self.active.store(false, Ordering::SeqCst);
        }

        Ok(())
    }

    /// Re-enables networking operations for the gRPC agent.
    ///
    /// For initial instantiation, use `init_agent()` instead.
    pub async fn enable_agent(
        &mut self,
        training_server_address: Option<String>,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let result: Result<(), Box<dyn std::error::Error>> = if !self.active.load(Ordering::SeqCst)
        {
            let mut inner_results: Result<(), Box<dyn std::error::Error>> = Ok(());

            let server_addr: String = training_server_address
                .unwrap_or_else(|| "http://127.0.0.1:50051".to_string());
            let grpc_endpoint: Endpoint =
                Channel::from_shared(server_addr).expect("Invalid server address");
            let grpc_channel: Option<Channel> = match grpc_endpoint.connect().await {
                Ok(channel) => Some(channel),
                Err(err) => {
                    // Retry up to 60 times, sleeping 2 seconds between attempts.
                    let mut channel: Option<Channel> = None;
                    for _ in 0..60 {
                        tokio::time::sleep(Duration::from_secs(2)).await;
                        if let Ok(connected) = grpc_endpoint.connect().await {
                            channel = Some(connected);
                            break;
                        }
                    }
                    if channel.is_none() {
                        inner_results = Err(Box::new(err));
                    }
                    channel
                }
            };

            self.stub = match grpc_channel {
                Some(channel) => Some(RelayRLRouteClient::new(channel)),
                None => return inner_results,
            };

            let model_is_none: bool = self.model.read().await.is_none();
            if model_is_none {
                self.initial_model_handshake().await;
            }

            inner_results
        } else {
            Ok(())
        };

        result
    }

    pub async fn get_model_version(&self) -> i64 {
        self.local_version.load(Ordering::SeqCst)
    }
}

#[tonic::async_trait]
impl RelayRLAgentGrpcTrait for RelayRLAgentGrpc {
    /// Initiates a one-time handshake with the training server to retrieve the initial model.
    ///
    /// Sends a RequestModel message with `first_time` set to 1.
    /// If the server responds with a valid model (code == 1 and non-empty model bytes),
    /// the model is deserialized and loaded, and the local version is updated.
    async fn initial_model_handshake(&mut self) {
        println!("[RelayRLAgent - initial_handshake] requesting initial model from server...");
        let req_msg = RequestModel {
            first_time: 1,
            version: 0, // Use simple versioning
        };
        loop {
            match self
                .stub
                .as_mut()
                .expect("Failed to get gRPC stub")
                .client_poll(Request::new(req_msg.clone()))
                .await
            {
                Ok(response) => {
                    let resp: RelayRLModel = response.into_inner();
                    if resp.code == 1 && !resp.model.is_empty() {
                        println!("[RelayRLAgent - initial_handshake] Received initial model from server...");
                        match deserialize_model(resp.model) {
                            Ok(loaded_model) => {
                                validate_model(&loaded_model);
                                loaded_model.save(&self.client_model_path).expect("Failed to save runtime model to path");
                                let mut model_lock = self.model.write().await;
                                *model_lock = Some(loaded_model);
                                println!("[RelayRLAgent - initial_handshake] Model loaded, proceeding to RL loop.");
                                break;
                            }
                            Err(e) => {
                                eprintln!("[RelayRLAgent - initial_handshake] Failed to load model: {}", e);
                            }
                        }
                    } else {
                        println!("[RelayRLAgent - initial_handshake] No initial model available. Waiting...");
                    }
                }
                Err(e) => {
                    eprintln!("[RelayRLAgent - initial_handshake] gRPC error during handshake: {}", e);
                }
            }
            tokio::time::sleep(Duration::from_millis(500)).await;
        }
        // A model is guaranteed to be loaded once this loop exits.
    }

    /// Requests an action from the current model using the provided observation and mask.
    ///
    /// TorchScript inference runs under `no_grad` while holding a read lock on the model.
    /// The resulting `RelayRLAction` (which contains only safe, serializable data) is appended
    /// to the current trajectory and returned to the caller.
    ///
    /// # Arguments
    /// * `obs` - The observation tensor (moved).
    /// * `mask` - The mask tensor (moved).
    /// * `reward` - The immediate reward.
    async fn request_for_action(
        &mut self,
        obs: Tensor,
        mask: Tensor,
        reward: f32,
    ) -> Result<Arc<RelayRLAction>, String> {
        let action_result = {
            // Lock the model
            let model_guard: RwLockReadGuard<Option<CModule>> = self.model.read().await;
            let model = match &*model_guard {
                Some(m) => m,
                None => return Err("No model available yet!".to_string()),
            };

            // Move Tensors to CPU contiguously
            let obs: Tensor = obs.to_device(Device::Cpu).contiguous();
            let mask: Tensor = mask.to_device(Device::Cpu).contiguous();

            // Convert Tensors -> IValue
            let obs_ivalue = IValue::Tensor(obs.to_kind(Kind::Float));
            let mask_ivalue = IValue::Tensor(mask.to_kind(Kind::Float));
            let inputs: Vec<IValue> = vec![obs_ivalue, mask_ivalue];

            // Run step(...) with gradient tracking disabled
            no_grad(|| {
                let output_ivalue: IValue = model
                    .method_is("step", &inputs)
                    .map_err(|e| format!("Failed to call model.step: {}", e))?;

                // Expect output to be a 2-tuple: (action_tensor, data_dict)
                let outputs: &Vec<IValue> = match output_ivalue {
                    IValue::Tuple(ref tup) if tup.len() == 2 => tup,
                    _ => return Err("step() did not return (action, data_dict) tuple".to_string()),
                };

                // Extract action
                let action_tensor: Tensor = if let IValue::Tensor(t) = &outputs[0] {
                    t.to_kind(Kind::Float)
                } else {
                    Tensor::zeros([], (Kind::Float, Device::Cpu))
                };

                // Convert data
                let data_dict: Option<HashMap<String, RelayRLData>> = match &outputs[1] {
                    IValue::GenericDict(dict) => {
                        Some(convert_generic_dict(dict).expect("Failed to convert data dict"))
                    }
                    _ => Some(HashMap::new()),
                };

                // Build RelayRLAction with Tensors turned into `TensorData`
                let obs_td: TensorData =
                    TensorData::try_from(&obs).expect("Failed to convert obs to TensorData");
                let act_td: TensorData = TensorData::try_from(&action_tensor)
                    .expect("Failed to convert act to TensorData");
                let mask_td: TensorData =
                    TensorData::try_from(&mask).expect("Failed to convert mask to TensorData");

                let r4sa: RelayRLAction = RelayRLAction::new(
                    Some(obs_td),
                    Some(act_td),
                    Some(mask_td),
                    reward,
                    data_dict,
                    false,
                    false,
                );

                Ok(r4sa)
            })
        };

        // Propagate any error from the inference closure
        let r4sa: RelayRLAction = match action_result {
            Ok(r4sa) => r4sa,
            Err(msg) => return Err(msg),
        };

        // Add the action to the current trajectory
        self.current_traj.add_action(&r4sa, false);

        // Return the Arc
        Ok(Arc::new(r4sa))
    }

    /// Finalizes the current trajectory by appending a terminal action, sending the trajectory to the server,
    /// polling for model updates, and resetting the local trajectory.
    ///
    /// A terminal action is created with `done` set to true and appended to the trajectory.
    /// Then, the trajectory is serialized and sent to the server. If the server accepts the trajectory,
    /// the agent polls for an updated model before clearing the trajectory buffer.
    ///
    /// # Arguments
    /// * `reward` - The final reward associated with the terminal action.
    async fn flag_last_action(&mut self, reward: f32) {
        // Create a terminal action (indicating the end of an episode) with the given reward.
        let last_action: RelayRLAction =
            RelayRLAction::new(None, None, None, reward, None, true, false);
        self.current_traj.add_action(&last_action, false);

        // Send the trajectory to the training server.
        let response_code: i32 = self.send_trajectory_to_server().await;

        if response_code == 0 {
            println!("[RelayRLAgent - flag_last_action] Keep collecting trajectory");
            return;
        }

        // Poll the server for a model update.
        self.poll_for_model_update().await;
    }

    /// Serializes the current trajectory and sends it to the training server.
    ///
    /// Each stored action is converted to its gRPC representation using the `serialize_action` utility.
    /// The actions are then wrapped into a `grpc_Trajectory` message and sent using the gRPC client stub.
    ///
    /// # Returns
    /// * `i32` - Returns 1 if the trajectory was successfully sent, or 0 if it was rejected.
    async fn send_trajectory_to_server(&mut self) -> i32 {
        let mut action_msgs: Vec<grpc_RelayRLAction> = Vec::new();
        // Convert each action in the trajectory into its proto representation.
        for action in &self.current_traj.actions {
            let proto_action: grpc_RelayRLAction = serialize_action(action);
            action_msgs.push(proto_action);
        }
        // Wrap the actions into a grpc_Trajectory message.
        let traj = grpc_Trajectory {
            actions: action_msgs,
        };
        println!("[RelayRLAgent - send_trajectory] Sending trajectory to server...");
        let request: Request<grpc_Trajectory> = Request::new(traj);
        match self
            .stub
            .as_mut()
            .expect("Failed to get gRPC stub")
            .send_actions(request)
            .await
        {
            Ok(response) => {
                let resp: ActionResponse = response.into_inner();
                if resp.code == 1 {
                    println!(
                        "[RelayRLAgent - send_trajectory] Trajectory sent: {}",
                        resp.message
                    );
                    1
                } else {
                    println!(
                        "[RelayRLAgent - send_trajectory] Trajectory rejected: {}",
                        resp.message
                    );
                    0
                }
            }
            Err(e) => {
                eprintln!("[RelayRLAgent - send_trajectory] Trajectory error: {}", e);
                std::process::exit(1);
            }
        }
    }

    /// Polls the training server for an updated model.
    ///
    /// A RequestModel message is sent with `first_time` set to 0 and the current local version.
    /// If a new model is available (i.e. the returned model bytes are non-empty), the model is deserialized,
    /// loaded, and the local version is updated.
    async fn poll_for_model_update(&mut self) {
        println!("[RelayRLAgent - poll_for_model] Polling for model update...");
        let current_version: i64 = self.local_version.load(Ordering::SeqCst);
        let poll_req = RequestModel {
            first_time: 0,
            version: current_version,
        };
        let request: Request<RequestModel> = Request::new(poll_req);
        match self
            .stub
            .as_mut()
            .expect("Failed to get gRPC stub")
            .client_poll(request)
            .await
        {
            Ok(response) => {
                println!("[RelayRLAgent - poll_for_model] Received poll response from server...");
                let resp: RelayRLModel = response.into_inner();
                if resp.code == 1 {
                    if !resp.model.is_empty() {
                        let mut model_lock: RwLockWriteGuard<Option<CModule>> =
                            self.model.write().await;
                        match deserialize_model(resp.model) {
                            Ok(loaded_model) => {
                                // validate and then load new model into memory
                                validate_model(&loaded_model);
                                loaded_model
                                    .save(&self.client_model_path)
                                    .expect("Failed to save runtime model to path");
                                *model_lock = Some(loaded_model);
                                self.local_version.store(resp.version, Ordering::SeqCst);
                                println!(
                                    "[RelayRLAgent - poll_for_model] Updated local model from server (poll)."
                                );
                            }
                            Err(e) => {
                                eprintln!(
                                    "[RelayRLAgent - poll_for_model] Failed to load updated model: {}",
                                    e
                                );
                            }
                        }
                    }
                } else if resp.code == 0 {
                    // No update available.
                } else if resp.code == -1 {
                    println!(
                        "[RelayRLAgent - poll_for_model] Server reported error: {}",
                        resp.error
                    );
                }
            }
            Err(e) => {
                eprintln!(
                    "[RelayRLAgent - poll_for_model] gRPC error while polling for model: {}",
                    e
                );
            }
        }
    }
}