use ghostflow_core::Tensor;
use crate::deep::layers::Dense;
use crate::deep::activations::{ReLU, Tanh};
/// Three-layer fully connected network mapping a state to one output per action.
///
/// Layout: `state_dim -> 128 -> 128 -> num_actions`, with a ReLU after each of
/// the two hidden layers and no activation on the output layer.
pub struct DQN {
    fc1: Dense,
    fc2: Dense,
    fc3: Dense,
    /// Output width, exactly as passed to [`DQN::new`].
    num_actions: usize,
}

impl DQN {
    /// Builds the network for `state_dim` input features and `num_actions` outputs.
    pub fn new(state_dim: usize, num_actions: usize) -> Self {
        let fc1 = Dense::new(state_dim, 128);
        let fc2 = Dense::new(128, 128);
        let fc3 = Dense::new(128, num_actions);
        Self {
            fc1,
            fc2,
            fc3,
            num_actions,
        }
    }

    /// Runs `state` through the three layers and returns the output of the last one.
    ///
    /// `training` is forwarded unchanged to every `Dense::forward` call.
    pub fn forward(&mut self, state: &Tensor, training: bool) -> Tensor {
        let pre1 = self.fc1.forward(state, training);
        let hidden1 = ReLU::new().forward(&pre1);

        let pre2 = self.fc2.forward(&hidden1, training);
        let hidden2 = ReLU::new().forward(&pre2);

        // Output layer is left linear.
        self.fc3.forward(&hidden2, training)
    }
}
/// Dueling network: a shared feature layer feeding separate value and
/// advantage streams whose outputs are merged into one value per action.
///
/// Layout: `state_dim -> 128` (ReLU), then
/// * value stream: `128 -> 128` (ReLU) `-> 1`
/// * advantage stream: `128 -> 128` (ReLU) `-> num_actions`
pub struct DuelingDQN {
    feature_layer: Dense,
    value_stream: Vec<Dense>,
    advantage_stream: Vec<Dense>,
    /// Width of the advantage stream output, as passed to [`DuelingDQN::new`].
    num_actions: usize,
}

impl DuelingDQN {
    /// Builds the network for `state_dim` input features and `num_actions` outputs.
    pub fn new(state_dim: usize, num_actions: usize) -> Self {
        DuelingDQN {
            feature_layer: Dense::new(state_dim, 128),
            value_stream: vec![Dense::new(128, 128), Dense::new(128, 1)],
            advantage_stream: vec![Dense::new(128, 128), Dense::new(128, num_actions)],
            num_actions,
        }
    }

    /// Computes the combined output `V + A - mean(A)` for every row of `state`.
    ///
    /// `training` is forwarded unchanged to every `Dense::forward` call.
    /// The result has shape `[batch, num_actions]`.
    pub fn forward(&mut self, state: &Tensor, training: bool) -> Tensor {
        let features = self.feature_layer.forward(state, training);
        let features = ReLU::new().forward(&features);

        let value = Self::run_stream(&mut self.value_stream, features.clone(), training);
        let advantage = Self::run_stream(&mut self.advantage_stream, features, training);

        self.combine_streams(&value, &advantage)
    }

    /// Feeds `input` through `layers`, applying a ReLU after every layer except
    /// the last.
    ///
    /// Without the hidden activation, two stacked dense layers reduce to a
    /// single linear map, which would make the hidden layer of each stream
    /// redundant.
    fn run_stream(layers: &mut [Dense], input: Tensor, training: bool) -> Tensor {
        let last = layers.len().saturating_sub(1);
        let mut out = input;
        for (i, layer) in layers.iter_mut().enumerate() {
            out = layer.forward(&out, training);
            if i < last {
                out = ReLU::new().forward(&out);
            }
        }
        out
    }

    /// Merges the two streams row by row: `q[b][a] = v[b] + adv[b][a] - mean(adv[b])`.
    ///
    /// `value` is read as one scalar per batch row and `advantage` as
    /// `num_actions` contiguous values per batch row; the batch size is taken
    /// from the first dimension of `advantage`.
    ///
    /// # Panics
    ///
    /// Panics if either tensor holds fewer elements than that layout requires,
    /// or if the result tensor cannot be constructed.
    fn combine_streams(&self, value: &Tensor, advantage: &Tensor) -> Tensor {
        let value_data = value.data_f32();
        let adv_data = advantage.data_f32();
        let batch_size = advantage.dims()[0];
        let n = self.num_actions;

        let mut result = Vec::with_capacity(batch_size * n);
        for b in 0..batch_size {
            let v = value_data[b];
            let row = &adv_data[b * n..(b + 1) * n];
            let mean_adv = row.iter().sum::<f32>() / n as f32;
            result.extend(row.iter().map(|&adv| v + adv - mean_adv));
        }

        Tensor::from_slice(&result, &[batch_size, n])
            .expect("combined output has exactly batch_size * num_actions elements")
    }
}
/// Actor-critic network with a shared first layer and two heads.
///
/// Layout: `state_dim -> 128` (ReLU), then
/// * actor head: `128 -> 128` (ReLU) `-> num_actions`, followed by a row-wise softmax
/// * critic head: `128 -> 128` (ReLU) `-> 1`
pub struct ActorCritic {
    shared: Dense,
    actor: Vec<Dense>,
    critic: Vec<Dense>,
    /// Width of the actor head output, as passed to [`ActorCritic::new`].
    num_actions: usize,
}

impl ActorCritic {
    /// Builds the network for `state_dim` input features and `num_actions` outputs.
    pub fn new(state_dim: usize, num_actions: usize) -> Self {
        ActorCritic {
            shared: Dense::new(state_dim, 128),
            actor: vec![Dense::new(128, 128), Dense::new(128, num_actions)],
            critic: vec![Dense::new(128, 128), Dense::new(128, 1)],
            num_actions,
        }
    }

    /// Returns `(policy, value)` for `state`.
    ///
    /// `policy` is the softmax of the actor head output (each row sums to 1);
    /// `value` is the raw critic head output. `training` is forwarded
    /// unchanged to every `Dense::forward` call.
    pub fn forward(&mut self, state: &Tensor, training: bool) -> (Tensor, Tensor) {
        let trunk = self.shared.forward(state, training);
        let trunk = ReLU::new().forward(&trunk);

        let logits = Self::run_head(&mut self.actor, trunk.clone(), training);
        let policy = self.softmax(&logits);

        let value = Self::run_head(&mut self.critic, trunk, training);

        (policy, value)
    }

    /// Feeds `input` through `layers`, applying a ReLU after every layer except
    /// the last.
    ///
    /// The index of the last layer is computed before iterating: reading
    /// `layers.len()` inside the loop would need a shared borrow while
    /// `iter_mut()` still holds the exclusive one.
    fn run_head(layers: &mut [Dense], input: Tensor, training: bool) -> Tensor {
        let last = layers.len().saturating_sub(1);
        let mut out = input;
        for (i, layer) in layers.iter_mut().enumerate() {
            out = layer.forward(&out, training);
            if i < last {
                out = ReLU::new().forward(&out);
            }
        }
        out
    }

    /// Row-wise softmax over a 2-D tensor, returned with the same dimensions as `x`.
    ///
    /// The row maximum is subtracted before exponentiating so that large
    /// inputs do not overflow.
    ///
    /// # Panics
    ///
    /// Panics if `x` has fewer than two dimensions or the result tensor cannot
    /// be constructed.
    fn softmax(&self, x: &Tensor) -> Tensor {
        let logits = x.data_f32();
        let batch_size = x.dims()[0];
        let num_actions = x.dims()[1];

        let mut probs: Vec<f32> = Vec::with_capacity(logits.len());
        for b in 0..batch_size {
            let row = &logits[b * num_actions..(b + 1) * num_actions];
            let max_val = row.iter().copied().fold(f32::NEG_INFINITY, f32::max);

            let start = probs.len();
            probs.extend(row.iter().map(|&z| (z - max_val).exp()));

            let sum: f32 = probs[start..].iter().sum();
            for p in &mut probs[start..] {
                *p /= sum;
            }
        }

        Tensor::from_slice(&probs, x.dims())
            .expect("softmax output has the same element count as its input")
    }
}
/// Actor network with a two-layer Tanh trunk and two output heads.
///
/// Layout: `state_dim -> 64` (Tanh) `-> 64` (Tanh), then two parallel
/// `64 -> action_dim` layers named `mean_layer` and `log_std_layer`.
pub struct PPOActor {
    fc1: Dense,
    fc2: Dense,
    mean_layer: Dense,
    log_std_layer: Dense,
}

impl PPOActor {
    /// Builds the network for `state_dim` input features and `action_dim` outputs per head.
    pub fn new(state_dim: usize, action_dim: usize) -> Self {
        let fc1 = Dense::new(state_dim, 64);
        let fc2 = Dense::new(64, 64);
        let mean_layer = Dense::new(64, action_dim);
        let log_std_layer = Dense::new(64, action_dim);
        Self {
            fc1,
            fc2,
            mean_layer,
            log_std_layer,
        }
    }

    /// Returns `(mean, log_std)`: the raw outputs of the two heads for `state`.
    ///
    /// Neither head output is activated or clamped here. `training` is
    /// forwarded unchanged to every `Dense::forward` call.
    pub fn forward(&mut self, state: &Tensor, training: bool) -> (Tensor, Tensor) {
        let pre1 = self.fc1.forward(state, training);
        let hidden1 = Tanh::new().forward(&pre1);

        let pre2 = self.fc2.forward(&hidden1, training);
        let trunk = Tanh::new().forward(&pre2);

        // Both heads read the same trunk activation; mean is evaluated first.
        let mean = self.mean_layer.forward(&trunk, training);
        let log_std = self.log_std_layer.forward(&trunk, training);
        (mean, log_std)
    }
}
/// Critic network producing a single output per input row.
///
/// Layout: `state_dim -> 64` (Tanh) `-> 64` (Tanh) `-> 1`.
pub struct PPOCritic {
    fc1: Dense,
    fc2: Dense,
    value_layer: Dense,
}

impl PPOCritic {
    /// Builds the network for `state_dim` input features.
    pub fn new(state_dim: usize) -> Self {
        let fc1 = Dense::new(state_dim, 64);
        let fc2 = Dense::new(64, 64);
        let value_layer = Dense::new(64, 1);
        Self {
            fc1,
            fc2,
            value_layer,
        }
    }

    /// Returns the raw output of `value_layer` for `state`.
    ///
    /// `training` is forwarded unchanged to every `Dense::forward` call.
    pub fn forward(&mut self, state: &Tensor, training: bool) -> Tensor {
        let pre1 = self.fc1.forward(state, training);
        let hidden1 = Tanh::new().forward(&pre1);

        let pre2 = self.fc2.forward(&hidden1, training);
        let hidden2 = Tanh::new().forward(&pre2);

        self.value_layer.forward(&hidden2, training)
    }
}
/// Three-layer fully connected network used as the base model for
/// meta-learning.
///
/// Layout: `input_dim -> hidden_dim -> hidden_dim -> output_dim`, with a ReLU
/// after every layer except the last.
pub struct MAML {
    layers: Vec<Dense>,
}

impl MAML {
    /// Builds the three-layer network with the given widths.
    pub fn new(input_dim: usize, hidden_dim: usize, output_dim: usize) -> Self {
        MAML {
            layers: vec![
                Dense::new(input_dim, hidden_dim),
                Dense::new(hidden_dim, hidden_dim),
                Dense::new(hidden_dim, output_dim),
            ],
        }
    }

    /// Runs `x` through every layer and returns the output of the last one.
    ///
    /// `training` is forwarded unchanged to every `Dense::forward` call.
    pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
        // Computed up front: reading `self.layers.len()` inside the loop would
        // need a shared borrow while `iter_mut()` still holds the exclusive
        // one. `saturating_sub` also avoids underflow on an empty layer list.
        let last = self.layers.len().saturating_sub(1);

        let mut out = x.clone();
        for (i, layer) in self.layers.iter_mut().enumerate() {
            out = layer.forward(&out, training);
            if i < last {
                out = ReLU::new().forward(&out);
            }
        }
        out
    }

    /// Placeholder: returns one zero-filled vector of length 100 per layer.
    ///
    /// NOTE(review): this does not read the layers' actual parameters; it
    /// needs a parameter accessor on `Dense` to be implemented properly.
    pub fn clone_params(&self) -> Vec<Vec<f32>> {
        vec![vec![0.0f32; 100]; self.layers.len()]
    }

    /// Placeholder: currently discards `_params` and leaves the layers untouched.
    ///
    /// NOTE(review): needs a parameter setter on `Dense` to be implemented properly.
    pub fn set_params(&mut self, _params: Vec<Vec<f32>>) {}
}
/// Embedding network with a three-layer encoder.
///
/// Encoder layout: `input_dim -> 128 -> 128 -> embedding_dim`, with a ReLU
/// after every layer, including the last.
pub struct PrototypicalNetwork {
    encoder: Vec<Dense>,
}

impl PrototypicalNetwork {
    /// Builds the encoder for `input_dim` input features and `embedding_dim` outputs.
    pub fn new(input_dim: usize, embedding_dim: usize) -> Self {
        let encoder = vec![
            Dense::new(input_dim, 128),
            Dense::new(128, 128),
            Dense::new(128, embedding_dim),
        ];
        Self { encoder }
    }

    /// Embeds `x` by applying each encoder layer followed by a ReLU.
    ///
    /// `training` is forwarded unchanged to every `Dense::forward` call.
    pub fn encode(&mut self, x: &Tensor, training: bool) -> Tensor {
        self.encoder.iter_mut().fold(x.clone(), |acc, layer| {
            let linear = layer.forward(&acc, training);
            ReLU::new().forward(&linear)
        })
    }

    /// Returns the encoder output for `support_set`.
    ///
    /// NOTE(review): placeholder. `labels` is not used, so the result is one
    /// embedding per support row rather than a per-class aggregate.
    pub fn compute_prototypes(&mut self, support_set: &Tensor, labels: &Tensor, training: bool) -> Tensor {
        // Explicitly discarded until label-based aggregation is implemented.
        let _ = labels;
        self.encode(support_set, training)
    }

    /// Embeds `query` and passes it, with `prototypes`, to `euclidean_distance`.
    ///
    /// With the current placeholder distance this returns the query embedding itself.
    pub fn classify(&mut self, query: &Tensor, prototypes: &Tensor, training: bool) -> Tensor {
        let embedded_query = self.encode(query, training);
        self.euclidean_distance(&embedded_query, prototypes)
    }

    /// NOTE(review): placeholder. Returns a copy of `a`; `b` is ignored and no
    /// distance is computed.
    fn euclidean_distance(&self, a: &Tensor, b: &Tensor) -> Tensor {
        let _ = b;
        a.clone()
    }
}
/// Embedding network with a two-layer encoder and a separate attention layer.
///
/// Encoder layout: `input_dim -> 128 -> embedding_dim`, with a ReLU after
/// each layer. The attention layer is constructed as
/// `Dense::new(embedding_dim * 2, 1)` but is not yet used by any method.
pub struct MatchingNetwork {
    encoder: Vec<Dense>,
    attention: Dense,
}

impl MatchingNetwork {
    /// Builds the encoder and attention layer for the given dimensions.
    pub fn new(input_dim: usize, embedding_dim: usize) -> Self {
        let encoder = vec![Dense::new(input_dim, 128), Dense::new(128, embedding_dim)];
        let attention = Dense::new(embedding_dim * 2, 1);
        Self { encoder, attention }
    }

    /// Embeds `x` by applying each encoder layer followed by a ReLU.
    ///
    /// `training` is forwarded unchanged to every `Dense::forward` call.
    pub fn encode(&mut self, x: &Tensor, training: bool) -> Tensor {
        self.encoder.iter_mut().fold(x.clone(), |acc, layer| {
            let linear = layer.forward(&acc, training);
            ReLU::new().forward(&linear)
        })
    }

    /// Embeds `support` and then `query`, and passes both to `compute_attention`.
    ///
    /// With the current placeholder attention this returns the query embedding.
    pub fn forward(&mut self, support: &Tensor, query: &Tensor, training: bool) -> Tensor {
        // Support is encoded first, then the query.
        let embedded_support = self.encode(support, training);
        let embedded_query = self.encode(query, training);
        self.compute_attention(&embedded_query, &embedded_support, training)
    }

    /// NOTE(review): placeholder. Returns a copy of `query`; `support`,
    /// `training` and the `attention` layer are not used.
    fn compute_attention(&mut self, query: &Tensor, support: &Tensor, training: bool) -> Tensor {
        let _ = (support, training);
        query.clone()
    }
}
/// Network with a feature encoder and a relation module that scores a pair
/// of embeddings.
///
/// * Feature encoder: `input_dim -> 128 -> embedding_dim`, ReLU after each layer.
/// * Relation module: `embedding_dim * 2 -> 128 -> 1`, ReLU after each layer.
pub struct RelationNetwork {
    feature_encoder: Vec<Dense>,
    relation_module: Vec<Dense>,
}

impl RelationNetwork {
    /// Builds both sub-networks for the given dimensions.
    pub fn new(input_dim: usize, embedding_dim: usize) -> Self {
        RelationNetwork {
            feature_encoder: vec![Dense::new(input_dim, 128), Dense::new(128, embedding_dim)],
            relation_module: vec![Dense::new(embedding_dim * 2, 128), Dense::new(128, 1)],
        }
    }

    /// Embeds `x` by applying each encoder layer followed by a ReLU.
    ///
    /// `training` is forwarded unchanged to every `Dense::forward` call.
    pub fn encode(&mut self, x: &Tensor, training: bool) -> Tensor {
        let mut out = x.clone();
        for layer in &mut self.feature_encoder {
            out = layer.forward(&out, training);
            out = ReLU::new().forward(&out);
        }
        out
    }

    /// Concatenates `x1` and `x2` along the feature axis and runs the result
    /// through the relation module (ReLU after every layer, including the last).
    ///
    /// # Panics
    ///
    /// Panics if `x1` and `x2` have different batch sizes.
    pub fn compute_relation(&mut self, x1: &Tensor, x2: &Tensor, training: bool) -> Tensor {
        let mut out = self.concatenate(x1, x2);
        for layer in &mut self.relation_module {
            out = layer.forward(&out, training);
            out = ReLU::new().forward(&out);
        }
        out
    }

    /// Joins two 2-D tensors row by row into a `[rows, a_cols + b_cols]` tensor.
    ///
    /// Each output row is row `r` of `a` followed by row `r` of `b`. Appending
    /// all of `a` and then all of `b` is only a valid row-major layout for a
    /// single row, so the rows are interleaved explicitly.
    ///
    /// # Panics
    ///
    /// Panics if the batch sizes differ, if either tensor has fewer than two
    /// dimensions, or if the result tensor cannot be constructed.
    fn concatenate(&self, a: &Tensor, b: &Tensor) -> Tensor {
        let (rows, a_cols) = (a.dims()[0], a.dims()[1]);
        let (b_rows, b_cols) = (b.dims()[0], b.dims()[1]);
        assert_eq!(
            rows, b_rows,
            "concatenate: batch sizes differ ({} vs {})",
            rows, b_rows
        );

        let a_data = a.data_f32();
        let b_data = b.data_f32();

        let mut joined = Vec::with_capacity(rows * (a_cols + b_cols));
        for r in 0..rows {
            joined.extend_from_slice(&a_data[r * a_cols..(r + 1) * a_cols]);
            joined.extend_from_slice(&b_data[r * b_cols..(r + 1) * b_cols]);
        }

        Tensor::from_slice(&joined, &[rows, a_cols + b_cols])
            .expect("concatenated buffer has exactly rows * (a_cols + b_cols) elements")
    }
}
/// Stack of `SNAILAttentionBlock`s followed by a final dense layer.
pub struct SNAIL {
// Blocks applied in order by `forward`; `new` builds `num_blocks` of them,
// each with width `input_dim`.
attention_blocks: Vec<SNAILAttentionBlock>,
// Final projection, constructed as `Dense::new(input_dim, output_dim)`.
fc: Dense,
}
/// Residual block: two stacked `dim -> dim` dense layers whose output is
/// added element-wise to the block input.
struct SNAILAttentionBlock {
    attention: Dense,
    fc: Dense,
}

impl SNAILAttentionBlock {
    /// Builds a block whose two layers both map `dim -> dim`.
    fn new(dim: usize) -> Self {
        Self {
            attention: Dense::new(dim, dim),
            fc: Dense::new(dim, dim),
        }
    }

    /// Returns `x + fc(attention(x))`.
    ///
    /// No activation is applied between the two layers. `training` is
    /// forwarded unchanged to both `Dense::forward` calls.
    fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
        let attended = self.attention.forward(x, training);
        let projected = self.fc.forward(&attended, training);
        self.add_tensors(x, &projected)
    }

    /// Element-wise sum of the two tensors' data, returned with the dimensions of `a`.
    ///
    /// Elements are paired in storage order; pairing stops at the shorter of
    /// the two buffers.
    fn add_tensors(&self, a: &Tensor, b: &Tensor) -> Tensor {
        let lhs = a.data_f32();
        let rhs = b.data_f32();

        let mut sums = Vec::with_capacity(lhs.len().min(rhs.len()));
        for (l, r) in lhs.iter().zip(rhs.iter()) {
            sums.push(l + r);
        }

        Tensor::from_slice(&sums, a.dims()).unwrap()
    }
}
impl SNAIL {
    /// Builds `num_blocks` attention blocks of width `input_dim` and a final
    /// `input_dim -> output_dim` dense layer.
    pub fn new(input_dim: usize, num_blocks: usize, output_dim: usize) -> Self {
        let mut attention_blocks = Vec::with_capacity(num_blocks);
        for _ in 0..num_blocks {
            attention_blocks.push(SNAILAttentionBlock::new(input_dim));
        }
        let fc = Dense::new(input_dim, output_dim);
        Self {
            attention_blocks,
            fc,
        }
    }

    /// Applies every attention block in order, then the final dense layer.
    ///
    /// `training` is forwarded unchanged to every layer.
    pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
        let hidden = self
            .attention_blocks
            .iter_mut()
            .fold(x.clone(), |acc, block| block.forward(&acc, training));
        self.fc.forward(&hidden, training)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `[batch, features]` tensor with every element set to 0.5.
    fn constant_input(batch: usize, features: usize) -> Tensor {
        let data = vec![0.5f32; batch * features];
        Tensor::from_slice(&data, &[batch, features]).unwrap()
    }

    /// The DQN output has one column per action.
    #[test]
    fn test_dqn() {
        let mut dqn = DQN::new(4, 2);
        let q_values = dqn.forward(&constant_input(1, 4), false);
        assert_eq!(q_values.dims()[1], 2);
    }

    /// The policy has one column per action and the value has a single column.
    #[test]
    fn test_actor_critic() {
        let mut ac = ActorCritic::new(4, 2);
        let (policy, value) = ac.forward(&constant_input(1, 4), false);
        assert_eq!(policy.dims()[1], 2);
        assert_eq!(value.dims()[1], 1);
    }

    /// The MAML output width matches `output_dim` for a batch of two rows.
    #[test]
    fn test_maml() {
        let mut maml = MAML::new(10, 20, 5);
        let output = maml.forward(&constant_input(2, 10), false);
        assert_eq!(output.dims()[1], 5);
    }
}