// Module: stdlib/rl/actor_critic.tern
// Purpose: Actor-Critic RL Methods
// Author: RFI-IRFOS
// Ref: https://ternlang.com
// Balances policy (Actor) and value (Critic) updates.
// Bundles the two 4 x 4 trit weight tensors consumed by the forward passes
// in this module (actor_forward_trit and critic_forward_trit).
struct ActorCritic {
// Policy (actor) weights; multiplied by the state in actor_forward_trit.
actor: trittensor<4 x 4>,
// Value (critic) weights; multiplied by the state in critic_forward_trit.
critic: trittensor<4 x 4>
}
// Actor forward pass.
// Multiplies the model's actor weights (4 x 4) by the state column (4 x 1)
// and returns the element at row 0, column 0 of the product. The remaining
// three rows of the product are computed but not used.
// NOTE(review): @sparseskip presumably lets the multiply skip zero-valued
// trits -- confirm against the ternlang directive reference.
fn actor_forward_trit(model: ActorCritic, state: trittensor<4 x 1>) -> trit {
    @sparseskip
    let policy_out: trittensor<4 x 1> = model.actor * state;
    return policy_out[0, 0];
}
// Critic forward pass.
// Multiplies the model's critic weights (4 x 4) by the state column (4 x 1)
// and returns the element at row 0, column 0 of the product. The remaining
// three rows of the product are computed but not used.
// NOTE(review): @sparseskip presumably lets the multiply skip zero-valued
// trits -- confirm against the ternlang directive reference.
fn critic_forward_trit(model: ActorCritic, state: trittensor<4 x 1>) -> trit {
    @sparseskip
    let value_out: trittensor<4 x 1> = model.critic * state;
    return value_out[0, 0];
}
// Combines the actor and critic loss signals into a single update trit.
// Returns tend when either loss equals tend; every other combination of
// inputs returns affirm.
fn ac_update_trit(actor_loss: trit, critic_loss: trit) -> trit {
    // The two guards are independent and side-effect free, so the order in
    // which they are checked does not change the result.
    if critic_loss == tend {
        return tend;
    }
    if actor_loss == tend {
        return tend;
    }
    return affirm;
}