use trustformers_core::{
errors::Result,
layers::{LayerNorm, Linear},
tensor::Tensor,
Layer,
};
use super::{
config::{BiologicalConfig, MemoryType, PlasticityType},
model::BiologicalModelOutput,
};
/// A single stored memory: the encoded content plus the bookkeeping
/// fields that drive eviction (strength discounted by age) and
/// retrieval reinforcement.
#[derive(Debug, Clone)]
pub struct MemoryTrace {
/// Encoded representation of the remembered input (output of the
/// layer's memory-encoding projection; presumably [batch, d_model] —
/// TODO confirm against Tensor semantics).
pub content: Tensor,
/// Retrieval strength; reinforced on each recall and capped at 2.0 in
/// `retrieve_memories`.
pub strength: f32,
/// Number of retrieval passes this trace has survived (incremented by
/// 1.0 per `retrieve_memories` call).
pub age: f32,
/// Memory category, copied from the layer config at store time.
pub memory_type: MemoryType,
}
/// State of one synapse in the dense capacity x capacity grid.
#[derive(Debug, Clone)]
pub struct SynapticConnection {
/// Current synaptic weight, adjusted by the plasticity update rules.
pub weight: f32,
/// Eligibility trace that scales weight changes; decays under STDP.
pub plasticity_trace: f32,
/// Slow variable used by the metaplasticity rule to damp further
/// weight changes (higher state => smaller updates).
pub metaplasticity_state: f32,
}
/// One biologically-inspired memory layer: a bounded buffer of memory
/// traces, a dense grid of plastic synapses, and the linear/normalisation
/// sub-layers used to encode, retrieve, and emit representations.
#[derive(Debug)]
pub struct BiologicalMemoryLayer {
/// Layer configuration (capacity, d_model, plasticity rule, ...).
pub config: BiologicalConfig,
/// Stored traces, at most `config.memory_capacity` after each store.
pub memory_traces: Vec<MemoryTrace>,
/// capacity x capacity grid of synapse states.
pub synaptic_connections: Vec<Vec<SynapticConnection>>,
/// Projects the raw input before encoding/retrieval.
pub input_projection: Linear,
/// Produces the representation stored as a memory trace.
pub memory_encoding: Linear,
/// NOTE(review): constructed but never called in this file's forward
/// path — confirm whether it is used elsewhere or is dead weight.
pub memory_retrieval: Linear,
/// Final per-timestep output projection.
pub output_projection: Linear,
/// Normalisation applied before the output projection.
pub layer_norm: LayerNorm,
}
impl BiologicalMemoryLayer {
/// Builds a memory layer from `config`: all four projections are
/// `d_model -> d_model`, the synaptic grid is `capacity x capacity`
/// with every field zeroed, and the trace buffer starts empty.
///
/// # Errors
/// Propagates any failure from `LayerNorm::new`.
pub fn new(config: &BiologicalConfig) -> Result<Self> {
    let d_model = config.d_model;
    let capacity = config.memory_capacity;

    // Every synapse starts in the same zeroed state, so the grid can be
    // built by cloning a single template value.
    let zero_synapse = SynapticConnection {
        weight: 0.0,
        plasticity_trace: 0.0,
        metaplasticity_state: 0.0,
    };
    let synaptic_connections = vec![vec![zero_synapse; capacity]; capacity];

    Ok(Self {
        config: config.clone(),
        memory_traces: Vec::new(),
        synaptic_connections,
        input_projection: Linear::new(d_model, d_model, config.use_bias),
        memory_encoding: Linear::new(d_model, d_model, config.use_bias),
        memory_retrieval: Linear::new(d_model, d_model, config.use_bias),
        output_projection: Linear::new(d_model, d_model, config.use_bias),
        layer_norm: LayerNorm::new(vec![d_model], 1e-12)?,
    })
}
/// Processes a sequence input one timestep at a time, storing and
/// retrieving memory traces as it goes, then concatenates the
/// per-timestep outputs along axis 1.
///
/// Assumes `input` is rank-3 with shape [batch, seq, d_model] — the
/// slice/squeeze on axis 1 implies this, but confirm with callers.
///
/// NOTE(review): each `output_t` was `squeeze(1)`-ed to 2-D, so
/// `Tensor::concat(..., 1)` presumably yields [batch, seq * d_model]
/// rather than restoring [batch, seq, d_model] — verify against
/// `Tensor::concat` semantics and what downstream consumers expect.
/// NOTE(review): `outputs[0]` panics when seq_len == 0 — consider a
/// guard if empty sequences are possible.
pub fn forward(&mut self, input: &Tensor) -> Result<Tensor> {
let _batch_size = input.shape()[0];
let seq_len = input.shape()[1];
let mut outputs = Vec::new();
for t in 0..seq_len {
// Extract timestep t: [batch, 1, d_model] -> [batch, d_model].
let input_t = input.slice(1, t, t + 1)?.squeeze(1)?;
let output_t = self.forward_timestep(&input_t)?;
outputs.push(output_t);
}
// Stitch the per-timestep outputs back together along axis 1.
let mut output = outputs[0].clone();
for i in 1..outputs.len() {
output = Tensor::concat(&[output, outputs[i].clone()], 1)?;
}
Ok(output)
}
/// Runs one timestep: project the input, encode and store it as a new
/// memory trace, average in any sufficiently similar retrieved
/// memories, then normalise and project the result.
fn forward_timestep(&mut self, input: &Tensor) -> Result<Tensor> {
    let projected = self.input_projection.forward(input.clone())?;

    // Encode and persist this step BEFORE querying, so the query can
    // also match the memory that was just stored.
    let encoded = self.memory_encoding.forward(projected.clone())?;
    self.store_memory_trace(&encoded)?;

    // Blend recalled memories with the projection by simple averaging.
    let recalled = self.retrieve_memories(&projected)?;
    let combined = if recalled.is_empty() {
        projected
    } else {
        let mut acc = projected.clone();
        for memory in &recalled {
            acc = acc.add(memory)?;
        }
        acc.div_scalar(1.0 + recalled.len() as f32)?
    };

    let normalized = self.layer_norm.forward(combined)?;
    self.output_projection.forward(normalized)
}
/// Appends `memory` as a fresh trace (full strength, age zero) and, if
/// the buffer now exceeds `memory_capacity`, evicts the trace with the
/// lowest age-discounted strength (the earliest such trace on ties).
fn store_memory_trace(&mut self, memory: &Tensor) -> Result<()> {
    self.memory_traces.push(MemoryTrace {
        content: memory.clone(),
        strength: 1.0,
        age: 0.0,
        memory_type: self.config.memory_type.clone(),
    });

    if self.memory_traces.len() > self.config.memory_capacity {
        // Effective strength decays with age; a strict `<` keeps the
        // first index among equally weak traces.
        let (weakest, _) = self
            .memory_traces
            .iter()
            .enumerate()
            .fold((0usize, f32::INFINITY), |best, (i, trace)| {
                let effective = trace.strength / (trace.age + 1.0);
                if effective < best.1 {
                    (i, effective)
                } else {
                    best
                }
            });
        self.memory_traces.remove(weakest);
    }
    Ok(())
}
/// Returns clones of every stored memory whose similarity to `query`
/// exceeds 0.5, reinforcing each matched trace (capped at 2.0) and
/// ageing every trace by one step.
fn retrieve_memories(&mut self, query: &Tensor) -> Result<Vec<Tensor>> {
    const THRESHOLD: f32 = 0.5;

    // Score first: `compute_similarity` borrows `self` immutably,
    // while the reinforcement loop below needs `&mut` access.
    let scores = self
        .memory_traces
        .iter()
        .map(|trace| self.compute_similarity(query, &trace.content))
        .collect::<Result<Vec<f32>>>()?;

    let mut matches = Vec::new();
    for (trace, &score) in self.memory_traces.iter_mut().zip(scores.iter()) {
        if score > THRESHOLD {
            // Recall strengthens the trace, up to a hard cap.
            trace.strength = (trace.strength + 0.1 * score).min(2.0);
            matches.push(trace.content.clone());
        }
        trace.age += 1.0;
    }
    Ok(matches)
}
/// Scalar similarity between `a` and `b`: batch-mean dot product along
/// axis 1 divided by the product of batch-mean L2 norms, with a small
/// epsilon to avoid division by zero.
fn compute_similarity(&self, a: &Tensor, b: &Tensor) -> Result<f32> {
    let dot = a.mul(b)?.sum_axes(&[1])?.mean()?;
    let a_norm = a.pow_scalar(2.0)?.sum_axes(&[1])?.sqrt()?.mean()?;
    let b_norm = b.pow_scalar(2.0)?.sum_axes(&[1])?.sqrt()?.mean()?;
    let denom = a_norm.mul(&b_norm)?.add_scalar(1e-8)?;
    dot.div(&denom)?.to_scalar()
}
/// Dispatches a synaptic update according to the configured plasticity
/// rule; rules without a dedicated handler are a no-op.
pub fn update_plasticity(&mut self, targets: &Tensor) -> Result<()> {
    match self.config.plasticity_type {
        PlasticityType::STDP => self.update_stdp_plasticity(targets),
        PlasticityType::Hebbian => self.update_hebbian_plasticity(targets),
        PlasticityType::Metaplasticity => self.update_metaplasticity(targets),
        _ => Ok(()),
    }
}
/// STDP-style update: each synapse moves its weight in proportion to
/// its accumulated eligibility trace, then the trace decays
/// exponentially (factor 0.95 per call).
///
/// `_targets` is currently unused — the update is driven purely by the
/// per-connection traces. (The previous dead read of
/// `config.stdp_window` has been removed.)
fn update_stdp_plasticity(&mut self, _targets: &Tensor) -> Result<()> {
    let learning_rate = self.config.learning_rate;
    // Flattened iteration avoids the index-based double loop and its
    // per-access bounds checks.
    for connection in self.synaptic_connections.iter_mut().flatten() {
        connection.weight += learning_rate * connection.plasticity_trace;
        // Exponential decay of the eligibility trace.
        connection.plasticity_trace *= 0.95;
    }
    Ok(())
}
/// Hebbian update: each synapse's weight grows in proportion to its
/// plasticity trace (no trace decay, unlike the STDP rule).
///
/// `_targets` is currently unused.
fn update_hebbian_plasticity(&mut self, _targets: &Tensor) -> Result<()> {
    let learning_rate = self.config.learning_rate;
    for row in self.synaptic_connections.iter_mut() {
        for connection in row.iter_mut() {
            connection.weight += learning_rate * connection.plasticity_trace;
        }
    }
    Ok(())
}
/// Metaplasticity update: weight changes are damped by the synapse's
/// metaplastic state, which itself accumulates with trace activity and
/// slowly decays.
///
/// `_targets` is currently unused.
fn update_metaplasticity(&mut self, _targets: &Tensor) -> Result<()> {
    let learning_rate = self.config.learning_rate;
    for connection in self.synaptic_connections.iter_mut().flatten() {
        // A larger metaplastic state shrinks the effective step size.
        let meta_factor = 1.0 / (1.0 + connection.metaplasticity_state);
        connection.weight += learning_rate * meta_factor * connection.plasticity_trace;
        // Accumulate with activity, then decay toward zero.
        connection.metaplasticity_state += 0.01 * connection.plasticity_trace.abs();
        connection.metaplasticity_state *= 0.99;
    }
    Ok(())
}
pub fn reset_memory(&mut self) -> Result<()> {
self.memory_traces.clear();
for connections in &mut self.synaptic_connections {
for connection in connections {
connection.weight = 0.0;
connection.plasticity_trace = 0.0;
connection.metaplasticity_state = 0.0;
}
}
Ok(())
}
/// Read-only view of the currently stored memory traces.
/// (Returns `&Vec` rather than `&[MemoryTrace]` — kept as-is because it
/// is part of the public interface.)
pub fn get_memory_traces(&self) -> &Vec<MemoryTrace> {
&self.memory_traces
}
/// Total number of trainable parameters across the four projections
/// and the layer norm.
pub fn parameter_count(&self) -> usize {
    let projections = [
        &self.input_projection,
        &self.memory_encoding,
        &self.memory_retrieval,
        &self.output_projection,
    ];
    let linear_params: usize = projections.iter().map(|p| p.parameter_count()).sum();
    linear_params + self.layer_norm.parameter_count()
}
/// Approximate memory footprint in megabytes: 4-byte f32 parameters,
/// plus stored trace contents (d_model f32s each — assumes one vector
/// per trace; TODO confirm trace tensor shape), plus 12 bytes per
/// synaptic connection (three f32 fields).
pub fn memory_usage(&self) -> f32 {
    const BYTES_PER_F32: f32 = 4.0;
    const MB: f32 = 1_000_000.0;
    let param_memory = self.parameter_count() as f32 * BYTES_PER_F32 / MB;
    let trace_memory =
        self.memory_traces.len() as f32 * self.config.d_model as f32 * BYTES_PER_F32 / MB;
    // Sum row lengths instead of `len() * [0].len()`: the old form
    // panicked on `[0]` when `memory_capacity == 0`, and would
    // mis-count a jagged grid.
    let synapse_count: usize = self.synaptic_connections.iter().map(Vec::len).sum();
    let synaptic_memory = synapse_count as f32 * 12.0 / MB;
    param_memory + trace_memory + synaptic_memory
}
}
/// A stack of `BiologicalMemoryLayer`s followed by a final output
/// projection.
#[derive(Debug)]
pub struct BiologicalMemory {
/// Shared model configuration (n_layer, d_model, ...).
pub config: BiologicalConfig,
/// The stacked memory layers, applied in order during `forward`.
pub layers: Vec<BiologicalMemoryLayer>,
/// Final `d_model -> d_model` projection over the last hidden states.
pub output_projection: Linear,
}
impl BiologicalMemory {
/// Builds a stack of `config.n_layer` memory layers plus the final
/// output projection.
///
/// # Errors
/// Propagates the first failure from `BiologicalMemoryLayer::new`.
pub fn new(config: &BiologicalConfig) -> Result<Self> {
    // Fallible collect short-circuits on the first layer that fails.
    let layers = (0..config.n_layer)
        .map(|_| BiologicalMemoryLayer::new(config))
        .collect::<Result<Vec<_>>>()?;
    Ok(Self {
        config: config.clone(),
        layers,
        output_projection: Linear::new(config.d_model, config.d_model, config.use_bias),
    })
}
/// Runs `input` through every layer in order, collects one memory
/// trace per layer (the FIRST stored trace only, if any), and wraps
/// the projected result in a `BiologicalModelOutput`.
///
/// NOTE(review): only `traces[0]` per layer is captured — confirm
/// whether the first trace is representative or whether all traces
/// were intended. Concatenating these along axis 1 assumes compatible
/// shapes across layers — TODO confirm.
pub fn forward(&mut self, input: &Tensor) -> Result<BiologicalModelOutput> {
let mut hidden_states = input.clone();
let mut all_plasticity_traces = Vec::new();
for layer in &mut self.layers {
hidden_states = layer.forward(&hidden_states)?;
let traces = layer.get_memory_traces();
if !traces.is_empty() {
let trace_content = traces[0].content.clone();
all_plasticity_traces.push(trace_content);
}
}
let output = self.output_projection.forward(hidden_states)?;
// Concatenate per-layer trace contents along axis 1, or None when no
// layer produced a trace.
let plasticity_traces = if !all_plasticity_traces.is_empty() {
let mut stacked = all_plasticity_traces[0].clone();
for i in 1..all_plasticity_traces.len() {
stacked = Tensor::concat(&[stacked, all_plasticity_traces[i].clone()], 1)?;
}
Some(stacked)
} else {
None
};
// Only hidden_states and plasticity_traces are populated by this
// model variant; the other output slots are unused here.
Ok(BiologicalModelOutput {
hidden_states: output,
spike_trains: None,
memory_states: None,
attention_weights: None,
capsule_outputs: None,
dendritic_activations: None,
plasticity_traces,
})
}
/// Applies the configured plasticity update to every layer,
/// short-circuiting on the first error.
pub fn update_plasticity(&mut self, targets: &Tensor) -> Result<()> {
    self.layers
        .iter_mut()
        .try_for_each(|layer| layer.update_plasticity(targets))
}
pub fn reset_states(&mut self) -> Result<()> {
for layer in &mut self.layers {
layer.reset_memory()?;
}
Ok(())
}
pub fn parameter_count(&self) -> usize {
self.layers.iter().map(|l| l.parameter_count()).sum::<usize>()
+ self.output_projection.parameter_count()
}
/// Approximate footprint in MB: per-layer usage plus the output
/// projection's f32 parameters (4 bytes each).
pub fn memory_usage(&self) -> f32 {
    let layer_mb: f32 = self.layers.iter().map(|layer| layer.memory_usage()).sum();
    let projection_mb = self.output_projection.parameter_count() as f32 * 4.0 / 1_000_000.0;
    layer_mb + projection_mb
}
}