use ghostflow_core::Tensor;
use crate::deep::layers::{Conv2d, Dense, BatchNorm2d};
use crate::deep::activations::ReLU;
pub struct Conv3d {
weight: Dense,
in_channels: usize,
out_channels: usize,
kernel_size: (usize, usize, usize),
}
impl Conv3d {
pub fn new(in_channels: usize, out_channels: usize, kernel_size: (usize, usize, usize)) -> Self {
let kernel_volume = kernel_size.0 * kernel_size.1 * kernel_size.2;
Conv3d {
weight: Dense::new(in_channels * kernel_volume, out_channels),
in_channels,
out_channels,
kernel_size,
}
}
pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
self.weight.forward(x, training)
}
}
pub struct C3D {
conv1: Conv3d,
conv2: Conv3d,
conv3a: Conv3d,
conv3b: Conv3d,
conv4a: Conv3d,
conv4b: Conv3d,
conv5a: Conv3d,
conv5b: Conv3d,
fc6: Dense,
fc7: Dense,
fc8: Dense,
}
impl C3D {
pub fn new(num_classes: usize) -> Self {
C3D {
conv1: Conv3d::new(3, 64, (3, 3, 3)),
conv2: Conv3d::new(64, 128, (3, 3, 3)),
conv3a: Conv3d::new(128, 256, (3, 3, 3)),
conv3b: Conv3d::new(256, 256, (3, 3, 3)),
conv4a: Conv3d::new(256, 512, (3, 3, 3)),
conv4b: Conv3d::new(512, 512, (3, 3, 3)),
conv5a: Conv3d::new(512, 512, (3, 3, 3)),
conv5b: Conv3d::new(512, 512, (3, 3, 3)),
fc6: Dense::new(512, 4096),
fc7: Dense::new(4096, 4096),
fc8: Dense::new(4096, num_classes),
}
}
pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = self.conv1.forward(x, training);
out = ReLU::new().forward(&out);
out = self.conv2.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3a.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3b.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv4a.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv4b.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv5a.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv5b.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.fc6.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.fc7.forward(&out, training);
out = ReLU::new().forward(&out);
self.fc8.forward(&out, training)
}
}
pub struct I3D {
conv3d_1a: Conv3d,
conv3d_2a: Conv3d,
conv3d_2b: Conv3d,
conv3d_3a: Conv3d,
conv3d_3b: Conv3d,
conv3d_4a: Conv3d,
conv3d_4b: Conv3d,
conv3d_5a: Conv3d,
conv3d_5b: Conv3d,
fc: Dense,
}
impl I3D {
pub fn new(num_classes: usize) -> Self {
I3D {
conv3d_1a: Conv3d::new(3, 64, (7, 7, 7)),
conv3d_2a: Conv3d::new(64, 64, (1, 1, 1)),
conv3d_2b: Conv3d::new(64, 192, (3, 3, 3)),
conv3d_3a: Conv3d::new(192, 192, (1, 1, 1)),
conv3d_3b: Conv3d::new(192, 480, (3, 3, 3)),
conv3d_4a: Conv3d::new(480, 480, (1, 1, 1)),
conv3d_4b: Conv3d::new(480, 832, (3, 3, 3)),
conv3d_5a: Conv3d::new(832, 832, (1, 1, 1)),
conv3d_5b: Conv3d::new(832, 1024, (3, 3, 3)),
fc: Dense::new(1024, num_classes),
}
}
pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = self.conv3d_1a.forward(x, training);
out = ReLU::new().forward(&out);
out = self.conv3d_2a.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3d_2b.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3d_3a.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3d_3b.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3d_4a.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3d_4b.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3d_5a.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3d_5b.forward(&out, training);
out = ReLU::new().forward(&out);
self.fc.forward(&out, training)
}
}
pub struct SlowFast {
slow_pathway: SlowPathway,
fast_pathway: FastPathway,
fusion: Dense,
fc: Dense,
}
struct SlowPathway {
conv1: Conv3d,
conv2: Conv3d,
conv3: Conv3d,
}
impl SlowPathway {
fn new() -> Self {
SlowPathway {
conv1: Conv3d::new(3, 64, (1, 7, 7)),
conv2: Conv3d::new(64, 128, (1, 3, 3)),
conv3: Conv3d::new(128, 256, (1, 3, 3)),
}
}
fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = self.conv1.forward(x, training);
out = ReLU::new().forward(&out);
out = self.conv2.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3.forward(&out, training);
ReLU::new().forward(&out)
}
}
struct FastPathway {
conv1: Conv3d,
conv2: Conv3d,
conv3: Conv3d,
}
impl FastPathway {
fn new() -> Self {
FastPathway {
conv1: Conv3d::new(3, 8, (5, 7, 7)),
conv2: Conv3d::new(8, 16, (3, 3, 3)),
conv3: Conv3d::new(16, 32, (3, 3, 3)),
}
}
fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = self.conv1.forward(x, training);
out = ReLU::new().forward(&out);
out = self.conv2.forward(&out, training);
out = ReLU::new().forward(&out);
out = self.conv3.forward(&out, training);
ReLU::new().forward(&out)
}
}
impl SlowFast {
pub fn new(num_classes: usize) -> Self {
SlowFast {
slow_pathway: SlowPathway::new(),
fast_pathway: FastPathway::new(),
fusion: Dense::new(256 + 32, 512),
fc: Dense::new(512, num_classes),
}
}
pub fn forward(&mut self, x_slow: &Tensor, x_fast: &Tensor, training: bool) -> Tensor {
let slow_out = self.slow_pathway.forward(x_slow, training);
let fast_out = self.fast_pathway.forward(x_fast, training);
let fused = self.concatenate(&slow_out, &fast_out);
let mut out = self.fusion.forward(&fused, training);
out = ReLU::new().forward(&out);
self.fc.forward(&out, training)
}
fn concatenate(&self, a: &Tensor, b: &Tensor) -> Tensor {
let a_data = a.data_f32();
let b_data = b.data_f32();
let mut result = Vec::new();
result.extend_from_slice(a_data);
result.extend_from_slice(b_data);
Tensor::from_slice(&result, &[a.dims()[0], a.dims()[1] + b.dims()[1]]).unwrap()
}
}
pub struct WaveNet {
causal_conv: Conv2d,
residual_blocks: Vec<WaveNetBlock>,
output_conv: Conv2d,
}
struct WaveNetBlock {
dilated_conv: Conv2d,
residual_conv: Conv2d,
skip_conv: Conv2d,
dilation: usize,
}
impl WaveNetBlock {
fn new(channels: usize, dilation: usize) -> Self {
WaveNetBlock {
dilated_conv: Conv2d::new(channels, channels, (2, 1)),
residual_conv: Conv2d::new(channels, channels, (1, 1)),
skip_conv: Conv2d::new(channels, channels, (1, 1)),
dilation,
}
}
fn forward(&mut self, x: &Tensor, training: bool) -> (Tensor, Tensor) {
let dilated = self.dilated_conv.forward(x, training);
let gated = self.gated_activation(&dilated);
let residual = self.residual_conv.forward(&gated, training);
let skip = self.skip_conv.forward(&gated, training);
let output = self.add_tensors(x, &residual);
(output, skip)
}
fn gated_activation(&self, x: &Tensor) -> Tensor {
let data = x.data_f32();
let half = data.len() / 2;
let tanh_part: Vec<f32> = data[..half].iter().map(|&v| v.tanh()).collect();
let sigmoid_part: Vec<f32> = data[half..].iter().map(|&v| 1.0 / (1.0 + (-v).exp())).collect();
let result: Vec<f32> = tanh_part.iter()
.zip(sigmoid_part.iter())
.map(|(&t, &s)| t * s)
.collect();
Tensor::from_slice(&result, &[x.dims()[0], x.dims()[1] / 2]).unwrap()
}
fn add_tensors(&self, a: &Tensor, b: &Tensor) -> Tensor {
let a_data = a.data_f32();
let b_data = b.data_f32();
let result: Vec<f32> = a_data.iter()
.zip(b_data.iter())
.map(|(&x, &y)| x + y)
.collect();
Tensor::from_slice(&result, a.dims()).unwrap()
}
}
impl WaveNet {
pub fn new(num_channels: usize, num_blocks: usize) -> Self {
let mut residual_blocks = Vec::new();
for i in 0..num_blocks {
let dilation = 2_usize.pow(i as u32 % 10);
residual_blocks.push(WaveNetBlock::new(num_channels, dilation));
}
WaveNet {
causal_conv: Conv2d::new(1, num_channels, (2, 1)),
residual_blocks,
output_conv: Conv2d::new(num_channels, 256, (1, 1)),
}
}
pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = self.causal_conv.forward(x, training);
let mut skip_connections = Vec::new();
for block in &mut self.residual_blocks {
let (new_out, skip) = block.forward(&out, training);
out = new_out;
skip_connections.push(skip);
}
let skip_sum = self.sum_tensors(&skip_connections);
self.output_conv.forward(&skip_sum, training)
}
fn sum_tensors(&self, tensors: &[Tensor]) -> Tensor {
if tensors.is_empty() {
return Tensor::from_slice(&[0.0f32], &[1, 1]).unwrap();
}
let mut result = tensors[0].data_f32().to_vec();
for tensor in &tensors[1..] {
let data = tensor.data_f32();
for (i, &val) in data.iter().enumerate() {
result[i] += val;
}
}
Tensor::from_slice(&result, tensors[0].dims()).unwrap()
}
}
pub struct PointNet {
mlp1: Vec<Dense>,
mlp2: Vec<Dense>,
fc: Dense,
}
impl PointNet {
pub fn new(num_points: usize, num_classes: usize) -> Self {
PointNet {
mlp1: vec![
Dense::new(3, 64),
Dense::new(64, 64),
],
mlp2: vec![
Dense::new(64, 128),
Dense::new(128, 1024),
],
fc: Dense::new(1024, num_classes),
}
}
pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = x.clone();
for layer in &mut self.mlp1 {
out = layer.forward(&out, training);
out = ReLU::new().forward(&out);
}
for layer in &mut self.mlp2 {
out = layer.forward(&out, training);
out = ReLU::new().forward(&out);
}
out = self.global_max_pool(&out);
self.fc.forward(&out, training)
}
fn global_max_pool(&self, x: &Tensor) -> Tensor {
let data = x.data_f32();
let batch_size = x.dims()[0];
let num_points = x.dims()[1];
let features = x.dims()[2];
let mut result = vec![f32::MIN; batch_size * features];
for b in 0..batch_size {
for f in 0..features {
for p in 0..num_points {
let idx = (b * num_points + p) * features + f;
let out_idx = b * features + f;
result[out_idx] = result[out_idx].max(data[idx]);
}
}
}
Tensor::from_slice(&result, &[batch_size, features]).unwrap()
}
}
pub struct PointNetPlusPlus {
sa_modules: Vec<SetAbstractionModule>,
fc_layers: Vec<Dense>,
}
struct SetAbstractionModule {
mlp: Vec<Dense>,
}
impl SetAbstractionModule {
fn new(in_features: usize, out_features: usize) -> Self {
SetAbstractionModule {
mlp: vec![
Dense::new(in_features, out_features / 2),
Dense::new(out_features / 2, out_features),
],
}
}
fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = x.clone();
for layer in &mut self.mlp {
out = layer.forward(&out, training);
out = ReLU::new().forward(&out);
}
out
}
}
impl PointNetPlusPlus {
pub fn new(num_classes: usize) -> Self {
PointNetPlusPlus {
sa_modules: vec![
SetAbstractionModule::new(3, 128),
SetAbstractionModule::new(128, 256),
SetAbstractionModule::new(256, 512),
],
fc_layers: vec![
Dense::new(512, 256),
Dense::new(256, 128),
Dense::new(128, num_classes),
],
}
}
pub fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = x.clone();
for module in &mut self.sa_modules {
out = module.forward(&out, training);
}
out = self.global_max_pool(&out);
for (i, layer) in self.fc_layers.iter_mut().enumerate() {
out = layer.forward(&out, training);
if i < self.fc_layers.len() - 1 {
out = ReLU::new().forward(&out);
}
}
out
}
fn global_max_pool(&self, x: &Tensor) -> Tensor {
let data = x.data_f32();
let batch_size = x.dims()[0];
let num_points = x.dims()[1];
let features = x.dims()[2];
let mut result = vec![f32::MIN; batch_size * features];
for b in 0..batch_size {
for f in 0..features {
for p in 0..num_points {
let idx = (b * num_points + p) * features + f;
let out_idx = b * features + f;
result[out_idx] = result[out_idx].max(data[idx]);
}
}
}
Tensor::from_slice(&result, &[batch_size, features]).unwrap()
}
}
pub struct Tacotron2 {
encoder: TacotronEncoder,
decoder: TacotronDecoder,
}
struct TacotronEncoder {
embedding: Dense,
convolutions: Vec<Conv2d>,
lstm: Dense, }
impl TacotronEncoder {
fn new(vocab_size: usize, embedding_dim: usize) -> Self {
TacotronEncoder {
embedding: Dense::new(vocab_size, embedding_dim),
convolutions: vec![
Conv2d::new(embedding_dim, 512, (5, 1)).padding((2, 0)),
Conv2d::new(512, 512, (5, 1)).padding((2, 0)),
Conv2d::new(512, 512, (5, 1)).padding((2, 0)),
],
lstm: Dense::new(512, 512),
}
}
fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = self.embedding.forward(x, training);
for conv in &mut self.convolutions {
out = conv.forward(&out, training);
out = ReLU::new().forward(&out);
}
self.lstm.forward(&out, training)
}
}
struct TacotronDecoder {
prenet: Vec<Dense>,
attention_rnn: Dense,
decoder_rnn: Dense,
linear_projection: Dense,
}
impl TacotronDecoder {
fn new(mel_dim: usize) -> Self {
TacotronDecoder {
prenet: vec![
Dense::new(mel_dim, 256),
Dense::new(256, 256),
],
attention_rnn: Dense::new(256, 1024),
decoder_rnn: Dense::new(1024, 1024),
linear_projection: Dense::new(1024, mel_dim),
}
}
fn forward(&mut self, x: &Tensor, training: bool) -> Tensor {
let mut out = x.clone();
for layer in &mut self.prenet {
out = layer.forward(&out, training);
out = ReLU::new().forward(&out);
}
out = self.attention_rnn.forward(&out, training);
out = self.decoder_rnn.forward(&out, training);
self.linear_projection.forward(&out, training)
}
}
impl Tacotron2 {
pub fn new(vocab_size: usize, mel_dim: usize) -> Self {
Tacotron2 {
encoder: TacotronEncoder::new(vocab_size, 512),
decoder: TacotronDecoder::new(mel_dim),
}
}
pub fn forward(&mut self, text: &Tensor, mel: &Tensor, training: bool) -> Tensor {
let _encoder_out = self.encoder.forward(text, training);
self.decoder.forward(mel, training)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_c3d() {
let mut model = C3D::new(101);
let input = Tensor::from_slice(&vec![0.5f32; 1 * 3 * 16], &[1, 3, 16]).unwrap();
let output = model.forward(&input, false);
assert_eq!(output.dims()[1], 101);
}
#[test]
fn test_pointnet() {
let mut model = PointNet::new(1024, 40);
let input = Tensor::from_slice(&vec![0.5f32; 1 * 1024 * 3], &[1, 1024, 3]).unwrap();
let output = model.forward(&input, false);
assert_eq!(output.dims()[1], 40);
}
}