#[macro_use]
extern crate timeit;
extern crate coaster as co;
extern crate env_logger;
extern crate juice;

use crate::co::prelude::*;
use juice::layer::{Layer, LayerConfig, LayerType};
use juice::layers::{ConvolutionConfig, LinearConfig, PoolingConfig, PoolingMode, SequentialConfig};
use std::env;
use std::rc::Rc;
use std::sync::{Arc, RwLock};
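
// Entry point: selects the model to benchmark from the first CLI argument,
// defaulting to AlexNet when none is given.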
fn main() {
    env_logger::init();
    let nets: Vec<String> = vec!["alexnet".to_string(), "overfeat".to_string(), "vgg".to_string()];
    if let Some(net) = env::args().nth(1) {
        if nets.contains(&net) {
            println!("Executing Model: {:?}", net);
            match net.as_str() {
                "alexnet" => bench_alexnet(),
                "overfeat" => bench_overfeat(),
                "vgg" => bench_vgg_a(),
                _ => unreachable!(),
            }
        } else {
            println!("Sorry, no model found with name '{}'. Valid options: {:?}", net, nets);
        }
    } else {
        println!(
            "No `net` argument specified. Default: `alexnet`. Valid options: {:?}",
            nets
        );
        bench_alexnet();
    }
}
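
/// Builds a backend on the native (CPU) framework, using all available hardware.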
#[allow(dead_code)]
fn native_backend() -> Rc<Backend<Native>> {
    let framework = Native::new();
    let hardwares = &framework.hardwares().to_vec();
    let backend_config = BackendConfig::new(framework, hardwares);
    Rc::new(Backend::new(backend_config).unwrap())
}
#[cfg(feature = "cuda")]
#[allow(dead_code)]
use crate::co::frameworks::cuda::get_cuda_backend as cuda_backend;
use juice::layer::{Layer, LayerConfig, LayerType};
use juice::layers::{ConvolutionConfig, LinearConfig, PoolingConfig, PoolingMode, SequentialConfig};
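
/// Builds a backend on the second enumerated OpenCL device (`[1..2]`);
/// adjust the slice if your machine enumerates its OpenCL hardware differently.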
#[cfg(feature = "opencl")]
#[allow(dead_code)]
fn opencl_backend() -> Rc<Backend<OpenCL>> {
let framework = OpenCL::new();
let hardwares = &framework.hardwares()[1..2].to_vec();
let backend_config = BackendConfig::new(framework, hardwares);
Rc::new(Backend::new(backend_config).unwrap())
}
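
/// Runs `bench_func` ten times untimed to warm up caches and lazily compiled
/// kernels, then prints the average wall-clock time over `times` timed runs.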
#[inline(never)]
#[allow(dead_code)]
fn bench_profile<F: FnMut()>(name: &str, mut bench_func: F, times: usize) {
    println!("Running benchmark {}", name);
    println!("----------");
    for _ in 0..10 {
        bench_func();
    }
    let average_time = timeit_loops!(times, {
        bench_func();
    });
    println!("----------");
    println!("Average time {}", autoscale_time(average_time));
    println!();
}
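
/// Formats a duration in seconds with an automatically chosen unit.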
#[allow(dead_code)]
fn autoscale_time(sec: f64) -> String {
    let (div, unit_str) = get_time_scale(sec);
    format!("{:.5} {}", sec / div, unit_str)
}
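
/// Formats a duration in seconds in an explicitly requested unit.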
#[allow(dead_code)]
fn scale_time(sec: f64, unit: &str) -> String {
    let div = match unit {
        "s" => 1.0,
        "ms" => 0.001,
        "µs" => 0.000_001,
        "ns" => 0.000_000_001,
        _ => panic!("unknown time unit `{}`", unit),
    };
    format!("{:.5} {}", sec / div, unit)
}

fn get_time_scale(sec: f64) -> (f64, &'static str) {
    if sec > 1.0 {
        (1.0, "s")
    } else if sec > 0.001 {
        (0.001, "ms")
    } else if sec > 0.000_001 {
        (0.000_001, "µs")
    } else {
        (0.000_000_001, "ns")
    }
}
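
// Each benchmark below has a CUDA implementation and a native stub: the
// Coaster NN plugin has no native convolution yet, so without CUDA the
// example can only print a hint on how to enable it.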
#[cfg(feature = "native")]
fn bench_alexnet() {
println!("Examples run only with CUDA support at the moment, because of missing native convolution implementation for the Coaster NN Plugin.");
println!(
"Try running with `cargo run --release --no-default-features --features cuda --example benchmarks alexnet`."
);
}
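
// AlexNet: five convolution blocks followed by three fully connected layers,
// benchmarked here with a batch size of 128 on 3x224x224 inputs.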
#[cfg(all(feature = "cuda", not(feature = "native")))]
fn bench_alexnet() {
let mut cfg = SequentialConfig::default();
cfg.add_input("data", &[128, 3, 224, 224]);
let conv1_layer_cfg = ConvolutionConfig {
num_output: 64,
filter_shape: vec![11],
padding: vec![2],
stride: vec![4],
};
cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg));
cfg.add_layer(LayerConfig::new("conv1/relu", LayerType::ReLU));
cfg.add_layer(LayerConfig::new(
"pool1",
PoolingConfig {
mode: PoolingMode::Max,
filter_shape: vec![3],
stride: vec![2],
padding: vec![0],
},
));
cfg.add_layer(LayerConfig::new(
"conv2",
ConvolutionConfig {
num_output: 192,
filter_shape: vec![5],
padding: vec![2],
stride: vec![1],
},
));
cfg.add_layer(LayerConfig::new("conv2/relu", LayerType::ReLU));
cfg.add_layer(LayerConfig::new(
"pool2",
PoolingConfig {
mode: PoolingMode::Max,
filter_shape: vec![3],
stride: vec![2],
padding: vec![0],
},
));
cfg.add_layer(LayerConfig::new(
"conv3",
ConvolutionConfig {
num_output: 384,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
},
));
cfg.add_layer(LayerConfig::new("conv3/relu", LayerType::ReLU));
cfg.add_layer(LayerConfig::new(
"conv4",
ConvolutionConfig {
num_output: 256,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
},
));
cfg.add_layer(LayerConfig::new("conv4/relu", LayerType::ReLU));
cfg.add_layer(LayerConfig::new(
"conv5",
ConvolutionConfig {
num_output: 256,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
},
));
cfg.add_layer(LayerConfig::new("conv5/relu", LayerType::ReLU));
cfg.add_layer(LayerConfig::new(
"pool3",
PoolingConfig {
mode: PoolingMode::Max,
filter_shape: vec![3],
stride: vec![2],
padding: vec![0],
},
));
cfg.add_layer(LayerConfig::new("fc1", LinearConfig { output_size: 4096 }));
cfg.add_layer(LayerConfig::new("fc2", LinearConfig { output_size: 4096 }));
cfg.add_layer(LayerConfig::new("fc3", LinearConfig { output_size: 1000 }));
let backend = Rc::new(cuda_backend());
let mut network = Layer::from_config(
backend.clone(),
&LayerConfig::new("alexnet", LayerType::Sequential(cfg)),
);
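
    // Benchmark the forward pass, then the input-gradient and
    // parameter-gradient halves of the backward pass.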
    {
        let func = || {
            let forward_time = timeit_loops!(1, {
                let inp = SharedTensor::<f32>::new(&[128, 3, 224, 224]);
                let inp_lock = Arc::new(RwLock::new(inp));
                network.forward(&[inp_lock]);
            });
            println!("Forward step: {}", scale_time(forward_time, "ms"));
        };
        bench_profile("alexnet_forward", func, 10);
    }
    {
        let func = || {
            let backward_time = timeit_loops!(1, {
                network.backward_input(&[]);
            });
            println!("backward input step: {}", scale_time(backward_time, "ms"));
        };
        bench_profile("alexnet_backward_input", func, 10);
    }
    {
        let func = || {
            let backward_time = timeit_loops!(1, {
                network.backward_parameters();
            });
            println!("backward parameters step: {}", scale_time(backward_time, "ms"));
        };
        bench_profile("alexnet_backward_parameters", func, 10);
    }
}
#[cfg(feature = "native")]
fn bench_overfeat() {
println!("Examples run only with CUDA support at the moment, because of missing native convolution implementation for the Coaster NN Plugin.");
println!(
"Try running with `cargo run --release --no-default-features --features cuda --example benchmarks overfeat`."
);
}
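
// OverFeat: five convolution stages and three fully connected layers,
// benchmarked here with a batch size of 128 on 3x231x231 inputs.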
#[cfg(all(feature = "cuda", not(feature = "native")))]
fn bench_overfeat() {
let mut cfg = SequentialConfig::default();
cfg.add_input("data", &[128, 3, 231, 231]);
let conv1_layer_cfg = ConvolutionConfig {
num_output: 96,
filter_shape: vec![11],
padding: vec![0],
stride: vec![4],
};
cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg));
cfg.add_layer(LayerConfig::new("conv1/relu", LayerType::ReLU));
let pool1_layer_cfg = PoolingConfig {
mode: PoolingMode::Max,
filter_shape: vec![2],
stride: vec![2],
padding: vec![0],
};
cfg.add_layer(LayerConfig::new("pool1", pool1_layer_cfg));
let conv2_layer_cfg = ConvolutionConfig {
num_output: 256,
filter_shape: vec![5],
padding: vec![0],
stride: vec![1],
};
cfg.add_layer(LayerConfig::new("conv2", conv2_layer_cfg));
cfg.add_layer(LayerConfig::new("conv2/relu", LayerType::ReLU));
let pool2_layer_cfg = PoolingConfig {
mode: PoolingMode::Max,
filter_shape: vec![2],
stride: vec![2],
padding: vec![0],
};
cfg.add_layer(LayerConfig::new("pool2", pool2_layer_cfg));
let conv3_layer_cfg = ConvolutionConfig {
num_output: 512,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
};
cfg.add_layer(LayerConfig::new("conv3", conv3_layer_cfg));
cfg.add_layer(LayerConfig::new("conv3/relu", LayerType::ReLU));
let conv4_layer_cfg = ConvolutionConfig {
num_output: 1024,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
};
cfg.add_layer(LayerConfig::new("conv4", conv4_layer_cfg));
cfg.add_layer(LayerConfig::new("conv4/relu", LayerType::ReLU));
let conv5_layer_cfg = ConvolutionConfig {
num_output: 1024,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
};
cfg.add_layer(LayerConfig::new("conv5", conv5_layer_cfg));
cfg.add_layer(LayerConfig::new("conv5/relu", LayerType::ReLU));
let pool5_layer_cfg = PoolingConfig {
mode: PoolingMode::Max,
filter_shape: vec![2],
stride: vec![2],
padding: vec![0],
};
cfg.add_layer(LayerConfig::new("pool5", pool5_layer_cfg));
cfg.add_layer(LayerConfig::new("fc1", LinearConfig { output_size: 3072 }));
cfg.add_layer(LayerConfig::new("fc2", LinearConfig { output_size: 4096 }));
cfg.add_layer(LayerConfig::new("fc3", LinearConfig { output_size: 1000 }));
let backend = Rc::new(cuda_backend());
let mut network = Layer::from_config(
backend.clone(),
&LayerConfig::new("overfeat", LayerType::Sequential(cfg)),
);
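
    // Benchmark forward, backward-input, and backward-parameters for OverFeat.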
    {
        let func = || {
            let forward_time = timeit_loops!(1, {
                let inp = SharedTensor::<f32>::new(&[128, 3, 231, 231]);
                let inp_lock = Arc::new(RwLock::new(inp));
                network.forward(&[inp_lock]);
            });
            println!("Forward step: {}", scale_time(forward_time, "ms"));
        };
        bench_profile("overfeat_forward", func, 10);
    }
    {
        let func = || {
            let backward_time = timeit_loops!(1, {
                network.backward_input(&[]);
            });
            println!("backward input step: {}", scale_time(backward_time, "ms"));
        };
        bench_profile("overfeat_backward_input", func, 10);
    }
    {
        let func = || {
            let backward_time = timeit_loops!(1, {
                network.backward_parameters();
            });
            println!("backward parameters step: {}", scale_time(backward_time, "ms"));
        };
        bench_profile("overfeat_backward_parameters", func, 10);
    }
}
#[cfg(feature = "native")]
fn bench_vgg_a() {
println!("Examples run only with CUDA support at the moment, because of missing native convolution implementation for the Coaster NN Plugin.");
println!("Try running with `cargo run --release --no-default-features --features cuda --example benchmarks vgg`.");
}
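
// VGG-A (configuration A from Simonyan & Zisserman): eight 3x3 convolutions
// and three fully connected layers, benchmarked here with a batch size of 64
// on 3x224x224 inputs.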
#[cfg(all(feature = "cuda", not(feature = "native")))]
fn bench_vgg_a() {
let mut cfg = SequentialConfig::default();
cfg.add_input("data", &[64, 3, 224, 224]);
let conv1_layer_cfg = ConvolutionConfig {
num_output: 64,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
};
cfg.add_layer(LayerConfig::new("conv1", conv1_layer_cfg));
cfg.add_layer(LayerConfig::new("conv1/relu", LayerType::ReLU));
cfg.add_layer(LayerConfig::new(
"pool1",
PoolingConfig {
mode: PoolingMode::Max,
filter_shape: vec![2],
stride: vec![2],
padding: vec![0],
},
));
cfg.add_layer(LayerConfig::new(
"conv2",
ConvolutionConfig {
num_output: 128,
filter_shape: vec![3],
padding: vec![1],
stride: vec![1],
},
));
cfg.add_layer(LayerConfig::new("conv2/relu", LayerType::ReLU));
    cfg.add_layer(LayerConfig::new(
        "pool2",
        PoolingConfig {
            mode: PoolingMode::Max,
            filter_shape: vec![2],
            stride: vec![2],
            padding: vec![0],
        },
    ));
    cfg.add_layer(LayerConfig::new(
        "conv3",
        ConvolutionConfig {
            num_output: 256,
            filter_shape: vec![3],
            padding: vec![1],
            stride: vec![1],
        },
    ));
    cfg.add_layer(LayerConfig::new("conv3/relu", LayerType::ReLU));
    cfg.add_layer(LayerConfig::new(
        "conv4",
        ConvolutionConfig {
            num_output: 256,
            filter_shape: vec![3],
            padding: vec![1],
            stride: vec![1],
        },
    ));
    cfg.add_layer(LayerConfig::new("conv4/relu", LayerType::ReLU));
    cfg.add_layer(LayerConfig::new(
        "pool3",
        PoolingConfig {
            mode: PoolingMode::Max,
            filter_shape: vec![2],
            stride: vec![2],
            padding: vec![0],
        },
    ));
    cfg.add_layer(LayerConfig::new(
        "conv5",
        ConvolutionConfig {
            num_output: 512,
            filter_shape: vec![3],
            padding: vec![1],
            stride: vec![1],
        },
    ));
    cfg.add_layer(LayerConfig::new("conv5/relu", LayerType::ReLU));
    cfg.add_layer(LayerConfig::new(
        "conv6",
        ConvolutionConfig {
            num_output: 512,
            filter_shape: vec![3],
            padding: vec![1],
            stride: vec![1],
        },
    ));
    cfg.add_layer(LayerConfig::new("conv6/relu", LayerType::ReLU));
    cfg.add_layer(LayerConfig::new(
        "pool4",
        PoolingConfig {
            mode: PoolingMode::Max,
            filter_shape: vec![2],
            stride: vec![2],
            padding: vec![0],
        },
    ));
    cfg.add_layer(LayerConfig::new(
        "conv7",
        ConvolutionConfig {
            num_output: 512,
            filter_shape: vec![3],
            padding: vec![1],
            stride: vec![1],
        },
    ));
    cfg.add_layer(LayerConfig::new("conv7/relu", LayerType::ReLU));
    cfg.add_layer(LayerConfig::new(
        "conv8",
        ConvolutionConfig {
            num_output: 512,
            filter_shape: vec![3],
            padding: vec![1],
            stride: vec![1],
        },
    ));
    cfg.add_layer(LayerConfig::new("conv8/relu", LayerType::ReLU));
    cfg.add_layer(LayerConfig::new(
        "pool5",
        PoolingConfig {
            mode: PoolingMode::Max,
            filter_shape: vec![2],
            stride: vec![2],
            padding: vec![0],
        },
    ));
    cfg.add_layer(LayerConfig::new("fc1", LinearConfig { output_size: 4096 }));
    cfg.add_layer(LayerConfig::new("fc2", LinearConfig { output_size: 4096 }));
    cfg.add_layer(LayerConfig::new("fc3", LinearConfig { output_size: 1000 }));
    let backend = Rc::new(cuda_backend());
    let mut network = Layer::from_config(backend.clone(), &LayerConfig::new("vgg_a", LayerType::Sequential(cfg)));
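
    // Benchmark forward, backward-input, and backward-parameters for VGG-A.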
    {
        let func = || {
            let forward_time = timeit_loops!(1, {
                let inp = SharedTensor::<f32>::new(&[64, 3, 224, 224]);
                let inp_lock = Arc::new(RwLock::new(inp));
                network.forward(&[inp_lock]);
            });
            println!("Forward step: {}", scale_time(forward_time, "ms"));
        };
        bench_profile("vgg_a_forward", func, 10);
    }
    {
        let func = || {
            let backward_time = timeit_loops!(1, {
                network.backward_input(&[]);
            });
            println!("backward input step: {}", scale_time(backward_time, "ms"));
        };
        bench_profile("vgg_a_backward_input", func, 10);
    }
    {
        let func = || {
            let backward_time = timeit_loops!(1, {
                network.backward_parameters();
            });
            println!("backward parameters step: {}", scale_time(backward_time, "ms"));
        };
        bench_profile("vgg_a_backward_parameters", func, 10);
    }
}