Skip to main content

Crate flodl

Crate flodl 

Source
Expand description

flodl — a deep learning framework built on libtorch, from Rust.

Stack: flodl-sys (C++ shim FFI) → tensor → autograd → nn → graph.

use flodl::*;

// Build a model as a computation graph
let model = FlowBuilder::from(Linear::new(4, 8)?)
    .through(GELU)
    .through(Linear::new(8, 2)?)
    .build()?;

// Forward pass
let x = Variable::new(Tensor::randn(&[1, 4], Default::default())?, false);
let target = Variable::new(Tensor::randn(&[1, 2], Default::default())?, false);
let pred = model.forward(&x)?;

// Backward + optimize
let params = model.parameters();
let mut optimizer = Adam::new(&params, 1e-3);
let loss = mse_loss(&pred, &target)?;
optimizer.zero_grad();
loss.backward()?;
optimizer.step()?;

Re-exports§

pub use log::Verbosity;
pub use log::set_verbosity;
pub use log::verbosity;
pub use tensor::cuda_available;
pub use tensor::cuda_device_count;
pub use tensor::cuda_memory_info;
pub use tensor::cuda_memory_info_idx;
pub use tensor::cuda_allocated_bytes;
pub use tensor::cuda_allocated_bytes_idx;
pub use tensor::cuda_active_bytes;
pub use tensor::cuda_active_bytes_idx;
pub use tensor::cuda_peak_active_bytes;
pub use tensor::cuda_peak_active_bytes_idx;
pub use tensor::cuda_peak_reserved_bytes;
pub use tensor::cuda_peak_reserved_bytes_idx;
pub use tensor::cuda_reset_peak_stats;
pub use tensor::cuda_reset_peak_stats_idx;
pub use tensor::cuda_empty_cache;
pub use tensor::cuda_utilization;
pub use tensor::cuda_utilization_idx;
pub use tensor::cuda_device_name;
pub use tensor::cuda_device_name_idx;
pub use tensor::cuda_devices;
pub use tensor::cuda_compute_capability;
pub use tensor::probe_device;
pub use tensor::usable_cuda_devices;
pub use tensor::DeviceInfo;
pub use tensor::set_current_cuda_device;
pub use tensor::current_cuda_device;
pub use tensor::cuda_synchronize;
pub use tensor::hardware_summary;
pub use tensor::set_cudnn_benchmark;
pub use tensor::manual_seed;
pub use tensor::cuda_manual_seed_all;
pub use tensor::malloc_trim;
pub use tensor::live_tensor_count;
pub use tensor::rss_kb;
pub use tensor::Device;
pub use tensor::DType;
pub use tensor::Result;
pub use tensor::Tensor;
pub use tensor::TensorError;
pub use tensor::TensorOptions;
pub use rng::Rng;
pub use autograd::Variable;
pub use autograd::no_grad;
pub use autograd::is_grad_enabled;
pub use autograd::NoGradGuard;
pub use autograd::max_pool2d;
pub use autograd::adaptive_avg_pool2d;
pub use autograd::grid_sample;
pub use autograd::embedding_bag;
pub use nn::Module;
pub use nn::NamedInputModule;
pub use nn::Parameter;
pub use nn::Buffer;
pub use nn::Linear;
pub use nn::Optimizer;
pub use nn::Stateful;
pub use nn::SGD;
pub use nn::SGDBuilder;
pub use nn::Adam;
pub use nn::AdamBuilder;
pub use nn::AdamW;
pub use nn::AdamWBuilder;
pub use nn::RMSprop;
pub use nn::RMSpropBuilder;
pub use nn::Adagrad;
pub use nn::AdagradBuilder;
pub use nn::RAdam;
pub use nn::NAdam;
pub use nn::save_checkpoint;
pub use nn::load_checkpoint;
pub use nn::save_checkpoint_file;
pub use nn::load_checkpoint_file;
pub use nn::migrate_checkpoint;
pub use nn::migrate_checkpoint_file;
pub use nn::checkpoint_version;
pub use nn::LoadReport;
pub use nn::MigrateReport;
pub use nn::GradScaler;
pub use nn::cast_parameters;
pub use nn::AutocastGuard;
pub use nn::autocast;
pub use nn::is_autocast_enabled;
pub use nn::Identity;
pub use nn::ReLU;
pub use nn::Sigmoid;
pub use nn::Tanh;
pub use nn::GELU;
pub use nn::SiLU;
pub use nn::LeakyReLU;
pub use nn::ELU;
pub use nn::Softplus;
pub use nn::Mish;
pub use nn::SELU;
pub use nn::Hardswish;
pub use nn::Hardsigmoid;
pub use nn::PReLU;
pub use nn::Softmax;
pub use nn::LogSoftmax;
pub use nn::Flatten;
pub use nn::Dropout;
pub use nn::Dropout2d;
pub use nn::AlphaDropout;
pub use nn::ZeroPad2d;
pub use nn::ReflectionPad2d;
pub use nn::LayerNorm;
pub use nn::RMSNorm;
pub use nn::Embedding;
pub use nn::EmbeddingBag;
pub use nn::GRUCell;
pub use nn::GRU;
pub use nn::LSTMCell;
pub use nn::LSTM;
pub use nn::Conv1d;
pub use nn::Conv1dBuilder;
pub use nn::Conv2d;
pub use nn::Conv2dBuilder;
pub use nn::ConvTranspose1d;
pub use nn::ConvTranspose2d;
pub use nn::Conv3d;
pub use nn::Conv3dBuilder;
pub use nn::ConvTranspose3d;
pub use nn::GroupNorm;
pub use nn::BatchNorm;
pub use nn::BatchNorm2d;
pub use nn::InstanceNorm;
pub use nn::MaxPool2d;
pub use nn::AvgPool2d;
pub use nn::MaxPool1d;
pub use nn::AvgPool1d;
pub use nn::AdaptiveMaxPool2d;
pub use nn::AdaptiveAvgPool2d;
pub use nn::PixelShuffle;
pub use nn::PixelUnshuffle;
pub use nn::Upsample;
pub use nn::Unfold;
pub use nn::Fold;
pub use nn::Bilinear;
pub use nn::MultiheadAttention;
pub use nn::mse_loss;
pub use nn::cross_entropy_loss;
pub use nn::bce_loss;
pub use nn::bce_with_logits_loss;
pub use nn::l1_loss;
pub use nn::smooth_l1_loss;
pub use nn::kl_div_loss;
pub use nn::nll_loss;
pub use nn::ctc_loss;
pub use nn::focal_loss;
pub use nn::triplet_margin_loss;
pub use nn::cosine_embedding_loss;
pub use nn::hinge_embedding_loss;
pub use nn::margin_ranking_loss;
pub use nn::poisson_nll_loss;
pub use nn::clip_grad_norm;
pub use nn::clip_grad_value;
pub use nn::Scheduler;
pub use nn::StepDecay;
pub use nn::CosineScheduler;
pub use nn::WarmupScheduler;
pub use nn::PlateauScheduler;
pub use nn::ExponentialLR;
pub use nn::MultiStepLR;
pub use nn::OneCycleLR;
pub use nn::CyclicLR;
pub use nn::xavier_uniform;
pub use nn::xavier_normal;
pub use nn::kaiming_uniform;
pub use nn::kaiming_normal;
pub use nn::uniform_bias;
pub use nn::uniform;
pub use nn::normal;
pub use nn::orthogonal;
pub use nn::trunc_normal;
pub use nn::walk_modules;
pub use nn::walk_modules_visited;
pub use nn::CudaGraph;
pub use nn::MemPoolId;
pub use nn::CaptureMode;
pub use nn::cuda_graph_capture;
pub use nn::cuda_graph_pool_handle;
pub use nn::GaussianBlur;
pub use nn::gaussian_blur_2d;
pub use distributed::CudaEvent;
pub use distributed::CudaEventFlags;
pub use distributed::CudaStream;
pub use distributed::StreamGuard;
pub use distributed::NcclComms;
pub use distributed::NcclRankComm;
pub use distributed::NcclUniqueId;
pub use distributed::ReduceOp;
pub use distributed::Ddp;
pub use distributed::DdpConfig;
pub use distributed::ElChe;
pub use distributed::ApplyPolicy;
pub use distributed::DdpHandle;
pub use distributed::DdpBuilder;
pub use distributed::DdpRunConfig;
pub use distributed::AverageBackend;
pub use distributed::TrainedState;
pub use distributed::EpochMetrics;
pub use distributed::record_scalar;
pub use distributed::drain_scalars;
pub use distributed::GpuWorker;
pub use graph::FlowBuilder;
pub use graph::MergeOp;
pub use graph::Graph;
pub use graph::LossContext;
pub use graph::MapBuilder;
pub use graph::Trend;
pub use graph::TrendGroup;
pub use graph::Profile;
pub use graph::NodeTiming;
pub use graph::LevelTiming;
pub use graph::format_duration;
pub use graph::SoftmaxRouter;
pub use graph::SigmoidRouter;
pub use graph::FixedSelector;
pub use graph::ArgmaxSelector;
pub use graph::ThresholdHalt;
pub use graph::LearnedHalt;
pub use graph::Reshape;
pub use graph::StateAdd;
pub use graph::Reduce;
pub use graph::ModelSnapshot;
pub use graph::PathKind;
pub use graph::GraphEpochIterator;
pub use graph::ActiveGraphEpochIterator;
pub use worker::CpuWorker;
pub use data::DataSet;
pub use data::BatchDataSet;
pub use data::Sampler;
pub use data::RandomSampler;
pub use data::SequentialSampler;
pub use data::DataLoader;
pub use data::DataLoaderBuilder;
pub use data::EpochIterator;
pub use data::DistributedEpochIterator;
pub use data::Batch;

Modules§

autograd
Reverse-mode automatic differentiation backed by libtorch.
data
Async data loading pipeline.
distributed
Distributed Data Parallel (DDP) infrastructure for multi-GPU training.
graph
Computation graph: fluent builder, parallel execution, observation, profiling, visualization, and hierarchical composition.
log
Verbosity-gated output for flodl.
monitor
Training monitor with human-readable ETA, resource tracking, and live dashboard.
nn
Neural network modules, losses, optimizers, and training utilities.
rng
CPU-side random number generator for data loading, shuffling, and augmentation.
tensor
Tensor — immutable, chainable wrapper around a libtorch tensor.
worker
Background CPU work queue.

Macros§

debug
Prints to stderr at -vv (Debug) and above.
modules
Shorthand for building Vec<Box<dyn Module>> from a list of modules. Use with split, gate, and switch to avoid manual Box::new() wrapping.
msg
Print to stdout, gated by verbosity level.
trace
Prints to stderr at -vvv (Trace) and above.
verbose
Prints to stdout at -v (Verbose) and above.