Expand description
flodl — a deep learning framework built on libtorch, from Rust.
Stack: flodl-sys (C++ shim FFI) → tensor → autograd → nn → graph.
ⓘ
use flodl::*;
// Build a model as a computation graph
let model = FlowBuilder::from(Linear::new(4, 8)?)
.through(GELU)
.through(Linear::new(8, 2)?)
.build()?;
// Forward pass
let x = Variable::new(Tensor::randn(&[1, 4], Default::default())?, false);
let target = Variable::new(Tensor::randn(&[1, 2], Default::default())?, false);
let pred = model.forward(&x)?;
// Backward + optimize
let params = model.parameters();
let mut optimizer = Adam::new(&params, 1e-3);
let loss = mse_loss(&pred, &target)?;
optimizer.zero_grad();
loss.backward()?;
optimizer.step()?;
Re-exports§
pub use log::Verbosity;pub use log::set_verbosity;pub use log::verbosity;pub use tensor::cuda_available;pub use tensor::cuda_device_count;pub use tensor::cuda_memory_info;pub use tensor::cuda_memory_info_idx;pub use tensor::cuda_allocated_bytes;pub use tensor::cuda_allocated_bytes_idx;pub use tensor::cuda_active_bytes;pub use tensor::cuda_active_bytes_idx;pub use tensor::cuda_peak_active_bytes;pub use tensor::cuda_peak_active_bytes_idx;pub use tensor::cuda_peak_reserved_bytes;pub use tensor::cuda_peak_reserved_bytes_idx;pub use tensor::cuda_reset_peak_stats;pub use tensor::cuda_reset_peak_stats_idx;pub use tensor::cuda_empty_cache;pub use tensor::cuda_utilization;pub use tensor::cuda_utilization_idx;pub use tensor::cuda_device_name;pub use tensor::cuda_device_name_idx;pub use tensor::cuda_devices;pub use tensor::cuda_compute_capability;pub use tensor::probe_device;pub use tensor::usable_cuda_devices;pub use tensor::DeviceInfo;pub use tensor::set_current_cuda_device;pub use tensor::current_cuda_device;pub use tensor::cuda_synchronize;pub use tensor::hardware_summary;pub use tensor::set_cudnn_benchmark;pub use tensor::manual_seed;pub use tensor::cuda_manual_seed_all;pub use tensor::malloc_trim;pub use tensor::live_tensor_count;pub use tensor::rss_kb;pub use tensor::Device;pub use tensor::DType;pub use tensor::Result;pub use tensor::Tensor;pub use tensor::TensorError;pub use tensor::TensorOptions;pub use rng::Rng;pub use autograd::Variable;pub use autograd::no_grad;pub use autograd::is_grad_enabled;pub use autograd::NoGradGuard;pub use autograd::max_pool2d;pub use autograd::adaptive_avg_pool2d;pub use autograd::grid_sample;pub use autograd::embedding_bag;pub use nn::Module;pub use nn::NamedInputModule;pub use nn::Parameter;pub use nn::Buffer;pub use nn::Linear;pub use nn::Optimizer;pub use nn::Stateful;pub use nn::SGD;pub use nn::SGDBuilder;pub use nn::Adam;pub use nn::AdamBuilder;pub use nn::AdamW;pub use nn::AdamWBuilder;pub use nn::RMSprop;pub use 
nn::RMSpropBuilder;pub use nn::Adagrad;pub use nn::AdagradBuilder;pub use nn::RAdam;pub use nn::NAdam;pub use nn::save_checkpoint;pub use nn::load_checkpoint;pub use nn::save_checkpoint_file;pub use nn::load_checkpoint_file;pub use nn::migrate_checkpoint;pub use nn::migrate_checkpoint_file;pub use nn::checkpoint_version;pub use nn::LoadReport;pub use nn::MigrateReport;pub use nn::GradScaler;pub use nn::cast_parameters;pub use nn::AutocastGuard;pub use nn::autocast;pub use nn::is_autocast_enabled;pub use nn::Identity;pub use nn::ReLU;pub use nn::Sigmoid;pub use nn::Tanh;pub use nn::GELU;pub use nn::SiLU;pub use nn::LeakyReLU;pub use nn::ELU;pub use nn::Softplus;pub use nn::Mish;pub use nn::SELU;pub use nn::Hardswish;pub use nn::Hardsigmoid;pub use nn::PReLU;pub use nn::Softmax;pub use nn::LogSoftmax;pub use nn::Flatten;pub use nn::Dropout;pub use nn::Dropout2d;pub use nn::AlphaDropout;pub use nn::ZeroPad2d;pub use nn::ReflectionPad2d;pub use nn::LayerNorm;pub use nn::RMSNorm;pub use nn::Embedding;pub use nn::EmbeddingBag;pub use nn::GRUCell;pub use nn::GRU;pub use nn::LSTMCell;pub use nn::LSTM;pub use nn::Conv1d;pub use nn::Conv1dBuilder;pub use nn::Conv2d;pub use nn::Conv2dBuilder;pub use nn::ConvTranspose1d;pub use nn::ConvTranspose2d;pub use nn::Conv3d;pub use nn::Conv3dBuilder;pub use nn::ConvTranspose3d;pub use nn::GroupNorm;pub use nn::BatchNorm;pub use nn::BatchNorm2d;pub use nn::InstanceNorm;pub use nn::MaxPool2d;pub use nn::AvgPool2d;pub use nn::MaxPool1d;pub use nn::AvgPool1d;pub use nn::AdaptiveMaxPool2d;pub use nn::AdaptiveAvgPool2d;pub use nn::PixelShuffle;pub use nn::PixelUnshuffle;pub use nn::Upsample;pub use nn::Unfold;pub use nn::Fold;pub use nn::Bilinear;pub use nn::MultiheadAttention;pub use nn::mse_loss;pub use nn::cross_entropy_loss;pub use nn::bce_loss;pub use nn::bce_with_logits_loss;pub use nn::l1_loss;pub use nn::smooth_l1_loss;pub use nn::kl_div_loss;pub use nn::nll_loss;pub use nn::ctc_loss;pub use nn::focal_loss;pub use 
nn::triplet_margin_loss;pub use nn::cosine_embedding_loss;pub use nn::hinge_embedding_loss;pub use nn::margin_ranking_loss;pub use nn::poisson_nll_loss;pub use nn::clip_grad_norm;pub use nn::clip_grad_value;pub use nn::Scheduler;pub use nn::StepDecay;pub use nn::CosineScheduler;pub use nn::WarmupScheduler;pub use nn::PlateauScheduler;pub use nn::ExponentialLR;pub use nn::MultiStepLR;pub use nn::OneCycleLR;pub use nn::CyclicLR;pub use nn::xavier_uniform;pub use nn::xavier_normal;pub use nn::kaiming_uniform;pub use nn::kaiming_normal;pub use nn::uniform_bias;pub use nn::uniform;pub use nn::normal;pub use nn::orthogonal;pub use nn::trunc_normal;pub use nn::walk_modules;pub use nn::walk_modules_visited;pub use nn::CudaGraph;pub use nn::MemPoolId;pub use nn::CaptureMode;pub use nn::cuda_graph_capture;pub use nn::cuda_graph_pool_handle;pub use nn::GaussianBlur;pub use nn::gaussian_blur_2d;pub use distributed::CudaEvent;pub use distributed::CudaEventFlags;pub use distributed::CudaStream;pub use distributed::StreamGuard;pub use distributed::NcclComms;pub use distributed::NcclRankComm;pub use distributed::NcclUniqueId;pub use distributed::ReduceOp;pub use distributed::Ddp;pub use distributed::DdpConfig;pub use distributed::ElChe;pub use distributed::ApplyPolicy;pub use distributed::DdpHandle;pub use distributed::DdpBuilder;pub use distributed::DdpRunConfig;pub use distributed::AverageBackend;pub use distributed::TrainedState;pub use distributed::EpochMetrics;pub use distributed::record_scalar;pub use distributed::drain_scalars;pub use distributed::GpuWorker;pub use graph::FlowBuilder;pub use graph::MergeOp;pub use graph::Graph;pub use graph::LossContext;pub use graph::MapBuilder;pub use graph::Trend;pub use graph::TrendGroup;pub use graph::Profile;pub use graph::NodeTiming;pub use graph::LevelTiming;pub use graph::format_duration;pub use graph::SoftmaxRouter;pub use graph::SigmoidRouter;pub use graph::FixedSelector;pub use graph::ArgmaxSelector;pub use 
graph::ThresholdHalt;pub use graph::LearnedHalt;pub use graph::Reshape;pub use graph::StateAdd;pub use graph::Reduce;pub use graph::ModelSnapshot;pub use graph::PathKind;pub use graph::GraphEpochIterator;pub use graph::ActiveGraphEpochIterator;pub use worker::CpuWorker;pub use data::DataSet;pub use data::BatchDataSet;pub use data::Sampler;pub use data::RandomSampler;pub use data::SequentialSampler;pub use data::DataLoader;pub use data::DataLoaderBuilder;pub use data::EpochIterator;pub use data::DistributedEpochIterator;pub use data::Batch;
Modules§
- autograd
- Reverse-mode automatic differentiation backed by libtorch.
- data
- Async data loading pipeline.
- distributed
- Distributed Data Parallel (DDP) infrastructure for multi-GPU training.
- graph
- Computation graph: fluent builder, parallel execution, observation, profiling, visualization, and hierarchical composition.
- log
- Verbosity-gated output for flodl.
- monitor
- Training monitor with human-readable ETA, resource tracking, and live dashboard.
- nn
- Neural network modules, losses, optimizers, and training utilities.
- rng
- CPU-side random number generator for data loading, shuffling, and augmentation.
- tensor
- Tensor — immutable, chainable wrapper around a libtorch tensor.
- worker
- Background CPU work queue.
Macros§
- debug
- Prints to stderr at
-vv (Debug) and above.
- modules
- Shorthand for building
Vec<Box<dyn Module>> from a list of modules. Use with split, gate, and switch to avoid manual Box::new() wrapping.
- msg
- Print to stdout, gated by verbosity level.
- trace
- Prints to stderr at
-vvv (Trace) and above.
- verbose
- Prints to stdout at
-v (Verbose) and above.