List of all items
Structs
- generation::LogitsProcessor
- models::based::Config
- models::based::LinearAttentionConfig
- models::based::LinearAttentionFeatureMapConfig
- models::based::Model
- models::based::SlidingWindowAttentionConfig
- models::beit::BeitVisionTransformer
- models::bert::BertEncoder
- models::bert::BertForMaskedLM
- models::bert::BertLMPredictionHead
- models::bert::BertLayer
- models::bert::BertModel
- models::bert::BertOnlyMLMHead
- models::bert::Config
- models::bigcode::Config
- models::bigcode::GPTBigCode
- models::blip::BlipForConditionalGeneration
- models::blip::Config
- models::blip::VisionConfig
- models::blip::VisionModel
- models::blip_text::Config
- models::blip_text::TextLMHeadModel
- models::blip_text::TextPooler
- models::chatglm::Config
- models::chatglm::Model
- models::chinese_clip::ChineseClipConfig
- models::chinese_clip::ChineseClipModel
- models::chinese_clip::text_model::ChineseClipTextConfig
- models::chinese_clip::text_model::ChineseClipTextEmbeddings
- models::chinese_clip::text_model::ChineseClipTextTransformer
- models::chinese_clip::vision_model::ChineseClipVisionConfig
- models::chinese_clip::vision_model::ChineseClipVisionEmbeddings
- models::chinese_clip::vision_model::ChineseClipVisionEncoder
- models::chinese_clip::vision_model::ChineseClipVisionTransformer
- models::clip::ClipConfig
- models::clip::ClipModel
- models::clip::text_model::ClipEncoder
- models::clip::text_model::ClipTextConfig
- models::clip::text_model::ClipTextTransformer
- models::clip::vision_model::ClipVisionConfig
- models::clip::vision_model::ClipVisionTransformer
- models::codegeex4_9b::Config
- models::codegeex4_9b::Model
- models::colpali::Model
- models::convnext::Config
- models::dac::Config
- models::dac::Decoder
- models::dac::DecoderBlock
- models::dac::Encoder
- models::dac::EncoderBlock
- models::dac::Model
- models::dac::ResidualUnit
- models::dac::ResidualVectorQuantizer
- models::dac::Snake1d
- models::dac::VectorQuantizer
- models::depth_anything_v2::DPTHead
- models::depth_anything_v2::DepthAnythingV2
- models::depth_anything_v2::DepthAnythingV2Config
- models::depth_anything_v2::FeatureFusionBlock
- models::depth_anything_v2::ResidualConvUnit
- models::depth_anything_v2::Scratch
- models::dinov2::DinoVisionTransformer
- models::dinov2reg4::DinoVisionTransformer
- models::distilbert::Config
- models::distilbert::DistilBertModel
- models::efficientnet::EfficientNet
- models::efficientnet::MBConvConfig
- models::efficientvit::Config
- models::encodec::Config
- models::encodec::Decoder
- models::encodec::EncodecConv1d
- models::encodec::EncodecConvTranspose1d
- models::encodec::EncodecLSTM
- models::encodec::EncodecResnetBlock
- models::encodec::Encoder
- models::encodec::EuclideanCodebook
- models::encodec::Model
- models::encodec::ResidualVectorQuantizer
- models::encodec::VectorQuantization
- models::eva2::EVA2VisionTransformer
- models::falcon::Config
- models::falcon::Falcon
- models::fastvit::Config
- models::flux::autoencoder::AutoEncoder
- models::flux::autoencoder::Config
- models::flux::autoencoder::Decoder
- models::flux::autoencoder::DiagonalGaussian
- models::flux::autoencoder::Encoder
- models::flux::model::Config
- models::flux::model::DoubleStreamBlock
- models::flux::model::EmbedNd
- models::flux::model::Flux
- models::flux::model::LastLayer
- models::flux::model::MlpEmbedder
- models::flux::model::QkNorm
- models::flux::model::SelfAttention
- models::flux::model::SingleStreamBlock
- models::flux::quantized_model::DoubleStreamBlock
- models::flux::quantized_model::Flux
- models::flux::quantized_model::LastLayer
- models::flux::quantized_model::MlpEmbedder
- models::flux::quantized_model::QkNorm
- models::flux::quantized_model::SelfAttention
- models::flux::quantized_model::SingleStreamBlock
- models::flux::sampling::State
- models::gemma2::Config
- models::gemma2::Model
- models::gemma::Config
- models::gemma::Model
- models::glm4::Config
- models::glm4::Model
- models::granite::Cache
- models::granite::Config
- models::granite::Granite
- models::granite::GraniteConfig
- models::granite::GraniteRopeConfig
- models::hiera::Config
- models::jina_bert::BertModel
- models::jina_bert::Config
- models::llama2_c::Cache
- models::llama2_c::Config
- models::llama2_c::Llama
- models::llama2_c_weights::TransformerWeights
- models::llama::Cache
- models::llama::Config
- models::llama::Llama
- models::llama::Llama3RopeConfig
- models::llama::LlamaConfig
- models::llava::ClipVisionTower
- models::llava::IdentityMap
- models::llava::LLaVA
- models::llava::MMProjector
- models::llava::config::HFGenerationConfig
- models::llava::config::HFLLaVAConfig
- models::llava::config::HFLLaVATextConfig
- models::llava::config::HFLLaVAVisionConfig
- models::llava::config::HFPreProcessorConfig
- models::llava::config::LLaVAConfig
- models::mamba::Config
- models::mamba::MambaBlock
- models::mamba::Model
- models::mamba::ResidualBlock
- models::mamba::State
- models::marian::Config
- models::marian::Decoder
- models::marian::Encoder
- models::marian::MTModel
- models::metavoice::adapters::FlattenedInterleavedEncodec2Codebook
- models::metavoice::adapters::TiltedEncodec
- models::metavoice::gpt::Config
- models::metavoice::gpt::Model
- models::metavoice::speaker_encoder::Config
- models::metavoice::speaker_encoder::Model
- models::metavoice::tokenizers::BPE
- models::metavoice::transformer::Config
- models::metavoice::transformer::Model
- models::mimi::conv::ConvDownsample1d
- models::mimi::conv::ConvTrUpsample1d
- models::mimi::conv::NormConv1d
- models::mimi::conv::NormConvTranspose1d
- models::mimi::conv::StreamableConv1d
- models::mimi::conv::StreamableConvTranspose1d
- models::mimi::encodec::Config
- models::mimi::encodec::Encodec
- models::mimi::quantization::EuclideanCodebook
- models::mimi::quantization::ResidualVectorQuantization
- models::mimi::quantization::ResidualVectorQuantizer
- models::mimi::quantization::SplitResidualVectorQuantizer
- models::mimi::quantization::VectorQuantization
- models::mimi::seanet::Config
- models::mimi::seanet::SeaNetDecoder
- models::mimi::seanet::SeaNetEncoder
- models::mimi::seanet::SeaNetResnetBlock
- models::mimi::transformer::Config
- models::mimi::transformer::LayerScale
- models::mimi::transformer::ProjectedTransformer
- models::mimi::transformer::RmsNorm
- models::mimi::transformer::RotaryEmbedding
- models::mimi::transformer::StreamingMultiheadAttention
- models::mimi::transformer::StreamingMultiheadCrossAttention
- models::mimi::transformer::StreamingTransformer
- models::mimi::transformer::StreamingTransformerLayer
- models::mistral::Config
- models::mistral::Model
- models::mixformer::Config
- models::mixformer::MixFormerSequentialForCausalLM
- models::mixtral::Config
- models::mixtral::Model
- models::mmdit::blocks::ContextQkvOnlyJointBlock
- models::mmdit::blocks::DiTBlock
- models::mmdit::blocks::FinalLayer
- models::mmdit::blocks::LayerNormNoAffine
- models::mmdit::blocks::MMDiTJointBlock
- models::mmdit::blocks::MMDiTXJointBlock
- models::mmdit::blocks::ModulateIntermediates
- models::mmdit::blocks::QkvOnlyDiTBlock
- models::mmdit::blocks::SelfAttnDiTBlock
- models::mmdit::blocks::SelfAttnModulateIntermediates
- models::mmdit::embedding::PatchEmbedder
- models::mmdit::embedding::PositionEmbedder
- models::mmdit::embedding::TimestepEmbedder
- models::mmdit::embedding::Unpatchifier
- models::mmdit::embedding::VectorEmbedder
- models::mmdit::model::Config
- models::mmdit::model::MMDiT
- models::mmdit::model::MMDiTCore
- models::mmdit::projections::AttnProjections
- models::mmdit::projections::Mlp
- models::mmdit::projections::Qkv
- models::mmdit::projections::QkvOnlyAttnProjections
- models::mobileclip::MobileClipConfig
- models::mobileclip::MobileClipModel
- models::mobilenetv4::Config
- models::mobileone::Config
- models::moondream::Config
- models::moondream::Encoder
- models::moondream::Model
- models::moondream::VisionConfig
- models::moondream::VisionEncoder
- models::mpt::Config
- models::mpt::Model
- models::nvembed_v2::embedding::Model
- models::nvembed_v2::model::Model
- models::olmo::Config
- models::olmo::Model
- models::openclip::text_model::Config
- models::openclip::text_model::Encoder
- models::openclip::text_model::OpenClipTextTransformer
- models::paligemma::Config
- models::paligemma::Model
- models::paligemma::MultiModalProjector
- models::parler_tts::Attention
- models::parler_tts::Config
- models::parler_tts::Decoder
- models::parler_tts::DecoderConfig
- models::parler_tts::DecoderLayer
- models::parler_tts::Model
- models::persimmon::Config
- models::phi3::Config
- models::phi3::Model
- models::phi3::RotaryEmbedding
- models::phi::Config
- models::phi::Model
- models::pixtral::llava::Config
- models::pixtral::llava::Model
- models::pixtral::llava::MultiModalProjector
- models::pixtral::vision_model::Config
- models::pixtral::vision_model::Model
- models::quantized_blip::BlipForConditionalGeneration
- models::quantized_blip::VisionModel
- models::quantized_blip_text::TextLMHeadModel
- models::quantized_blip_text::TextPooler
- models::quantized_llama2_c::QLlama
- models::quantized_llama::ModelWeights
- models::quantized_metavoice::transformer::Model
- models::quantized_mistral::Model
- models::quantized_mixformer::MixFormerSequentialForCausalLM
- models::quantized_moondream::Encoder
- models::quantized_moondream::Model
- models::quantized_moondream::VisionEncoder
- models::quantized_mpt::Model
- models::quantized_phi3::ModelWeights
- models::quantized_phi::ModelWeights
- models::quantized_qwen2::ModelWeights
- models::quantized_recurrent_gemma::Model
- models::quantized_rwkv_v5::Model
- models::quantized_rwkv_v6::Model
- models::quantized_stable_lm::Model
- models::quantized_t5::Config
- models::quantized_t5::T5EncoderModel
- models::quantized_t5::T5ForConditionalGeneration
- models::qwen2::Config
- models::qwen2::Model
- models::qwen2::ModelForCausalLM
- models::qwen2_moe::Config
- models::qwen2_moe::Model
- models::recurrent_gemma::Config
- models::recurrent_gemma::Model
- models::repvgg::Config
- models::rwkv_v5::Config
- models::rwkv_v5::Model
- models::rwkv_v5::State
- models::rwkv_v5::StatePerLayer
- models::rwkv_v5::Tokenizer
- models::rwkv_v6::Model
- models::segformer::Config
- models::segformer::ImageClassificationModel
- models::segformer::SemanticSegmentationModel
- models::segment_anything::LayerNorm2d
- models::segment_anything::MlpBlock
- models::segment_anything::image_encoder::ImageEncoderViT
- models::segment_anything::mask_decoder::MaskDecoder
- models::segment_anything::prompt_encoder::PromptEncoder
- models::segment_anything::sam::Sam
- models::segment_anything::tiny_vit::TinyViT
- models::segment_anything::transformer::TwoWayTransformer
- models::siglip::Config
- models::siglip::Model
- models::siglip::TextConfig
- models::siglip::TextModel
- models::siglip::TextTransformer
- models::siglip::VisionConfig
- models::siglip::VisionModel
- models::stable_diffusion::StableDiffusionConfig
- models::stable_diffusion::attention::AttentionBlock
- models::stable_diffusion::attention::AttentionBlockConfig
- models::stable_diffusion::attention::CrossAttention
- models::stable_diffusion::attention::SpatialTransformer
- models::stable_diffusion::attention::SpatialTransformerConfig
- models::stable_diffusion::clip::ClipTextTransformer
- models::stable_diffusion::clip::Config
- models::stable_diffusion::ddim::DDIMScheduler
- models::stable_diffusion::ddim::DDIMSchedulerConfig
- models::stable_diffusion::ddpm::DDPMScheduler
- models::stable_diffusion::ddpm::DDPMSchedulerConfig
- models::stable_diffusion::embeddings::TimestepEmbedding
- models::stable_diffusion::embeddings::Timesteps
- models::stable_diffusion::euler_ancestral_discrete::EulerAncestralDiscreteScheduler
- models::stable_diffusion::euler_ancestral_discrete::EulerAncestralDiscreteSchedulerConfig
- models::stable_diffusion::resnet::ResnetBlock2D
- models::stable_diffusion::resnet::ResnetBlock2DConfig
- models::stable_diffusion::unet_2d::BlockConfig
- models::stable_diffusion::unet_2d::UNet2DConditionModel
- models::stable_diffusion::unet_2d::UNet2DConditionModelConfig
- models::stable_diffusion::unet_2d_blocks::CrossAttnDownBlock2D
- models::stable_diffusion::unet_2d_blocks::CrossAttnDownBlock2DConfig
- models::stable_diffusion::unet_2d_blocks::CrossAttnUpBlock2D
- models::stable_diffusion::unet_2d_blocks::CrossAttnUpBlock2DConfig
- models::stable_diffusion::unet_2d_blocks::DownBlock2D
- models::stable_diffusion::unet_2d_blocks::DownBlock2DConfig
- models::stable_diffusion::unet_2d_blocks::DownEncoderBlock2D
- models::stable_diffusion::unet_2d_blocks::DownEncoderBlock2DConfig
- models::stable_diffusion::unet_2d_blocks::UNetMidBlock2D
- models::stable_diffusion::unet_2d_blocks::UNetMidBlock2DConfig
- models::stable_diffusion::unet_2d_blocks::UNetMidBlock2DCrossAttn
- models::stable_diffusion::unet_2d_blocks::UNetMidBlock2DCrossAttnConfig
- models::stable_diffusion::unet_2d_blocks::UpBlock2D
- models::stable_diffusion::unet_2d_blocks::UpBlock2DConfig
- models::stable_diffusion::unet_2d_blocks::UpDecoderBlock2D
- models::stable_diffusion::unet_2d_blocks::UpDecoderBlock2DConfig
- models::stable_diffusion::uni_pc::EdmDpmMultistepScheduler
- models::stable_diffusion::uni_pc::ExponentialSigmaSchedule
- models::stable_diffusion::uni_pc::KarrasSigmaSchedule
- models::stable_diffusion::uni_pc::UniPCSchedulerConfig
- models::stable_diffusion::vae::AutoEncoderKL
- models::stable_diffusion::vae::AutoEncoderKLConfig
- models::stable_diffusion::vae::DiagonalGaussianDistribution
- models::stable_lm::Config
- models::stable_lm::Model
- models::starcoder2::Config
- models::starcoder2::Model
- models::stella_en_v5::Config
- models::stella_en_v5::EmbedHead
- models::stella_en_v5::EmbeddingModel
- models::stella_en_v5::Embeddings
- models::stella_en_v5::Model
- models::t5::ActivationWithOptionalGating
- models::t5::Config
- models::t5::Linear
- models::t5::T5EncoderModel
- models::t5::T5ForConditionalGeneration
- models::trocr::TrOCRConfig
- models::trocr::TrOCRDecoder
- models::trocr::TrOCREncoder
- models::trocr::TrOCRForCausalLM
- models::trocr::TrOCRModel
- models::vgg::Vgg
- models::vit::Config
- models::vit::Embeddings
- models::vit::Encoder
- models::vit::Model
- models::whisper::Config
- models::whisper::model::AudioEncoder
- models::whisper::model::TextDecoder
- models::whisper::model::Whisper
- models::whisper::quantized_model::AudioEncoder
- models::whisper::quantized_model::TextDecoder
- models::whisper::quantized_model::Whisper
- models::with_tracing::Conv2d
- models::with_tracing::Embedding
- models::with_tracing::LayerNorm
- models::with_tracing::Linear
- models::with_tracing::QMatMul
- models::with_tracing::RmsNorm
- models::wuerstchen::attention_processor::Attention
- models::wuerstchen::common::AttnBlock
- models::wuerstchen::common::GlobalResponseNorm
- models::wuerstchen::common::LayerNormNoWeights
- models::wuerstchen::common::ResBlock
- models::wuerstchen::common::TimestepBlock
- models::wuerstchen::common::WLayerNorm
- models::wuerstchen::ddpm::DDPMWScheduler
- models::wuerstchen::ddpm::DDPMWSchedulerConfig
- models::wuerstchen::diffnext::ResBlockStageB
- models::wuerstchen::diffnext::WDiffNeXt
- models::wuerstchen::paella_vq::MixingResidualBlock
- models::wuerstchen::paella_vq::PaellaVQ
- models::wuerstchen::prior::WPrior
- models::xlm_roberta::Config
- models::xlm_roberta::XLMRobertaForMaskedLM
- models::xlm_roberta::XLMRobertaForSequenceClassification
- models::xlm_roberta::XLMRobertaModel
- models::yi::Config
- models::yi::Model
- object_detection::Bbox
- object_detection::KeyPoint
- quantized_nn::Embedding
- quantized_nn::Linear
- quantized_nn::RmsNorm
- quantized_var_builder::VarBuilder
Enums
- generation::Sampling
- models::bert::HiddenAct
- models::bert::PositionEmbeddingType
- models::chinese_clip::Activation
- models::chinese_clip::EncoderConfig
- models::chinese_clip::text_model::PositionEmbeddingType
- models::clip::EncoderConfig
- models::clip::text_model::Activation
- models::encodec::NormType
- models::encodec::PadMode
- models::granite::GraniteEosToks
- models::granite::GraniteRopeType
- models::jina_bert::PositionEmbeddingType
- models::llama::Llama3RopeType
- models::llama::LlamaEosToks
- models::metavoice::gpt::AttnKernelType
- models::metavoice::gpt::NonLinearityType
- models::metavoice::gpt::NormType
- models::mimi::NormType
- models::mimi::conv::Norm
- models::mimi::conv::PadMode
- models::mimi::encodec::ResampleMethod
- models::mimi::transformer::Mlp
- models::mimi::transformer::Norm
- models::mimi::transformer::PositionalEmbedding
- models::persimmon::PositionEmbeddingType
- models::recurrent_gemma::TemporalBlockType
- models::stable_diffusion::clip::Activation
- models::stable_diffusion::ddpm::DDPMVarianceType
- models::stable_diffusion::schedulers::BetaSchedule
- models::stable_diffusion::schedulers::PredictionType
- models::stable_diffusion::schedulers::TimestepSpacing
- models::stable_diffusion::uni_pc::AlgorithmType
- models::stable_diffusion::uni_pc::CorrectorConfiguration
- models::stable_diffusion::uni_pc::FinalSigmasType
- models::stable_diffusion::uni_pc::SigmaSchedule
- models::stable_diffusion::uni_pc::SolverType
- models::stable_diffusion::uni_pc::TimestepSchedule
- models::stella_en_v5::EmbedDim
- models::stella_en_v5::ModelVariant
- models::vgg::Models
Traits
- models::flux::WithForward
- models::mmdit::blocks::JointBlock
- models::stable_diffusion::schedulers::Scheduler
- models::stable_diffusion::schedulers::SchedulerConfig
- models::whisper::audio::Float
Functions
- models::beit::vit_base
- models::beit::vit_large
- models::chinese_clip::div_l2_norm
- models::clip::div_l2_norm
- models::convmixer::c1024_20
- models::convmixer::c1536_20
- models::convnext::convnext
- models::convnext::convnext_no_final_layer
- models::dinov2::vit_small
- models::dinov2reg4::vit_base
- models::dinov2reg4::vit_small
- models::efficientvit::efficientvit
- models::efficientvit::efficientvit_no_final_layer
- models::encodec::conv1d_weight_norm
- models::encodec::conv_transpose1d_weight_norm
- models::eva2::vit_base
- models::eva2::vit_large
- models::fastvit::fastvit
- models::fastvit::fastvit_no_final_layer
- models::flux::sampling::denoise
- models::flux::sampling::get_noise
- models::flux::sampling::get_schedule
- models::flux::sampling::unpack
- models::hiera::hiera
- models::hiera::hiera_no_final_layer
- models::llava::utils::get_anyres_image_grid_shape
- models::llava::utils::select_best_resolution
- models::mimi::encodec::load
- models::mobileclip::div_l2_norm
- models::mobilenetv4::mobilenetv4
- models::mobilenetv4::mobilenetv4_no_final_layer
- models::mobileone::mobileone
- models::mobileone::mobileone_no_final_layer
- models::repvgg::repvgg
- models::repvgg::repvgg_no_final_layer
- models::resnet::resnet101
- models::resnet::resnet101_no_final_layer
- models::resnet::resnet152
- models::resnet::resnet152_no_final_layer
- models::resnet::resnet18
- models::resnet::resnet18_no_final_layer
- models::resnet::resnet34
- models::resnet::resnet34_no_final_layer
- models::resnet::resnet50
- models::resnet::resnet50_no_final_layer
- models::segment_anything::linear
- models::segment_anything::tiny_vit::tiny_vit_5m
- models::stable_diffusion::build_clip_transformer
- models::stable_diffusion::utils::interp
- models::stable_diffusion::utils::linspace
- models::t5::deserialize_feed_forward_proj_activation
- models::t5::linear_no_bias
- models::whisper::audio::log_mel_spectrogram_
- models::whisper::audio::pcm_to_mel
- models::with_tracing::conv2d
- models::with_tracing::layer_norm
- models::with_tracing::linear
- models::with_tracing::linear_b
- models::with_tracing::linear_no_bias
- object_detection::iou
- object_detection::non_maximum_suppression
- object_detection::soft_non_maximum_suppression
- quantized_nn::layer_norm
- quantized_nn::layer_norm_no_bias
- quantized_nn::linear
- quantized_nn::linear_b
- quantized_nn::linear_no_bias
- utils::apply_repeat_penalty
- utils::repeat_kv
Type Aliases
- models::quantized_blip::Config
- models::quantized_blip::VisionConfig
- models::quantized_blip_text::Config
Constants
- models::bert::DTYPE
- models::distilbert::DTYPE
- models::granite::DEFAULT_MAX_SEQ_LEN
- models::jina_bert::DTYPE
- models::llama::DEFAULT_MAX_SEQ_LEN
- models::persimmon::DTYPE
- models::quantized_llama::MAX_SEQ_LEN
- models::quantized_phi::MAX_SEQ_LEN
- models::segment_anything::sam::IMAGE_SIZE
- models::whisper::CHUNK_LENGTH
- models::whisper::COMPRESSION_RATIO_THRESHOLD
- models::whisper::DTYPE
- models::whisper::EOT_TOKEN
- models::whisper::HOP_LENGTH
- models::whisper::LOGPROB_THRESHOLD
- models::whisper::NO_SPEECH_THRESHOLD
- models::whisper::NO_SPEECH_TOKENS
- models::whisper::NO_TIMESTAMPS_TOKEN
- models::whisper::N_FFT
- models::whisper::N_FRAMES
- models::whisper::N_SAMPLES
- models::whisper::SAMPLE_RATE
- models::whisper::SOT_TOKEN
- models::whisper::TEMPERATURES
- models::whisper::TRANSCRIBE_TOKEN
- models::whisper::TRANSLATE_TOKEN