//! Model-architecture module registry: declares every architecture module
//! (most gated behind a matching Cargo feature) and re-exports their public
//! config/model types.

// Blanket clippy suppressions for all code under this module tree.
// NOTE(review): no justification is recorded for any of these allows —
// presumably they silence lints in the many submodules below, but consider
// scoping each `allow` to the offending module (or fixing the lint; e.g.
// `result_large_err` / `large_enum_variant` usually mean a large payload
// should be boxed). TODO: confirm which allows are still needed.
#![allow(clippy::needless_range_loop)]
#![allow(clippy::useless_vec)]
#![allow(clippy::redundant_locals)]
#![allow(clippy::len_without_is_empty)]
#![allow(clippy::await_holding_lock)]
#![allow(clippy::if_same_then_else)]
#![allow(clippy::derivable_impls)]
#![allow(clippy::wrong_self_convention)]
#![allow(clippy::same_item_push)]
#![allow(clippy::vec_init_then_push)]
#![allow(clippy::ptr_arg)]
#![allow(clippy::result_large_err)]
#![allow(clippy::excessive_nesting)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::type_complexity)]
#![allow(clippy::large_enum_variant)]
#[cfg(feature = "bert")]
pub mod bert;
#[cfg(feature = "roberta")]
pub mod roberta;
#[cfg(feature = "distilbert")]
pub mod distilbert;
#[cfg(feature = "gpt2")]
pub mod gpt2;
#[cfg(feature = "gpt_neo")]
pub mod gpt_neo;
#[cfg(feature = "gpt_j")]
pub mod gpt_j;
#[cfg(feature = "t5")]
pub mod t5;
#[cfg(feature = "albert")]
pub mod albert;
#[cfg(feature = "electra")]
pub mod electra;
#[cfg(feature = "deberta")]
pub mod deberta;
#[cfg(feature = "vit")]
pub mod vit;
#[cfg(feature = "llama")]
pub mod llama;
#[cfg(feature = "llama2")]
pub mod llama2;
#[cfg(feature = "llama3")]
pub mod llama3;
#[cfg(feature = "codellama")]
pub mod codellama;
#[cfg(feature = "deepseek")]
pub mod deepseek;
#[cfg(feature = "gpt_neox")]
pub mod gpt_neox;
#[cfg(feature = "mistral")]
pub mod mistral;
#[cfg(feature = "clip")]
pub mod clip;
pub mod cogvlm;
pub mod recursive;
#[cfg(feature = "blip2")]
pub mod blip2;
#[cfg(feature = "llava")]
pub mod llava;
#[cfg(feature = "dalle")]
pub mod dalle;
#[cfg(feature = "flamingo")]
pub mod flamingo;
#[cfg(feature = "gemma")]
pub mod gemma;
#[cfg(feature = "qwen")]
pub mod qwen;
#[cfg(feature = "phi3")]
pub mod phi3;
#[cfg(feature = "gemma2")]
pub mod gemma2;
#[cfg(feature = "internlm2")]
pub mod internlm2;
#[cfg(feature = "falcon2")]
pub mod falcon2;
#[cfg(feature = "deepseek_v2")]
pub mod deepseek_v2;
#[cfg(feature = "qwen2_5")]
pub mod qwen2_5;
pub mod hyena;
pub mod mamba;
pub mod retnet;
pub mod rwkv;
pub mod s4;
pub mod falcon;
pub mod stablelm;
#[cfg(feature = "opt")]
pub mod opt;
pub mod command_r;
#[cfg(feature = "granite")]
pub mod granite;
#[cfg(feature = "aya")]
pub mod aya;
#[cfg(feature = "jamba")]
pub mod jamba;
#[cfg(feature = "jamba2")]
pub mod jamba2;
#[cfg(feature = "sd3")]
pub mod sd3;
#[cfg(feature = "llama3_2")]
pub mod llama3_2;
#[cfg(feature = "mistral_v3")]
pub mod mistral_v3;
#[cfg(feature = "mixtral")]
pub mod mixtral;
#[cfg(feature = "phi2")]
pub mod phi2;
#[cfg(feature = "mamba2")]
pub mod mamba2;
#[cfg(feature = "phi4")]
pub mod phi4;
#[cfg(feature = "nemotron")]
pub mod nemotron;
#[cfg(feature = "whisper")]
pub mod whisper;
#[cfg(feature = "yi")]
pub mod yi;
#[cfg(feature = "starcoder2")]
pub mod starcoder2;
pub mod claude;
pub mod moe;
pub mod fnet;
pub mod linformer;
pub mod performer;
pub mod sparse_attention;
pub mod cross_attention;
pub mod hierarchical;
pub mod advanced_quantization;
pub mod ring_attention;
pub mod weight_loading;
pub mod generation_utils;
pub mod batch_inference;
pub mod dynamic_pruning;
pub mod knowledge_distillation;
pub mod model_compression;
pub mod continual_learning;
pub mod curriculum_learning;
pub mod multi_task_learning;
pub mod progressive_training;
pub mod meta_learning;
#[cfg(feature = "llama")]
pub mod code_specialized;
#[cfg(feature = "llama")]
pub mod math_specialized;
pub mod scientific_specialized;
pub mod legal_medical_specialized;
pub mod creative_writing_specialized;
pub mod common_patterns;
pub mod comprehensive_testing;
pub mod model_cards;
pub mod neural_architecture_search;
pub mod automated_model_design;
pub mod hybrid_architectures;
pub mod memory_profiling;
pub mod error_recovery;
pub mod mixed_bit_quantization;
pub mod performance_optimization;
pub mod model_serving;
pub mod xlstm;
pub mod biologically_inspired;
pub mod quantum_classical_hybrids;
pub mod benchmarking;
pub mod numerical_parity_tests;
pub mod developer_tools;
#[cfg(feature = "bert")]
pub use bert::{BertConfig, BertForMaskedLM, BertForSequenceClassification, BertModel};
#[cfg(feature = "roberta")]
pub use roberta::{
RobertaConfig, RobertaForMaskedLM, RobertaForQuestionAnswering,
RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel,
};
#[cfg(feature = "distilbert")]
pub use distilbert::{
DistilBertConfig, DistilBertForMaskedLM, DistilBertForQuestionAnswering,
DistilBertForSequenceClassification, DistilBertForTokenClassification, DistilBertModel,
};
#[cfg(feature = "gpt2")]
pub use gpt2::{Gpt2Config, Gpt2LMHeadModel, Gpt2Model};
#[cfg(feature = "gpt_neo")]
pub use gpt_neo::{GptNeoConfig, GptNeoLMHeadModel, GptNeoModel};
#[cfg(feature = "gpt_j")]
pub use gpt_j::{GptJConfig, GptJLMHeadModel, GptJModel};
#[cfg(feature = "t5")]
pub use t5::{T5Config, T5ForConditionalGeneration, T5Model};
#[cfg(feature = "albert")]
pub use albert::{
AlbertConfig, AlbertForMaskedLM, AlbertForQuestionAnswering, AlbertForSequenceClassification,
AlbertForTokenClassification, AlbertModel,
};
#[cfg(feature = "electra")]
pub use electra::{
ElectraConfig, ElectraForMultipleChoice, ElectraForPreTraining, ElectraForQuestionAnswering,
ElectraForSequenceClassification, ElectraForTokenClassification, ElectraModel,
};
#[cfg(feature = "deberta")]
pub use deberta::{
DebertaConfig, DebertaForMaskedLM, DebertaForMultipleChoice, DebertaForQuestionAnswering,
DebertaForSequenceClassification, DebertaForTokenClassification, DebertaModel,
};
#[cfg(feature = "vit")]
pub use vit::{ViTConfig, ViTForImageClassification, ViTModel};
#[cfg(feature = "llama")]
pub use llama::{LlamaConfig, LlamaForCausalLM, LlamaModel};
#[cfg(feature = "gpt_neox")]
pub use gpt_neox::{GPTNeoXConfig, GPTNeoXForCausalLM, GPTNeoXModel};
#[cfg(feature = "mistral")]
pub use mistral::{MistralConfig, MistralForCausalLM, MistralModel};
#[cfg(feature = "clip")]
pub use clip::{CLIPConfig, CLIPModel, CLIPTextConfig, CLIPVisionConfig};
#[cfg(feature = "blip2")]
pub use blip2::{
Blip2ConditionalGenerationOutput, Blip2Config, Blip2ForConditionalGeneration, Blip2Model,
Blip2Output, Blip2QFormerConfig, Blip2QFormerModel, Blip2QFormerOutput, Blip2TextConfig,
Blip2VisionConfig, Blip2VisionModel, LanguageModelOutput,
};
#[cfg(feature = "llava")]
pub use llava::{LlavaConfig, LlavaForConditionalGeneration, LlavaVisionConfig};
#[cfg(feature = "dalle")]
pub use dalle::{
DalleConfig, DalleDiffusionConfig, DalleImageConfig, DalleImageEncoder, DalleMLP, DalleModel,
DalleModelOutput, DalleTextConfig, DalleTextEncoder, DalleTimeEmbedding, DalleUNet, DalleVAE,
DalleVisionConfig,
};
#[cfg(feature = "flamingo")]
pub use flamingo::{
FlamingoConfig, FlamingoLanguageConfig, FlamingoLanguageModel, FlamingoLanguageOutput,
FlamingoModel, FlamingoOutput, FlamingoPerceiverConfig, FlamingoVisionConfig,
FlamingoVisionEncoder, FlamingoXAttentionConfig, PerceiverResampler,
};
#[cfg(feature = "gemma")]
pub use gemma::{GemmaConfig, GemmaForCausalLM, GemmaModel};
#[cfg(feature = "qwen")]
pub use qwen::{QwenConfig, QwenForCausalLM, QwenModel};
#[cfg(feature = "phi3")]
pub use phi3::{Phi3Config, Phi3ForCausalLM, Phi3Model};
#[cfg(feature = "gemma2")]
pub use gemma2::{Gemma2Config, Gemma2ForCausalLM, Gemma2Model};
#[cfg(feature = "deepseek_v2")]
pub use deepseek_v2::{
ActivationType as DeepSeekV2ActivationType, DeepSeekV2Config, DeepSeekV2Error,
DeepSeekV2ForCausalLM, DeepSeekV2Model, TopKMethod,
};
#[cfg(feature = "qwen2_5")]
pub use qwen2_5::{
Qwen25Config, Qwen25Error, Qwen25ForCausalLM, Qwen25ForSequenceClassification, Qwen25Model,
};
pub use automated_model_design::{
ArchitectureTemplate, ConstraintSolver, DeploymentEnvironment, DesignPatternLibrary,
DesignRequirements, DesignRequirementsBuilder, Modality, ModelDesign, ModelDesignMetadata,
ModelDesigner, ModelMetrics, PerformanceTarget, ResourceConstraints,
TaskType as DesignTaskType, TemplateMetadata,
};
pub use claude::{ClaudeConfig, ClaudeForCausalLM, ClaudeModel};
#[cfg(feature = "llama")]
pub use code_specialized::{
CodeLlamaConfig, CodeLlamaForCausalLM, CodeLlamaModel, CodeModelVariant, CodeSpecialTokens,
CodeSpecializedConfig, CodeSpecializedForCausalLM, CodeSpecializedModel, DeepSeekCoderConfig,
DeepSeekCoderForCausalLM, DeepSeekCoderModel, QwenCoderConfig, QwenCoderForCausalLM,
QwenCoderModel, StarCoderConfig, StarCoderForCausalLM, StarCoderModel,
};
pub use command_r::{CommandRConfig, CommandRForCausalLM, CommandRModel};
pub use common_patterns::{
components, get_global_registry, ArchitectureType, ComputeRequirements, EvaluableModel,
EvaluationData, EvaluationMetric, EvaluationResults, GenerationConfig, GenerationStrategy,
GenerativeModel, InitializationStrategy, MemoryEstimate, ModelFamily, ModelFamilyMetadata,
ModelRegistry, ModelUtils, TaskType as CommonTaskType,
};
pub use comprehensive_testing::{
reporting, BiasMetric, BiasmitigationStrategy, FairnessAssessment, FairnessConfig,
FairnessMetricType, FairnessResult, FairnessTestData, FairnessViolation, GroupData,
LayerPerformance, MemoryAnalysis, ModelTestSuite, NumericalDifferences, NumericalParityResults,
OverallPerformance, PerformanceProfiler, PerformanceResults, ReferenceComparator,
StatisticalTest, TestDataType, TestInputConfig, TestResult, TestStatistics,
ThroughputMeasurements, TimingInfo, ValidationConfig,
};
pub use continual_learning::{
utils as continual_learning_utils, ContinualLearningConfig, ContinualLearningMetrics,
ContinualLearningOutput, ContinualLearningTrainer, ContinualStrategy, LearningRateSchedule,
MemoryBuffer, MemorySelectionStrategy, TaskEvaluation, TaskInfo,
};
pub use creative_writing_specialized::{
CreativeWritingConfig, CreativeWritingForCausalLM, CreativeWritingModel,
CreativeWritingSpecialTokens, EmotionalTone, ImprovementType, LiteraryDevice,
NarrativePerspective, PoetryStyle, StyleAnalysis, WritingGenre, WritingImprovement,
WritingStyle,
};
pub use cross_attention::{
AdaptiveCrossAttention, CrossAttention, CrossAttentionConfig, GatedCrossAttention,
HierarchicalCrossAttention, MultiHeadCrossAttention, SparseCrossAttention,
};
pub use curriculum_learning::{
utils as curriculum_learning_utils, CurriculumAnalysis, CurriculumConfig,
CurriculumEpochOutput, CurriculumExample, CurriculumLearningOutput, CurriculumLearningTrainer,
CurriculumStats, CurriculumStrategy, DifficultyMeasure, PacingFunction,
};
pub use dynamic_pruning::*;
pub use error_recovery::{
ErrorCategory, ErrorRecoveryManager, ErrorTrends, ModelCheckpoint, RecoverableOperation,
RecoveryAttempt, RecoveryConfig, RecoveryMetrics, RecoveryReport, RecoveryStrategy,
};
pub use falcon::{FalconConfig, FalconForCausalLM, FalconModel};
pub use fnet::{FNetConfig, FNetForMaskedLM, FNetForSequenceClassification, FNetModel};
pub use hierarchical::{
HierarchicalConfig, HierarchicalForLanguageModeling, HierarchicalForSequenceClassification,
HierarchicalTransformer, NestedTransformer, PyramidTransformer, TreeTransformer,
};
pub use hybrid_architectures::{
AdaptiveConfig, ArchitecturalComponent, ArchitectureSummary, AttentionType, CNNArchitecture,
CrossModalConfig, EnsembleMethod, FusionStrategy, GlobalParams, HierarchyType,
HybridArchitecture, HybridConfig, HybridConfigBuilder, MemoryType, ParallelFusionMethod,
RNNCellType, StateSpaceType, SwitchingCriteria, TransformerVariant,
};
pub use hyena::{
HyenaConfig, HyenaForLanguageModeling, HyenaForSequenceClassification, HyenaModel,
};
pub use knowledge_distillation::{
utils as knowledge_distillation_utils, DistillationConfig, DistillationOutput,
DistillationStrategy, KnowledgeDistillationTrainer, ProgressiveStage, StudentOutputs,
TeacherOutputs,
};
pub use legal_medical_specialized::{
Citation, CitationType, ComplianceReport, ComplianceViolation, DocumentAnalysis,
LegalMedicalConfig, LegalMedicalDomain, LegalMedicalForCausalLM, LegalMedicalModel,
LegalMedicalSpecialTokens, LegalSystem, MedicalStandard, PrivacyRequirement,
};
pub use linformer::{
LinformerConfig, LinformerForMaskedLM, LinformerForSequenceClassification, LinformerModel,
};
pub use mamba::{MambaConfig, MambaModel};
#[cfg(feature = "llama")]
pub use math_specialized::{
ChainOfThoughtConfig, DeepSeekMathConfig, DeepSeekMathForCausalLM, DeepSeekMathModel,
MammothConfig, MammothForCausalLM, MammothModel, MathDomain, MathLlamaConfig,
MathLlamaForCausalLM, MathLlamaModel, MathModelVariant, MathProblemType, MathReasoningOutput,
MathSpecialTokens, MathSpecializedConfig, MathSpecializedForCausalLM, MathSpecializedModel,
MinervaConfig, MinervaForCausalLM, MinervaModel, ReasoningStep, ReasoningStrategy,
};
pub use meta_learning::{
utils as meta_learning_utils, ConvergenceMetrics, EpisodeResult, EvaluationResult, Example,
ExampleSet, MetaAlgorithm, MetaLearner, MetaLearningConfig, MetaLearningModel, MetaOptimizer,
MetaStatistics, PerformanceMetrics, Task, TaskBatch, TaskResult, TaskSampler,
TaskType as MetaTaskType,
};
pub use mixed_bit_quantization::{
BitAllocationStrategy, CalibrationConfig, CalibrationMethod,
HardwareConstraints as QuantizationHardwareConstraints,
HardwarePlatform as QuantizationHardwarePlatform, LayerQuantizationConstraints,
MixedBitQuantizationConfig, MixedBitQuantizer, ProgressiveQuantizationConfig,
QuantizationFormat, QuantizationParams, QuantizationQualityMetrics, QuantizationResults,
QuantizedLayerInfo, SensitivityAnalysisResults,
};
pub use model_compression::{
utils as model_compression_utils, ClusteringMethod, CompressedModel, CompressionAnalysis,
CompressionConfig, CompressionPipeline, CompressionStrategy, CompressionSummary,
DecompositionType, LayerCompressionStats, OptimizationObjective, PruningStrategy,
StructuredPruningGranularity,
};
pub use model_serving::{
InferenceRequest, InferenceResponse, LoadBalancer, LoadBalancingStrategy, ModelInstance,
ModelServingManager, RequestPriority, RequestQueue, ServingConfig, ServingMetrics,
};
pub use moe::{
glam_config, switch_config, Expert, ExpertParallel, MLPExpert, MoEConfig, RouterOutput,
RoutingStats, SparseMoE, SwitchMoE, TopKRouter,
};
pub use multi_task_learning::{
utils as multi_task_learning_utils, LossBalancingStrategy, MTLAnalysis, MTLArchitecture,
MTLConfig, MTLStats, MultiTaskEvaluation, MultiTaskLearningTrainer, MultiTaskOutput,
TaskConfig, TaskEvaluation as MTLTaskEvaluation, TaskPriority, TaskType as MTLTaskType,
};
pub use neural_architecture_search::{
Architecture, ArchitectureConstraint, ArchitectureEvaluation, ArchitectureMetadata,
DimensionRange, HardwareConstraints, HardwarePlatform, NASConfig, NeuralArchitectureSearcher,
OptimizationObjective as NASOptimizationObjective, SearchSpace, SearchStatistics,
SearchStrategy,
};
#[cfg(feature = "opt")]
pub use opt::{
format_completion_prompt, OptAttention, OptCausalLMOutput, OptConfig, OptDecoder,
OptDecoderLayer, OptError, OptFeedForward, OptForCausalLM, OptLayerNorm,
OptLearnedPositionalEmbedding, OptLinear, OptModel,
};
pub use performance_optimization::{
BatchProcessor, BatchingStrategy, CachedTensor, DynamicBatchManager, GpuCacheStatistics,
GpuMemoryChunk, GpuMemoryOptimizer, GpuMemoryPool, GpuMemoryStats,
GpuOptimizationRecommendations, GpuTensorCache, MemoryOptimizer, PerformanceConfig,
PerformanceMonitor, PerformanceStatistics,
};
pub use performer::{
PerformerConfig, PerformerForMaskedLM, PerformerForSequenceClassification, PerformerModel,
};
pub use progressive_training::{
utils as progressive_training_utils, GrowthDimension, GrowthEvent, GrowthInfo, GrowthResult,
GrowthSchedule, GrowthStrategy, LearningProgress, ProgressiveConfig, ProgressiveModel,
ProgressiveTrainer,
};
pub use retnet::{
RetNetConfig, RetNetForLanguageModeling, RetNetForSequenceClassification, RetNetModel,
};
pub use rwkv::{RwkvConfig, RwkvModel};
pub use s4::{S4Config, S4ForLanguageModeling, S4Model};
pub use scientific_specialized::{
CitationStyle, ScientificAnalysis, ScientificConfig, ScientificDomain, ScientificForCausalLM,
ScientificModel, ScientificSpecialTokens,
};
pub use sparse_attention::{
utils as sparse_attention_utils, SparseAttention, SparseAttentionConfig, SparseAttentionMask,
SparsePattern,
};
pub use stablelm::{StableLMConfig, StableLMForCausalLM, StableLMModel};
pub use weight_loading::{
auto_create_loader, create_distributed_loader, create_gguf_loader, create_huggingface_loader,
create_memory_mapped_loader, DistributedStats, DistributedWeightLoader, GGMLType, GGUFLoader,
HuggingFaceLoader, LazyTensor, MemoryMappedLoader, QuantizationConfig, StreamingLoader,
TensorMetadata, WeightDataType, WeightFormat, WeightLoader, WeightLoadingConfig,
};
#[cfg(feature = "phi4")]
pub use phi4::{Phi4Config, Phi4Error, Phi4ForCausalLM, Phi4Model, Phi4RopeScaling};
#[cfg(feature = "nemotron")]
pub use nemotron::{NemotronConfig, NemotronError, NemotronForCausalLM, NemotronModel, NormType};
pub use xlstm::{
ExponentialGatingConfig, FeedForward, MLstmBlock, MLstmConfig, SLstmBlock, SLstmConfig,
XLSTMBlockConfig, XLSTMBlockType, XLSTMConfig, XLSTMForCausalLM,
XLSTMForSequenceClassification, XLSTMLayer, XLSTMModel, XLSTMState,
};
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: confirms the test harness compiles and runs
    /// for this module tree.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}