use std::time::{Duration, SystemTime};
use tempfile::tempdir;
use tokio::fs;
#[tokio::test]
async fn test_batch_queue_manager() {
use inferno::backends::InferenceParams;
use inferno::batch::queue::{
BatchJob, JobPriority, JobQueueConfig, JobQueueManager, NotificationConfig, ResourceLimits,
ResourceRequirements, RetryConfig, RetryPolicy,
};
use inferno::batch::{BatchConfig, BatchInput};
use std::collections::HashMap;
let config = JobQueueConfig {
max_concurrent_jobs: 5,
max_queue_size: 100,
job_timeout_minutes: 5,
priority_enabled: true,
scheduling_enabled: true,
retry_policy: RetryPolicy {
max_attempts: 3,
initial_delay_seconds: 1,
backoff_multiplier: 2.0,
max_delay_seconds: 60,
retry_on_timeout: true,
retry_on_error: true,
},
resource_limits: ResourceLimits {
max_memory_mb: Some(1024),
max_cpu_percent: Some(80.0),
max_disk_space_mb: Some(10240),
max_network_bandwidth_mbps: Some(100.0),
},
notification_config: NotificationConfig::default(),
};
let manager = JobQueueManager::new(config);
manager
.create_queue(
"test-queue".to_string(),
"Test Queue".to_string(),
"A test batch queue".to_string(),
)
.await
.unwrap();
let job = BatchJob {
id: "job-1".to_string(),
name: "Test Job".to_string(),
description: Some("A test batch job".to_string()),
priority: JobPriority::Normal,
inputs: vec![BatchInput {
id: "input-1".to_string(),
content: "Test input".to_string(),
metadata: None,
}],
inference_params: InferenceParams::default(),
model_name: "test-model".to_string(),
batch_config: BatchConfig::default(),
schedule: None,
dependencies: vec![],
resource_requirements: ResourceRequirements {
cpu_cores: Some(1.0),
memory_mb: Some(512),
gpu_required: false,
gpu_memory_mb: None,
disk_space_mb: None,
network_bandwidth_mbps: None,
},
timeout_minutes: Some(5),
retry_count: 0,
max_retries: 3,
retry_config: RetryConfig::default(),
created_at: SystemTime::now(),
scheduled_at: None,
tags: HashMap::new(),
metadata: HashMap::new(),
};
let result = manager.submit_job("test-queue", job).await;
assert!(result.is_ok());
let queues = manager.list_all_queues().await.unwrap();
assert!(!queues.is_empty());
assert!(queues.iter().any(|q| q.id == "test-queue"));
}
#[tokio::test]
async fn test_model_versioning_config() {
use inferno::versioning::{ModelVersionManager, SemanticVersion, VersioningConfig};
let temp_dir = tempdir().unwrap();
let version_dir = temp_dir.path().join("versions");
fs::create_dir_all(&version_dir).await.unwrap();
let config = VersioningConfig {
storage_path: version_dir.clone(),
..Default::default()
};
let _manager = ModelVersionManager::new(config).await.unwrap();
let version = SemanticVersion::from_string("1.2.3").unwrap();
assert_eq!(version.major, 1);
assert_eq!(version.minor, 2);
assert_eq!(version.patch, 3);
let version = SemanticVersion::from_string("1.0.0-alpha").unwrap();
assert_eq!(version.major, 1);
assert_eq!(version.pre_release, Some("alpha".to_string()));
}
#[tokio::test]
async fn test_gpu_manager() {
use inferno::gpu::{GpuConfiguration, GpuManager, GpuVendor};
let config = GpuConfiguration {
enabled: true,
preferred_vendor: Some(GpuVendor::Nvidia),
memory_limit_mb: Some(4096),
max_utilization_percent: 90.0,
temperature_limit_celsius: 85.0,
power_limit_percent: None,
fallback_to_cpu: true,
auto_scaling: false,
monitoring_interval_seconds: 5,
};
let manager = GpuManager::new(config);
let gpus = manager.get_available_gpus().await;
if !gpus.is_empty() {
let gpu_info = manager.get_gpu_info(0).await;
assert!(gpu_info.is_some());
}
let config = manager.get_configuration();
assert!(config.enabled);
}
#[tokio::test]
async fn test_audit_logger() {
use inferno::audit::{
Actor, ActorType, AlertConfiguration, AuditConfiguration, AuditEvent, AuditLogger,
AuditQuery, CompressionMethod, EventContext, EventDetails, EventOutcome, EventType,
ExportFormat, LogLevel, Resource, ResourceType, Severity,
};
use std::collections::HashMap;
let temp_dir = tempdir().unwrap();
let audit_dir = temp_dir.path().join("audit");
fs::create_dir_all(&audit_dir).await.unwrap();
let config = AuditConfiguration {
enabled: true,
log_level: LogLevel::All,
storage_path: audit_dir.clone(),
max_file_size_mb: 100,
max_files: 10,
compression_enabled: false,
compression_method: CompressionMethod::Gzip,
compression_level: 6,
encryption_enabled: false,
encryption_key_env: String::new(),
encryption_sensitive_fields_only: false,
retention_days: 90,
batch_size: 1000,
flush_interval_seconds: 5,
include_request_body: true,
include_response_body: true,
exclude_patterns: vec![],
alert_on_critical: true,
alerting: AlertConfiguration::default(),
export_format: ExportFormat::Json,
};
let logger = AuditLogger::new(config).await.unwrap();
let event = AuditEvent {
id: "test-event-1".to_string(),
timestamp: SystemTime::now(),
event_type: EventType::ModelManagement,
severity: Severity::Info,
actor: Actor {
actor_type: ActorType::User,
id: "test-user".to_string(),
name: "Test User".to_string(),
ip_address: Some("127.0.0.1".to_string()),
user_agent: None,
session_id: Some("session-123".to_string()),
},
resource: Resource {
resource_type: ResourceType::Model,
id: "test-model".to_string(),
name: "Test Model".to_string(),
path: Some("/models/test-model.gguf".to_string()),
owner: None,
tags: vec![],
},
action: "load".to_string(),
details: EventDetails {
description: "Model loaded successfully".to_string(),
parameters: HashMap::new(),
request_id: Some("req-123".to_string()),
correlation_id: Some("corr-456".to_string()),
trace_id: None,
parent_event_id: None,
},
context: EventContext {
environment: "test".to_string(),
application: "inferno".to_string(),
version: "0.1.0".to_string(),
hostname: "localhost".to_string(),
process_id: std::process::id(),
thread_id: None,
request_path: None,
request_method: None,
client_info: None,
},
outcome: EventOutcome {
success: true,
status_code: None,
error_code: None,
error_message: None,
duration_ms: Some(150),
bytes_processed: None,
records_affected: None,
},
metadata: HashMap::new(),
};
logger.log_event(event.clone()).await.unwrap();
tokio::time::sleep(Duration::from_millis(100)).await;
let query = AuditQuery {
start_time: Some(SystemTime::now() - Duration::from_secs(60)),
end_time: Some(SystemTime::now()),
limit: Some(10),
..Default::default()
};
let events = logger.query_events(query).await.unwrap();
assert!(!events.is_empty());
assert_eq!(events[0].id, "test-event-1");
}
#[tokio::test]
async fn test_performance_monitor() {
use inferno::monitoring::{MonitoringConfig, PerformanceMetric, PerformanceMonitor};
let config = MonitoringConfig::default();
let monitor = PerformanceMonitor::new(config, None).await.unwrap();
let metric = PerformanceMetric {
timestamp: SystemTime::now(),
model_id: "test-model".to_string(),
response_time_ms: 150,
throughput_rps: 10.5,
error_rate_percent: 0.1,
memory_usage_mb: 1024,
cpu_usage_percent: 25.0,
queue_depth: 5,
cache_hit_rate_percent: 85.0,
active_connections: 10,
total_requests: 1000,
successful_requests: 990,
failed_requests: 10,
};
monitor.record_metric(metric).await.unwrap();
let metrics = monitor.get_current_metrics().await;
assert!(!metrics.is_empty());
}
#[tokio::test]
async fn test_response_cache_system() {
use inferno::response_cache::{CacheKey, ResponseCache, ResponseCacheConfig, ResponseMetadata};
let config = ResponseCacheConfig::default();
let mut cache = ResponseCache::new(config, None).await.unwrap();
let key = CacheKey {
request_hash: "request-hash-123".to_string(),
model_id: "test-model".to_string(),
parameters_hash: "params-hash-456".to_string(),
};
let response = b"Test response content".to_vec();
let metadata = ResponseMetadata {
model_id: "test-model".to_string(),
response_type: "text".to_string(),
token_count: Some(10),
processing_time_ms: 150,
quality_score: Some(0.95),
content_type: "text/plain".to_string(),
};
cache.put(&key, response.clone(), metadata).await.unwrap();
let cached_response = cache.get(&key).await;
assert!(cached_response.is_some());
assert_eq!(cached_response.unwrap(), response);
let stats = cache.get_stats().await;
assert_eq!(stats.total_entries, 1);
cache.shutdown().await;
}
#[tokio::test]
async fn test_distributed_inference() {
use inferno::backends::BackendConfig;
use inferno::distributed::{DistributedConfig, DistributedInference, PoolStrategy};
use inferno::models::ModelManager;
use std::sync::Arc;
let temp_dir = tempdir().unwrap();
let models_dir = temp_dir.path().join("models");
fs::create_dir_all(&models_dir).await.unwrap();
let config = DistributedConfig {
worker_count: 2,
max_concurrent_per_worker: 4,
request_timeout_seconds: 30,
load_balancing: true,
pool_strategy: PoolStrategy::RoundRobin,
preload_models: false,
max_models_per_worker: 2,
};
let backend_config = BackendConfig::default();
let model_manager = Arc::new(ModelManager::new(&models_dir));
let distributed = DistributedInference::new(
config,
backend_config,
model_manager,
None, )
.await
.unwrap();
let _stats = distributed.get_stats().await;
}
#[tokio::test]
async fn test_model_conversion_system() {
use inferno::config::Config;
use inferno::conversion::{ConversionConfig, ModelConverter, ModelFormat, OptimizationLevel};
use inferno::models::ModelManager;
use std::sync::Arc;
let temp_dir = tempdir().unwrap();
let input_dir = temp_dir.path().join("input");
let output_dir = temp_dir.path().join("output");
fs::create_dir_all(&input_dir).await.unwrap();
fs::create_dir_all(&output_dir).await.unwrap();
let input_path = input_dir.join("test-model.gguf");
fs::write(&input_path, b"GGUF\x00\x00\x00\x01mock model data")
.await
.unwrap();
let model_manager = Arc::new(ModelManager::new(&input_dir));
let config = Config::default();
let converter = ModelConverter::new(model_manager, config);
let conversion_config = ConversionConfig {
output_format: ModelFormat::Onnx,
quantization: None,
target_precision: None,
context_length: None,
batch_size: None,
optimization_level: OptimizationLevel::Balanced,
preserve_metadata: true,
verify_output: false,
};
let output_path = output_dir.join("converted-model.onnx");
let result = converter
.convert_model(&input_path, &output_path, &conversion_config)
.await;
assert!(result.is_ok() || result.is_err()); }
#[tokio::test]
async fn test_error_handling_and_edge_cases() {
use inferno::models::ModelManager;
let temp_dir = tempdir().unwrap();
let nonexistent_dir = temp_dir.path().join("nonexistent");
let manager = ModelManager::new(&nonexistent_dir);
let result = manager.list_models().await;
assert!(result.is_ok());
let models_dir = temp_dir.path().join("models");
fs::create_dir_all(&models_dir).await.unwrap();
let invalid_model = models_dir.join("invalid.gguf");
fs::write(&invalid_model, b"INVALID").await.unwrap();
let manager = ModelManager::new(&models_dir);
let validation_result = manager.validate_model(&invalid_model).await;
assert!(validation_result.is_ok());
let is_valid = validation_result.unwrap();
assert!(!is_valid);
}
#[tokio::test]
async fn test_concurrent_operations() {
use inferno::metrics::MetricsCollector;
use std::sync::Arc;
let (collector, processor) = MetricsCollector::new();
let collector = Arc::new(collector);
processor.start();
let mut handles = vec![];
for i in 0..10 {
let collector_clone = collector.clone();
let handle = tokio::spawn(async move {
collector_clone.record_model_loaded(
format!("model-{}", i),
1024 * 1024,
Duration::from_millis(100),
"gguf".to_string(),
);
});
handles.push(handle);
}
for handle in handles {
handle.await.unwrap();
}
tokio::time::sleep(Duration::from_millis(200)).await;
let snapshot = collector.get_snapshot().await.unwrap();
assert_eq!(snapshot.model_metrics.loaded_models.len(), 10);
}