use crate::storage::{QdrantConnectionConfig, QdrantSecurityConfig, EmbeddingCacheConfig, AccessControlConfig};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Top-level output of a scalability assessment: the analyzed workload plus
/// every derived plan (scaling, replication, load balancing, performance,
/// and cost).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityAssessment {
/// The workload the assessment was computed from (echoed back verbatim).
pub workload_analysis: WorkloadAnalysis,
/// Node counts, sharding, and auto-scaling plan.
pub horizontal_scaling: HorizontalScalingPlan,
/// Replication factor, consistency, quorums, and backup cadence.
pub replication_strategy: ReplicationStrategy,
/// Load-balancer tier and health-check policy.
pub load_balancing: LoadBalancingConfig,
/// Projected throughput, latency, and resource requirements.
pub performance_projections: PerformanceProjections,
/// Estimated monthly costs and break-even targets.
pub cost_analysis: CostAnalysis,
}
/// Input description of the expected workload; all downstream planning is
/// derived from these numbers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkloadAnalysis {
/// Documents ingested per day.
pub daily_documents: usize,
/// Search queries issued per day.
pub daily_queries: usize,
/// Maximum number of simultaneous users at peak.
pub peak_concurrent_users: usize,
/// Mean raw document size, in bytes.
pub avg_document_size_bytes: usize,
/// Embedding size — presumably vector dimensionality (scenarios use
/// 768/1024/1536/2048); currently not read by the assessor itself.
pub avg_embedding_size: usize,
/// Read share of traffic in 0.0..=1.0 — assumed fraction of reads; the
/// assessor treats > 0.8 as "read-heavy" (TODO confirm semantics).
pub read_write_ratio: f32,
/// How many days of data are retained before deletion.
pub retention_days: usize,
}
/// Horizontal scaling plan: how many nodes/servers to run, how to shard,
/// and how to auto-scale.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HorizontalScalingPlan {
/// Number of Qdrant vector-database nodes.
pub qdrant_nodes: usize,
/// Number of application servers in front of the database tier.
pub app_servers: usize,
/// How data is partitioned across the cluster.
pub sharding_strategy: ShardingStrategy,
/// When and how far the cluster grows or shrinks.
pub auto_scaling: AutoScalingConfig,
/// Deployment regions (e.g. cloud region identifiers).
pub geographic_distribution: Vec<String>,
}
/// Sharding dimensions and per-node shard layout. The boolean flags are
/// independent — more than one sharding dimension may be enabled at once.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShardingStrategy {
/// Partition data by document type/category.
pub shard_by_document_type: bool,
/// Partition data by time window (enabled for long retention periods).
pub shard_by_time: bool,
/// Partition data per user (enabled for high concurrency).
pub shard_by_user: bool,
/// Number of shards hosted on each node.
pub shards_per_node: usize,
/// Copies of each shard kept across the cluster.
pub replication_factor: usize,
}
/// Auto-scaling bounds and triggers for the Qdrant node pool.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoScalingConfig {
/// Floor on cluster size; never scale below this.
pub min_nodes: usize,
/// Ceiling on cluster size; never scale above this.
pub max_nodes: usize,
/// Utilization above which a node is added — presumably a percentage
/// (assessor sets 70.0); the measured metric is not specified here.
pub scale_up_threshold: f32,
/// Utilization below which a node is removed (assessor sets 30.0).
pub scale_down_threshold: f32,
/// Minimum seconds between consecutive scaling actions.
pub cooldown_seconds: u64,
}
/// Replication and durability policy for the cluster.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplicationStrategy {
/// Total copies of each piece of data.
pub replication_factor: usize,
/// Consistency model exposed to readers.
pub consistency_level: ConsistencyLevel,
/// Replicas that must acknowledge a write before it succeeds.
pub write_quorum: usize,
/// Replicas that must respond to serve a read.
pub read_quorum: usize,
/// Whether data is additionally replicated across regions.
pub cross_region_replication: bool,
/// Hours between backups.
pub backup_frequency_hours: u64,
}
/// Consistency model for replicated reads.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ConsistencyLevel {
/// Reads always observe the latest acknowledged write.
Strong,
/// Reads may lag writes with no bound.
Eventual,
/// Reads may lag writes, but only within a configured staleness bound.
BoundedStaleness,
}
/// Load-balancing policy in front of the application tier.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LoadBalancingConfig {
/// Which layer/kind of load balancer to deploy.
pub load_balancer_type: LoadBalancerType,
/// Backend health-probe configuration.
pub health_checks: HealthCheckConfig,
/// Pin a client's requests to one backend (sticky sessions).
pub session_affinity: bool,
/// Route clients to the nearest region.
pub geographic_routing: bool,
}
/// Kind of load balancer to deploy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum LoadBalancerType {
/// Transport-level (TCP) balancing.
Layer4,
/// Application-level (HTTP-aware) balancing.
Layer7,
/// DNS-based distribution.
Dns,
/// Clients pick a backend themselves.
ClientSide,
}
/// Health-probe timing and hysteresis thresholds for backends.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheckConfig {
/// Seconds between probes.
pub interval_seconds: u64,
/// Seconds before a probe is considered failed.
pub timeout_seconds: u64,
/// Consecutive failures before a backend is marked unhealthy.
pub unhealthy_threshold: usize,
/// Consecutive successes before a backend is marked healthy again.
pub healthy_threshold: usize,
}
/// Projected capacity and resource requirements for the planned cluster.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceProjections {
/// Aggregate query throughput capacity (queries per second).
pub projected_qps: usize,
/// Latency percentiles, in milliseconds.
pub projected_latency_ms: LatencyPercentiles,
/// Total storage needed over the retention window, in terabytes.
pub storage_requirements_tb: f32,
/// Network bandwidth needed, in gigabits per second.
pub network_bandwidth_gbps: f32,
/// Total CPU cores across the cluster.
pub cpu_cores_required: usize,
/// Total memory across the cluster, in gigabytes.
pub memory_required_gb: usize,
}
/// Latency distribution summary; values are in milliseconds (see the
/// `projected_latency_ms` field of [`PerformanceProjections`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LatencyPercentiles {
/// Median latency.
pub p50: f32,
/// 95th-percentile latency.
pub p95: f32,
/// 99th-percentile latency.
pub p99: f32,
}
/// Estimated monthly operating costs; currency units are not specified in
/// this module (presumably USD — confirm with the cost model's source).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CostAnalysis {
/// Compute (node) cost per month.
pub monthly_infrastructure_cost: f32,
/// Storage cost per month.
pub monthly_storage_cost: f32,
/// Network/bandwidth cost per month.
pub monthly_network_cost: f32,
/// Unit query-processing cost per million queries.
pub cost_per_million_queries: f32,
/// Revenue targets needed to break even on the above.
pub break_even_analysis: BreakEvenAnalysis,
}
/// Business-side break-even targets derived from total monthly cost.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BreakEvenAnalysis {
/// Monthly revenue required to cover costs (with margin).
pub monthly_revenue_needed: f32,
/// Assumed cost to acquire one customer.
pub customer_acquisition_cost: f32,
/// Estimated lifetime value per customer.
pub customer_lifetime_value: f32,
}
/// Stateless planner that turns a [`WorkloadAnalysis`] into a complete
/// [`ScalabilityAssessment`] using coarse capacity-planning heuristics.
/// All thresholds and unit costs below are fixed assumptions, not measured
/// values; treat the output as a starting point, not a sizing guarantee.
pub struct ScalabilityAssessor;

impl ScalabilityAssessor {
    /// Run every planning step against `workload` and assemble the result.
    pub fn assess_scalability(workload: WorkloadAnalysis) -> ScalabilityAssessment {
        let horizontal_scaling = Self::design_horizontal_scaling(&workload);
        let replication_strategy = Self::design_replication_strategy(&workload);
        let load_balancing = Self::design_load_balancing(&workload);
        let performance_projections = Self::project_performance(&workload, &horizontal_scaling);
        let cost_analysis = Self::analyze_costs(&workload, &horizontal_scaling);
        ScalabilityAssessment {
            workload_analysis: workload,
            horizontal_scaling,
            replication_strategy,
            load_balancing,
            performance_projections,
            cost_analysis,
        }
    }

    /// Size the Qdrant cluster and app-server tier from daily volume and
    /// peak concurrency.
    fn design_horizontal_scaling(workload: &WorkloadAnalysis) -> HorizontalScalingPlan {
        // One node per 10k docs/day for ingest plus one per 100k queries/day
        // for search, each term floored at 1 (so the minimum cluster is 2).
        // (Removed a redundant `as usize` cast — both operands are usize.)
        let qdrant_nodes =
            (workload.daily_documents / 10_000).max(1) + (workload.daily_queries / 100_000).max(1);
        // Roughly 50 concurrent users per application server, minimum 1.
        let app_servers = (workload.peak_concurrent_users / 50).max(1);
        let sharding_strategy = ShardingStrategy {
            shard_by_document_type: true,
            // Time-based sharding only pays off past a one-year retention.
            shard_by_time: workload.retention_days > 365,
            // Per-user sharding only at high concurrency.
            shard_by_user: workload.peak_concurrent_users > 1000,
            shards_per_node: 4,
            replication_factor: 3,
        };
        let auto_scaling = AutoScalingConfig {
            // Steady-state floor is the sized cluster, capped at 3 nodes.
            min_nodes: qdrant_nodes.min(3),
            // Allow bursting to 3x the sized cluster, at least 10 nodes.
            max_nodes: (qdrant_nodes * 3).max(10),
            scale_up_threshold: 70.0,
            scale_down_threshold: 30.0,
            cooldown_seconds: 300,
        };
        HorizontalScalingPlan {
            qdrant_nodes,
            app_servers,
            sharding_strategy,
            auto_scaling,
            geographic_distribution: vec!["us-east-1".to_string(), "us-west-2".to_string()],
        }
    }

    /// Choose replication factor, consistency level, and quorum sizes.
    fn design_replication_strategy(workload: &WorkloadAnalysis) -> ReplicationStrategy {
        // Heavier ingest warrants more replicas for durability.
        let replication_factor = if workload.daily_documents > 100_000 { 5 } else { 3 };
        // NOTE(review): read-heavy workloads (> 0.8 read share) are given
        // Strong consistency here — confirm this trade-off is intended, since
        // strong consistency mainly taxes the write path.
        let consistency_level = if workload.read_write_ratio > 0.8 {
            ConsistencyLevel::Strong
        } else {
            ConsistencyLevel::Eventual
        };
        ReplicationStrategy {
            replication_factor,
            consistency_level,
            // Majority write quorum; single-replica reads favor latency.
            write_quorum: replication_factor / 2 + 1,
            read_quorum: 1,
            cross_region_replication: workload.peak_concurrent_users > 10_000,
            backup_frequency_hours: 24,
        }
    }

    /// Pick a load-balancer tier and health-check policy from concurrency.
    fn design_load_balancing(workload: &WorkloadAnalysis) -> LoadBalancingConfig {
        // HTTP-aware (L7) balancing only at very high concurrency; plain
        // TCP (L4) balancing otherwise.
        let load_balancer_type = if workload.peak_concurrent_users > 10_000 {
            LoadBalancerType::Layer7
        } else {
            LoadBalancerType::Layer4
        };
        let health_checks = HealthCheckConfig {
            interval_seconds: 30,
            timeout_seconds: 5,
            unhealthy_threshold: 3,
            healthy_threshold: 2,
        };
        LoadBalancingConfig {
            load_balancer_type,
            health_checks,
            // Sticky sessions only for small deployments; geographic routing
            // once the user base is large enough to be spread out.
            session_affinity: workload.peak_concurrent_users < 1000,
            geographic_routing: workload.peak_concurrent_users > 5000,
        }
    }

    /// Project throughput, latency, and resource needs for the planned
    /// cluster. All per-node figures are fixed assumptions.
    fn project_performance(
        workload: &WorkloadAnalysis,
        scaling: &HorizontalScalingPlan,
    ) -> PerformanceProjections {
        let total_nodes = scaling.qdrant_nodes;
        // Average demand (integer division: sub-1 QPS rounds down to 0).
        let queries_per_second = workload.daily_queries / 86_400;
        // Assumed per-node capacity of 1000 QPS — TODO confirm via load test.
        let qps_per_node = 1000;
        let projected_qps = qps_per_node * total_nodes;
        // Latency model: 50 ms baseline improving with sqrt(nodes) as load is
        // spread across the cluster (heuristic, not measured).
        let base_latency = 50.0;
        let latency_multiplier = 1.0 / (total_nodes as f32).sqrt();
        let p50 = base_latency * latency_multiplier;
        let p95 = p50 * 2.0;
        let p99 = p50 * 5.0;
        // Storage: raw document bytes per day times the retention window.
        let daily_data_gb =
            (workload.daily_documents * workload.avg_document_size_bytes) as f32 / 1_000_000_000.0;
        let total_storage_tb = (daily_data_gb * workload.retention_days as f32) / 1000.0;
        // ~0.01 Gbps per QPS of demand, with a 1 Gbps floor.
        let network_bandwidth_gbps = (queries_per_second as f32 * 0.01).max(1.0);
        // Flat per-node hardware assumption: 8 cores / 32 GB RAM.
        let cpu_cores_required = total_nodes * 8;
        let memory_required_gb = total_nodes * 32;
        PerformanceProjections {
            projected_qps,
            projected_latency_ms: LatencyPercentiles { p50, p95, p99 },
            storage_requirements_tb: total_storage_tb,
            network_bandwidth_gbps,
            cpu_cores_required,
            memory_required_gb,
        }
    }

    /// Estimate monthly spend and break-even targets.
    ///
    /// Bug fix: storage and network were previously billed with hard-coded
    /// `1.0 *` placeholders (exactly 1 TB and 1 Gbps regardless of workload).
    /// They are now derived from the same workload-based estimates used by
    /// [`Self::project_performance`], so costs scale with the input.
    fn analyze_costs(workload: &WorkloadAnalysis, scaling: &HorizontalScalingPlan) -> CostAnalysis {
        let nodes = scaling.qdrant_nodes;
        let cost_per_node_per_month = 500.0;
        let monthly_infrastructure_cost = nodes as f32 * cost_per_node_per_month;
        // Storage footprint over the retention window (same formula as
        // project_performance), billed per TB-month.
        let daily_data_gb =
            (workload.daily_documents * workload.avg_document_size_bytes) as f32 / 1_000_000_000.0;
        let storage_tb = (daily_data_gb * workload.retention_days as f32) / 1000.0;
        let storage_cost_per_tb_per_month = 50.0;
        let monthly_storage_cost = storage_tb * storage_cost_per_tb_per_month;
        // Bandwidth estimate (same formula as project_performance), billed
        // per Gbps-month.
        let queries_per_second = workload.daily_queries / 86_400;
        let network_bandwidth_gbps = (queries_per_second as f32 * 0.01).max(1.0);
        let network_cost_per_gbps_per_month = 100.0;
        let monthly_network_cost = network_bandwidth_gbps * network_cost_per_gbps_per_month;
        let queries_per_month = workload.daily_queries * 30;
        let cost_per_million_queries = 0.5;
        let query_cost = (queries_per_month as f32 / 1_000_000.0) * cost_per_million_queries;
        let total_monthly_cost =
            monthly_infrastructure_cost + monthly_storage_cost + monthly_network_cost + query_cost;
        // Target a 50% margin over total cost to break even comfortably.
        let monthly_revenue_needed = total_monthly_cost * 1.5;
        let customer_acquisition_cost = 100.0;
        // Assumes ~100 customers share the revenue target — TODO confirm.
        let customer_lifetime_value = monthly_revenue_needed / 100.0;
        CostAnalysis {
            monthly_infrastructure_cost,
            monthly_storage_cost,
            monthly_network_cost,
            cost_per_million_queries,
            break_even_analysis: BreakEvenAnalysis {
                monthly_revenue_needed,
                customer_acquisition_cost,
                customer_lifetime_value,
            },
        }
    }
}
/// Translate a finished [`ScalabilityAssessment`] into concrete production
/// settings: Qdrant connection pool and TLS, embedding cache sizing, access
/// control levels, and the deployment topology.
pub fn generate_production_config(assessment: &ScalabilityAssessment) -> ProductionConfig {
    let scaling = &assessment.horizontal_scaling;

    // TLS enabled, server verified against the system CA bundle; no API key
    // or client certificates in this baseline config.
    let security = QdrantSecurityConfig {
        api_key: None,
        tls_enabled: true,
        ca_cert_path: Some("/etc/ssl/certs/ca-certificates.crt".to_string()),
        client_cert_path: None,
        client_key_path: None,
        skip_tls_verify: false,
    };

    // Connection pool sized at 100 connections per Qdrant node.
    let qdrant_config = QdrantConnectionConfig {
        max_connections: scaling.qdrant_nodes * 100,
        connect_timeout_secs: 60,
        request_timeout_secs: 120,
        health_check_interval_secs: 60,
        max_idle_secs: 300,
        security,
    };

    // Embedding cache: 50k entries, 2-hour TTL.
    let cache_config = EmbeddingCacheConfig {
        max_size: 50000,
        ttl_secs: 7200,
    };

    // Standard tiered permissions with audit logging on.
    let access_config = AccessControlConfig {
        read_level: crate::storage::AccessLevel::Read,
        write_level: crate::storage::AccessLevel::ReadWrite,
        delete_level: crate::storage::AccessLevel::ReadWrite,
        admin_level: crate::storage::AccessLevel::Admin,
        enable_audit_log: true,
    };

    // Topology taken directly from the assessment's plans.
    let deployment_config = DeploymentConfig {
        replicas: assessment.replication_strategy.replication_factor,
        shards: scaling.sharding_strategy.shards_per_node,
        regions: scaling.geographic_distribution.clone(),
    };

    ProductionConfig {
        qdrant_config,
        cache_config,
        access_config,
        deployment_config,
    }
}
/// Bundle of concrete production settings produced by
/// `generate_production_config`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductionConfig {
/// Qdrant connection pool, timeouts, and TLS settings.
pub qdrant_config: QdrantConnectionConfig,
/// Embedding cache size and TTL.
pub cache_config: EmbeddingCacheConfig,
/// Permission levels and audit-logging policy.
pub access_config: AccessControlConfig,
/// Replica/shard counts and deployment regions.
pub deployment_config: DeploymentConfig,
}
/// Deployment topology: replication, sharding, and regions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeploymentConfig {
/// Number of replicas per shard.
pub replicas: usize,
/// Number of shards (taken from the plan's shards-per-node figure).
pub shards: usize,
/// Regions to deploy into.
pub regions: Vec<String>,
}
/// Run a full scalability assessment for a canned deployment scenario.
///
/// Each scenario maps to a preset [`WorkloadAnalysis`], which is then fed
/// through [`ScalabilityAssessor::assess_scalability`].
pub fn quick_assessment(scenario: ScalabilityScenario) -> ScalabilityAssessment {
    // Presets, left to right: docs/day, queries/day, peak users, avg doc
    // bytes, embedding size, read/write ratio, retention days.
    let (docs, queries, users, doc_bytes, embedding, rw_ratio, retention) = match scenario {
        ScalabilityScenario::SmallTeam => (100, 1000, 10, 100000, 768, 0.9, 365),
        ScalabilityScenario::GrowingStartup => (1000, 10000, 100, 500000, 1024, 0.8, 730),
        ScalabilityScenario::Enterprise => (10000, 100000, 1000, 2000000, 1536, 0.7, 2555),
        ScalabilityScenario::LargeScale => (100000, 1000000, 10000, 5000000, 2048, 0.6, 2555),
    };
    let workload = WorkloadAnalysis {
        daily_documents: docs,
        daily_queries: queries,
        peak_concurrent_users: users,
        avg_document_size_bytes: doc_bytes,
        avg_embedding_size: embedding,
        read_write_ratio: rw_ratio,
        retention_days: retention,
    };
    ScalabilityAssessor::assess_scalability(workload)
}
/// Canned deployment sizes accepted by `quick_assessment`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ScalabilityScenario {
/// ~100 docs/day, 10 peak users.
SmallTeam,
/// ~1k docs/day, 100 peak users.
GrowingStartup,
/// ~10k docs/day, 1k peak users.
Enterprise,
/// ~100k docs/day, 10k peak users.
LargeScale,
}
#[cfg(test)]
mod tests {
use super::*;
// Smoke test: a canned SmallTeam scenario echoes its workload back and
// yields a non-degenerate plan (at least one node, positive capacity).
#[test]
fn test_quick_assessment_small_team() {
let assessment = quick_assessment(ScalabilityScenario::SmallTeam);
assert_eq!(assessment.workload_analysis.daily_documents, 100);
assert!(assessment.horizontal_scaling.qdrant_nodes >= 1);
assert!(assessment.performance_projections.projected_qps > 0);
}
// A hand-built moderate workload gets at least one node and the minimum
// replication factor of 3.
#[test]
fn test_scalability_assessor() {
let workload = WorkloadAnalysis {
daily_documents: 1000,
daily_queries: 10000,
peak_concurrent_users: 50,
avg_document_size_bytes: 100000,
avg_embedding_size: 768,
read_write_ratio: 0.8,
retention_days: 365,
};
let assessment = ScalabilityAssessor::assess_scalability(workload);
assert!(assessment.horizontal_scaling.qdrant_nodes >= 1);
assert!(assessment.replication_strategy.replication_factor >= 3);
}
}