1use serde::{Deserialize, Serialize};
2use std::time::SystemTime;
3
4#[derive(Clone, Debug)]
7pub struct RequestObservation {
8 pub endpoint_id: String,
9 pub pool_id: String,
10 pub started_at: SystemTime,
11 pub success: bool,
12 pub error: Option<ObservationError>,
13 pub was_retry: bool,
14 pub latency: LatencyBreakdown,
15 pub tokens: TokenStats,
16 pub stream: Option<StreamMetrics>,
17}
18
/// Latency measurements attached to a [`RequestObservation`].
#[derive(Clone, Debug)]
pub struct LatencyBreakdown {
    /// Total latency of the request, in milliseconds (per the `_ms` suffix).
    pub total_ms: u64,
    /// Optional secondary latency figure in milliseconds.
    // NOTE(review): "ttft" presumably means time-to-first-token — confirm.
    pub ttft_ms: Option<u64>,
}
24
/// Token counts attached to a [`RequestObservation`].
///
/// The derived [`Default`] yields zero for both counters.
#[derive(Clone, Debug, Default)]
pub struct TokenStats {
    /// Number of input tokens.
    pub input: u64,
    /// Number of output tokens.
    pub output: u64,
}
30
/// Measurements recorded for a streamed response.
#[derive(Clone, Debug)]
pub struct StreamMetrics {
    /// Latency figure in milliseconds.
    // NOTE(review): "ttft" presumably means time-to-first-token — confirm.
    pub ttft_ms: u64,
    /// Throughput of the stream in tokens per second.
    pub tokens_per_second: f64,
    /// Largest gap between consecutive chunks, in milliseconds.
    pub max_inter_chunk_ms: u64,
    /// Whether the stream was flagged as broken.
    pub broken: bool,
}
38
/// Failure classification carried by a [`RequestObservation`].
#[derive(Clone, Debug)]
pub enum ObservationError {
    /// The request timed out.
    Timeout,
    /// The request failed with the given HTTP status code.
    HttpStatus(u16),
    /// The connection could not be established.
    ConnectionFailed,
    /// The request was rate limited.
    RateLimited,
    /// A streamed response broke off.
    StreamBroken,
    /// The response was empty.
    EmptyResponse,
    /// The response was truncated.
    Truncated,
    /// The response could not be interpreted.
    MalformedResponse,
    /// Any other failure, described by a free-form message.
    Other(String),
}
51
52#[derive(Clone, Debug, Serialize, Deserialize)]
55pub struct ScoreConfig {
56 pub window_size: usize,
57 pub decay_period_secs: u64,
58 pub baseline_score: f32,
59 pub availability_weight: f32,
60 pub latency_weight: f32,
61 pub quality_weight: f32,
62 pub cost_weight: f32,
63 pub good_ttft_ms: u64,
64 pub acceptable_ttft_ms: u64,
65 pub good_tps: f64,
66 pub max_error_rate: f32,
67 pub max_truncation_rate: f32,
68 pub max_empty_response_rate: f32,
69}
70
71impl Default for ScoreConfig {
72 fn default() -> Self {
73 Self {
74 window_size: 100,
75 decay_period_secs: 300,
76 baseline_score: 60.0,
77 availability_weight: 0.35,
78 latency_weight: 0.25,
79 quality_weight: 0.25,
80 cost_weight: 0.15,
81 good_ttft_ms: 500,
82 acceptable_ttft_ms: 2000,
83 good_tps: 50.0,
84 max_error_rate: 0.20,
85 max_truncation_rate: 0.10,
86 max_empty_response_rate: 0.05,
87 }
88 }
89}
90
91#[derive(Clone, Debug, Serialize, Deserialize)]
94pub struct EndpointScore {
95 pub endpoint_id: String,
96 pub pool_id: String,
97 pub score: f32,
98 pub tier: ScoreTier,
99 pub observation_count: usize,
100 pub last_updated: SystemTime,
101 pub breakdown: ScoreBreakdown,
102}
103
104#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
105pub enum ScoreTier {
106 Poor, Bronze, Silver, Gold, }
111
112impl PartialOrd for ScoreTier {
114 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
115 Some(self.cmp(other))
116 }
117}
118
119impl Ord for ScoreTier {
120 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
121 let self_val = match self {
122 ScoreTier::Poor => 0,
123 ScoreTier::Bronze => 1,
124 ScoreTier::Silver => 2,
125 ScoreTier::Gold => 3,
126 };
127 let other_val = match other {
128 ScoreTier::Poor => 0,
129 ScoreTier::Bronze => 1,
130 ScoreTier::Silver => 2,
131 ScoreTier::Gold => 3,
132 };
133 self_val.cmp(&other_val)
134 }
135}
136
137#[derive(Clone, Debug, Serialize, Deserialize)]
138pub struct ScoreBreakdown {
139 pub availability: f32,
140 pub latency: f32,
141 pub quality: f32,
142 pub cost: f32,
143 pub penalty: f32,
144}
145
146#[derive(Clone, Debug, Serialize, Deserialize)]
147pub struct RankingResult {
148 pub ranked_endpoints: Vec<EndpointScore>,
149 pub recommended: Option<EndpointScore>,
150 pub recommended_fallback: Option<EndpointScore>,
151 pub excluded: Vec<EndpointScore>,
152}