1use serde::{Deserialize, Serialize};
2use std::time::SystemTime;
3
4#[derive(Clone, Debug, Serialize, Deserialize)]
7pub struct RequestObservation {
8 pub endpoint_id: String,
9 pub pool_id: String,
10 pub started_at: SystemTime,
11 pub success: bool,
12 pub error: Option<ObservationError>,
13 pub was_retry: bool,
14 pub latency: LatencyBreakdown,
15 pub tokens: TokenStats,
16 pub stream: Option<StreamMetrics>,
17}
18
19#[derive(Clone, Debug, Serialize, Deserialize)]
20pub struct LatencyBreakdown {
21 pub total_ms: u64,
22 pub ttft_ms: Option<u64>,
23}
24
25#[derive(Clone, Debug, Default, Serialize, Deserialize)]
26pub struct TokenStats {
27 pub input: u64,
28 pub output: u64,
29}
30
31#[derive(Clone, Debug, Serialize, Deserialize)]
32pub struct StreamMetrics {
33 pub ttft_ms: u64,
34 pub tokens_per_second: Option<f64>,
35 pub max_inter_chunk_ms: Option<u64>,
36 pub chunk_count: u64,
37 pub completed_normally: bool,
38 pub stream_broken: bool,
39}
40
41#[derive(Clone, Debug, Serialize, Deserialize)]
42pub enum ObservationError {
43 Timeout,
44 RateLimited,
45 Upstream5xx,
46 Upstream4xx,
47 ConnectionFailure,
48 EmptyResponse,
49 TruncatedStream,
50 InvalidResponse,
51 Other { code: u16, message: String },
52}
53
54#[derive(Clone, Debug, Serialize, Deserialize)]
57pub struct ScoreConfig {
58 pub window_size: usize,
59 pub decay_period_secs: u64,
60 pub baseline_score: f64,
61 pub availability_weight: f64,
62 pub latency_weight: f64,
63 pub quality_weight: f64,
64 pub cost_weight: f64,
65 pub good_ttft_ms: u64,
66 pub acceptable_ttft_ms: u64,
67 pub good_tps: f64,
68}
69
70impl Default for ScoreConfig {
71 fn default() -> Self {
72 Self {
73 window_size: 100,
74 decay_period_secs: 300,
75 baseline_score: 60.0,
76 availability_weight: 0.35,
77 latency_weight: 0.25,
78 quality_weight: 0.25,
79 cost_weight: 0.15,
80 good_ttft_ms: 500,
81 acceptable_ttft_ms: 2000,
82 good_tps: 50.0,
83 }
84 }
85}
86
87#[derive(Clone, Debug, Serialize, Deserialize)]
90pub struct EndpointScore {
91 pub endpoint_id: String,
92 pub pool_id: String,
93 pub score: f64,
94 pub tier: ScoreTier,
95 pub observation_count: usize,
96 pub breakdown: ScoreBreakdown,
97 pub excluded: bool,
98 pub exclusion_reason: Option<String>,
99}
100
101#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
102pub enum ScoreTier {
103 Poor, Bronze, Silver, Gold, }
108
109impl PartialOrd for ScoreTier {
111 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
112 Some(self.cmp(other))
113 }
114}
115
116impl Ord for ScoreTier {
117 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
118 let self_val = match self {
119 ScoreTier::Poor => 0,
120 ScoreTier::Bronze => 1,
121 ScoreTier::Silver => 2,
122 ScoreTier::Gold => 3,
123 };
124 let other_val = match other {
125 ScoreTier::Poor => 0,
126 ScoreTier::Bronze => 1,
127 ScoreTier::Silver => 2,
128 ScoreTier::Gold => 3,
129 };
130 self_val.cmp(&other_val)
131 }
132}
133
134#[derive(Clone, Debug, Serialize, Deserialize)]
135pub struct ScoreBreakdown {
136 pub availability: f64,
137 pub latency: f64,
138 pub quality: f64,
139 pub cost: f64,
140 pub penalty: f64,
141}
142
143#[derive(Clone, Debug, Serialize, Deserialize)]
144pub struct PoolRanking {
145 pub pool_id: String,
146 pub ranked_endpoints: Vec<EndpointScore>,
147 pub recommended: Option<EndpointScore>,
148 pub recommended_fallback: Option<EndpointScore>,
149 pub excluded_endpoints: Vec<EndpointScore>,
150}
151
/// A source of the current time.
///
/// Code that needs "now" can take a `Clock` rather than calling
/// [`SystemTime::now`] directly, so that a different time source can be
/// substituted. Implementors must be `Send + Sync`.
pub trait Clock: Send + Sync {
    /// Returns the current time according to this clock.
    fn now(&self) -> SystemTime;
}
156
/// Zero-sized clock type. It has no fields, so every value is identical, and
/// it is cheap to copy.
#[derive(Debug, Clone, Copy, Default)]
pub struct SystemClock;
160
161impl Clock for SystemClock {
162 fn now(&self) -> SystemTime {
163 SystemTime::now()
164 }
165}