dynamo_llm/kv_router/scoring.rs
1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! Scoring functions for the KV router.
17
18use serde::{Deserialize, Serialize};
19use std::collections::HashMap;
20
21use crate::kv_router::scheduler::Endpoint;
22
23#[derive(Debug, Default, Serialize, Deserialize, Clone)]
24pub struct ProcessedEndpoints {
25 pub endpoints: HashMap<i64, Endpoint>,
26 pub load_avg: f64,
27 pub load_std: f64,
28}
29
30impl ProcessedEndpoints {
31 pub fn new(endpoints: Vec<Endpoint>) -> Self {
32 // compute some basic statistics
33 let load_values: Vec<f64> = endpoints
34 .iter()
35 .map(|x| x.data.kv_active_blocks as f64)
36 .collect();
37 let load_avg = load_values.iter().copied().sum::<f64>() / load_values.len() as f64;
38 let variance = load_values
39 .iter()
40 .map(|&x| (x - load_avg).powi(2))
41 .sum::<f64>()
42 / load_values.len() as f64;
43 let load_std = variance.sqrt();
44
45 let endpoints = endpoints.into_iter().map(|e| (e.worker_id(), e)).collect();
46
47 ProcessedEndpoints {
48 endpoints,
49 load_avg,
50 load_std,
51 }
52 }
53}