dynamo_llm/kv_router/
scoring.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16//! Scoring functions for the KV router.
17
18use serde::{Deserialize, Serialize};
19use std::collections::HashMap;
20
21use crate::kv_router::scheduler::Endpoint;
22
/// A set of endpoints together with aggregate load statistics.
///
/// When built through [`ProcessedEndpoints::new`], the statistics are derived
/// from each endpoint's `data.kv_active_blocks` value. Because every field is
/// public and the type derives `Default` and `Deserialize`, instances created
/// by other means are not guaranteed to hold statistics that match the map.
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub struct ProcessedEndpoints {
    /// Endpoints keyed by the value returned from `Endpoint::worker_id()`.
    pub endpoints: HashMap<i64, Endpoint>,
    /// Arithmetic mean of `kv_active_blocks` over the endpoints passed to `new`.
    pub load_avg: f64,
    /// Population standard deviation (divides by N, not N - 1) of
    /// `kv_active_blocks` over the endpoints passed to `new`.
    pub load_std: f64,
}
29
30impl ProcessedEndpoints {
31    pub fn new(endpoints: Vec<Endpoint>) -> Self {
32        // compute some basic statistics
33        let load_values: Vec<f64> = endpoints
34            .iter()
35            .map(|x| x.data.kv_active_blocks as f64)
36            .collect();
37        let load_avg = load_values.iter().copied().sum::<f64>() / load_values.len() as f64;
38        let variance = load_values
39            .iter()
40            .map(|&x| (x - load_avg).powi(2))
41            .sum::<f64>()
42            / load_values.len() as f64;
43        let load_std = variance.sqrt();
44
45        let endpoints = endpoints.into_iter().map(|e| (e.worker_id(), e)).collect();
46
47        ProcessedEndpoints {
48            endpoints,
49            load_avg,
50            load_std,
51        }
52    }
53}