Skip to main content

latency

Function latency 

Source
pub fn latency<I: IndexCore>(
    index: &I,
    queries: &[Vec<f32>],
    params: &SearchParams,
    config: &LatencyConfig,
) -> Result<LatencyReport>
Expand description

Measure per-query latency for `index` over `queries` and return a `LatencyReport`.

Each query is timed by calling `Instant::now` immediately before the call to `IndexCore::search` and `Instant::elapsed` immediately after; the resulting delta, in microseconds, is recorded as one sample. Build cost is excluded by construction: `index` is passed by shared reference, so any construction happens before this function is called.

Percentiles use the nearest-rank method documented on `LatencyReport`. Single-threaded QPS is computed as `query_count / sum_of_latencies_seconds` and is therefore comparable across runs with identical query counts.

§Errors

§Examples

use std::sync::Arc;

use iqdb_eval::{latency, LatencyConfig};
use iqdb_flat::{FlatConfig, FlatIndex};
use iqdb_index::{Index, IndexCore};
use iqdb_types::{DistanceMetric, SearchParams, VectorId};

let mut idx = FlatIndex::new(2, DistanceMetric::Euclidean, FlatConfig)?;
idx.insert(VectorId::from(0u64), Arc::<[f32]>::from(&[0.0, 0.0][..]), None)?;
idx.insert(VectorId::from(1u64), Arc::<[f32]>::from(&[3.0, 4.0][..]), None)?;

let queries = vec![vec![0.0, 0.0], vec![3.0, 4.0]];
let params = SearchParams::new(1, DistanceMetric::Euclidean);
let cfg = LatencyConfig { warmup: 1 };

let report = latency(&idx, &queries, &params, &cfg)?;
assert_eq!(report.query_count, 2);
assert!(report.p50_us <= report.p95_us);
assert!(report.p95_us <= report.p99_us);