zeph_bench/deterministic.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Helpers for pinning generation parameters to reproducible values.
5//!
6//! By default every `bench run` forces `temperature=0.0` on the configured LLM
7//! provider so that two runs with identical inputs produce the same output. Pass
8//! `--no-deterministic` on the CLI (or `no_deterministic = true` to
9//! [`apply_deterministic_overrides`]) to opt out.
10
11use zeph_llm::provider::GenerationOverrides;
12
13/// Build the [`GenerationOverrides`] that pin temperature to `0.0`.
14///
15/// All other sampling parameters (`top_p`, `top_k`, `frequency_penalty`,
16/// `presence_penalty`) are left as `None` so that the provider's own defaults
17/// apply.
18///
19/// # Examples
20///
21/// ```
22/// use zeph_bench::deterministic::deterministic_overrides;
23///
24/// let overrides = deterministic_overrides();
25/// assert_eq!(overrides.temperature, Some(0.0));
26/// assert!(overrides.top_p.is_none());
27/// ```
28#[must_use]
29pub fn deterministic_overrides() -> GenerationOverrides {
30 GenerationOverrides {
31 temperature: Some(0.0),
32 top_p: None,
33 top_k: None,
34 frequency_penalty: None,
35 presence_penalty: None,
36 }
37}
38
39/// Optionally apply deterministic generation overrides to an [`AnyProvider`].
40///
41/// When `no_deterministic` is `false` (the default for `bench run`), temperature
42/// is forced to `0.0` via [`deterministic_overrides`]. When `true` the provider
43/// is returned unchanged so the caller's configured temperature is used.
44///
45/// This function is called by the bench runner after resolving the provider and
46/// before constructing the agent.
47///
48/// # Examples
49///
50/// ```no_run
51/// use zeph_bench::apply_deterministic_overrides;
52/// use zeph_llm::{any::AnyProvider, mock::MockProvider};
53///
54/// let provider = AnyProvider::Mock(MockProvider::with_responses(vec![]));
55///
56/// // Non-deterministic: provider is returned unchanged.
57/// let result = apply_deterministic_overrides(provider, true);
58/// assert!(matches!(result, AnyProvider::Mock(_)));
59/// ```
60///
61/// [`AnyProvider`]: zeph_llm::any::AnyProvider
62pub fn apply_deterministic_overrides(
63 provider: zeph_llm::any::AnyProvider,
64 no_deterministic: bool,
65) -> zeph_llm::any::AnyProvider {
66 if no_deterministic {
67 provider
68 } else {
69 provider.with_generation_overrides(deterministic_overrides())
70 }
71}
72
#[cfg(test)]
mod tests {
    use super::*;
    use zeph_llm::any::AnyProvider;
    use zeph_llm::mock::MockProvider;

    /// Zero-network mock provider with no canned responses, shared by the
    /// `apply_*` tests below.
    fn empty_mock_provider() -> AnyProvider {
        AnyProvider::Mock(MockProvider::with_responses(vec![]))
    }

    #[test]
    fn deterministic_overrides_returns_temperature_zero() {
        assert_eq!(deterministic_overrides().temperature, Some(0.0));
    }

    #[test]
    fn deterministic_overrides_leaves_other_fields_none() {
        let pinned = deterministic_overrides();

        // Every sampling knob other than temperature must stay unset.
        assert!(pinned.top_p.is_none());
        assert!(pinned.top_k.is_none());
        assert!(pinned.frequency_penalty.is_none());
        assert!(pinned.presence_penalty.is_none());
    }

    #[test]
    fn apply_with_no_deterministic_true_skips_override() {
        // Skip branch: with `no_deterministic = true` the provider is handed
        // back without overrides. The override state is not inspected here;
        // the test only checks that the call does not panic and that the
        // mock variant comes back.
        let returned = apply_deterministic_overrides(empty_mock_provider(), true);

        assert!(matches!(returned, AnyProvider::Mock(_)));
    }

    #[test]
    fn apply_with_no_deterministic_false_applies_override() {
        // Override branch: per the original test note, the mock's
        // `with_generation_overrides` is a no-op, so the result is still
        // expected to be the mock variant.
        let returned = apply_deterministic_overrides(empty_mock_provider(), false);

        assert!(matches!(returned, AnyProvider::Mock(_)));
    }
}