pub struct LlmLoadArgs {Show 23 fields
pub url: String,
pub model: String,
pub concurrency: usize,
pub duration: String,
pub runtime_name: String,
pub prompt_profile: Option<String>,
pub prompt_file: Option<PathBuf>,
pub warmup: String,
pub output: Option<PathBuf>,
pub stream: bool,
pub rate: Option<f64>,
pub rate_distribution: String,
pub num_layers: Option<u32>,
pub validate: String,
pub fail_on_quality: Option<f64>,
pub spike_threshold: f64,
pub gpu_telemetry: bool,
pub gpu_poll_interval: String,
pub expected_clock_mhz: Option<u32>,
pub skip_health_check: bool,
pub dataset: Option<PathBuf>,
pub max_tokens: Option<u32>,
pub max_tokens_distribution: Option<String>,
}Expand description
Arguments for probador llm load
Fields§
§url: StringBase URL of the LLM API server
model: StringModel name to include in requests
concurrency: usizeNumber of concurrent workers
duration: StringTest duration (e.g., 30s, 2m, 1h)
runtime_name: StringRuntime name for reporting
prompt_profile: Option<String>Prompt profile: micro, short, medium, long
prompt_file: Option<PathBuf>Path to YAML prompt file
warmup: StringWarmup duration before measurement (e.g., 5s, 10s). Default: no warmup.
output: Option<PathBuf>Output file path for JSON results
stream: boolUse SSE streaming for real per-token timing (TTFT, TPOT, ITL). Use –stream false to disable.
rate: Option<f64>Target request rate (req/s). Omit for max throughput (closed-loop).
rate_distribution: StringRate distribution: poisson (default) or constant. Only used with –rate.
num_layers: Option<u32>Number of transformer layers in the model (e.g., 28 for Qwen 1.5B). Computes per-layer decode time for cross-runtime comparison.
validate: StringInline correctness validation: none, basic, contains:X, pattern:X
fail_on_quality: Option<f64>Exit non-zero if quality pass rate drops below this threshold (e.g., 0.95)
spike_threshold: f64Multiplier of median ITL for spike detection (default: 5.0)
gpu_telemetry: boolEnable GPU telemetry collection via nvidia-smi
gpu_poll_interval: StringGPU telemetry polling interval (e.g., 1s, 2s)
expected_clock_mhz: Option<u32>Expected GPU clock speed in MHz for throttle detection (auto-detect if omitted)
skip_health_check: boolSkip the pre-flight health check (not recommended)
dataset: Option<PathBuf>Path to JSONL dataset file for workload-driven benchmarking
max_tokens: Option<u32>Override max_tokens for all requests (e.g., –max-tokens 128)
max_tokens_distribution: Option<String>Max tokens distribution: uniform:MIN,MAX or fixed:N. Creates heterogeneous traffic with staggered completion times. Example: –max-tokens-distribution uniform:16,128
Trait Implementations§
Source§impl Args for LlmLoadArgs
impl Args for LlmLoadArgs
Source§fn augment_args<'b>(__clap_app: Command) -> Command
fn augment_args<'b>(__clap_app: Command) -> Command
Source§fn augment_args_for_update<'b>(__clap_app: Command) -> Command
fn augment_args_for_update<'b>(__clap_app: Command) -> Command
Command so it can instantiate self via
FromArgMatches::update_from_arg_matches_mut Read moreSource§impl CommandFactory for LlmLoadArgs
impl CommandFactory for LlmLoadArgs
Source§impl Debug for LlmLoadArgs
impl Debug for LlmLoadArgs
Source§impl FromArgMatches for LlmLoadArgs
impl FromArgMatches for LlmLoadArgs
Source§fn from_arg_matches(__clap_arg_matches: &ArgMatches) -> Result<Self, Error>
fn from_arg_matches(__clap_arg_matches: &ArgMatches) -> Result<Self, Error>
Source§fn from_arg_matches_mut(
__clap_arg_matches: &mut ArgMatches,
) -> Result<Self, Error>
fn from_arg_matches_mut( __clap_arg_matches: &mut ArgMatches, ) -> Result<Self, Error>
Source§fn update_from_arg_matches(
&mut self,
__clap_arg_matches: &ArgMatches,
) -> Result<(), Error>
fn update_from_arg_matches( &mut self, __clap_arg_matches: &ArgMatches, ) -> Result<(), Error>
ArgMatches to self.Source§fn update_from_arg_matches_mut(
&mut self,
__clap_arg_matches: &mut ArgMatches,
) -> Result<(), Error>
fn update_from_arg_matches_mut( &mut self, __clap_arg_matches: &mut ArgMatches, ) -> Result<(), Error>
ArgMatches to self.Source§impl Parser for LlmLoadArgs
impl Parser for LlmLoadArgs
Source§fn parse_from<I, T>(itr: I) -> Self
fn parse_from<I, T>(itr: I) -> Self
Source§fn try_parse_from<I, T>(itr: I) -> Result<Self, Error>
fn try_parse_from<I, T>(itr: I) -> Result<Self, Error>
Source§fn update_from<I, T>(&mut self, itr: I)
fn update_from<I, T>(&mut self, itr: I)
Auto Trait Implementations§
impl Freeze for LlmLoadArgs
impl RefUnwindSafe for LlmLoadArgs
impl Send for LlmLoadArgs
impl Sync for LlmLoadArgs
impl Unpin for LlmLoadArgs
impl UnsafeUnpin for LlmLoadArgs
impl UnwindSafe for LlmLoadArgs
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
impl<A, B, T> HttpServerConnExec<A, B> for Twhere
B: Body,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more