use super::*;
/// Host-side LlmExtract pass: runs the LLM extractor over every raw payload
/// output and stores the resulting metrics in the paired `PayloadMetrics`.
///
/// Pairing is by `payload_index`. For each raw output, stdout is tried first;
/// stderr is consulted only when stdout extraction succeeded with zero
/// metrics and stderr is non-empty. An extraction error on either stream is
/// reported with `LLM_MODEL_LOAD_FAILED_PREFIX` and leaves that payload's
/// slot untouched. Successfully extracted metrics have their polarities
/// resolved against the payload's `metric_hints`, are validated (and
/// bounds-checked when `metric_bounds` is set), and are written into the slot
/// even when validation produced findings.
///
/// After all raw outputs are handled, every `PayloadMetrics` entry that has
/// empty metrics and no raw output with the same `payload_index` is reported
/// in a single aggregate detail.
///
/// Returns every detail collected. An empty `raw_outputs` slice returns an
/// empty vector immediately, without inspecting `payload_metrics`.
#[cfg(feature = "llm")]
pub(crate) fn host_side_llm_extract(
    payload_metrics: &mut [crate::test_support::PayloadMetrics],
    raw_outputs: &[crate::test_support::RawPayloadOutput],
) -> Vec<crate::assert::AssertDetail> {
    /// Wraps a message in an `AssertDetail` of kind `DetailKind::Other`.
    fn other_detail(message: String) -> crate::assert::AssertDetail {
        crate::assert::AssertDetail::new(crate::assert::DetailKind::Other, message)
    }

    if raw_outputs.is_empty() {
        return Vec::new();
    }

    // payload_index -> position inside `payload_metrics`.
    let slot_by_payload: std::collections::HashMap<usize, usize> = payload_metrics
        .iter()
        .enumerate()
        .map(|(slot, entry)| (entry.payload_index, slot))
        .collect();

    let mut details = Vec::new();
    for output in raw_outputs {
        let slot = match slot_by_payload.get(&output.payload_index) {
            Some(&slot) => slot,
            None => {
                details.push(other_detail(format!(
                    "LlmExtract host pairing: raw output at payload_index={} has no \
                     matching PayloadMetrics slot — guest emission contract violated, \
                     or SHM ring dropped the empty-metrics companion message",
                    output.payload_index,
                )));
                continue;
            }
        };

        let hint = output.hint.as_deref();

        // A stdout extraction error ends this payload right away; the stderr
        // fallback is only for a successful-but-empty stdout extraction.
        let mut extracted = match super::super::model::extract_via_llm(
            &output.stdout,
            hint,
            crate::test_support::MetricStream::Stdout,
        ) {
            Ok(found) => found,
            Err(reason) => {
                details.push(other_detail(format!(
                    "{LLM_MODEL_LOAD_FAILED_PREFIX}{reason}"
                )));
                continue;
            }
        };

        if extracted.is_empty() && !output.stderr.is_empty() {
            match super::super::model::extract_via_llm(
                &output.stderr,
                hint,
                crate::test_support::MetricStream::Stderr,
            ) {
                Ok(found) => extracted = found,
                Err(reason) => {
                    details.push(other_detail(format!(
                        "{LLM_MODEL_LOAD_FAILED_PREFIX}{reason}"
                    )));
                    continue;
                }
            }
        }

        crate::scenario::payload_run::resolve_polarities_owned(
            &mut extracted,
            &output.metric_hints,
        );

        details.extend(
            validate_llm_extraction(&extracted)
                .into_iter()
                .map(other_detail),
        );
        if let Some(bounds) = output.metric_bounds.as_ref() {
            details.extend(
                validate_metric_bounds(&extracted, bounds)
                    .into_iter()
                    .map(other_detail),
            );
        }

        // Validation findings are reported but do not block the store.
        payload_metrics[slot].metrics = extracted;
    }

    // Flag empty-metrics entries that never had a raw output to pair with.
    let payloads_with_raw: std::collections::HashSet<usize> = raw_outputs
        .iter()
        .map(|output| output.payload_index)
        .collect();
    let orphaned: Vec<usize> = payload_metrics
        .iter()
        .filter_map(|entry| {
            let unmatched =
                entry.metrics.is_empty() && !payloads_with_raw.contains(&entry.payload_index);
            unmatched.then_some(entry.payload_index)
        })
        .collect();
    if !orphaned.is_empty() {
        details.push(other_detail(format!(
            "LlmExtract host pairing: {} empty-metrics PayloadMetrics \
             entries at payload_index={:?} have no matching RawPayloadOutput. \
             If these were intended as LlmExtract payloads, the raw-output \
             SHM messages may have been silently dropped during drain \
             (CRC mismatch — the drop is invisible to the shm_drops \
             counter, which only tracks ring-full / overflow). Re-run; \
             transient CRC corruption is rare. False-positive case: a \
             `Json` payload with no numeric leaves and an `ExitCode` \
             payload both produce empty-metrics PayloadMetrics by design \
             and would also surface here in a mixed-format test — \
             dismiss this detail if your test mixes LlmExtract with \
             legitimately-empty other formats.",
            orphaned.len(),
            orphaned,
        )));
    }

    details
}
/// Sanity-checks a batch of LLM-extracted metrics and returns one
/// human-readable message per problem found.
///
/// Each metric is checked, in order, for:
/// 1. a name already seen earlier in the slice,
/// 2. a non-finite value (NaN or ±inf),
/// 3. a `source` other than `MetricSource::LlmExtract`.
///
/// A single metric can contribute several messages. An empty slice yields an
/// empty vector.
#[cfg(feature = "llm")]
pub(crate) fn validate_llm_extraction(metrics: &[crate::test_support::Metric]) -> Vec<String> {
    let mut names_seen = std::collections::HashSet::<&str>::with_capacity(metrics.len());
    let mut problems: Vec<String> = Vec::new();

    for metric in metrics {
        // `insert` returns false when the name was already present.
        let first_occurrence = names_seen.insert(metric.name.as_str());
        if !first_occurrence {
            problems.push(format!(
                "LlmExtract emitted duplicate metric name '{}' — downstream stats would \
                 misattribute one value to the other; check the LLM walker for an \
                 aggregation bug or a malformed JSON path emitted by the model",
                metric.name,
            ));
        }

        if !metric.value.is_finite() {
            problems.push(format!(
                "LlmExtract metric '{}' has non-finite value {} — NaN / ±inf must not \
                 propagate into PayloadMetrics",
                metric.name, metric.value,
            ));
        }

        if metric.source != crate::test_support::MetricSource::LlmExtract {
            problems.push(format!(
                "LlmExtract metric '{}' has source {:?}, expected MetricSource::LlmExtract — \
                 a value reached the LlmExtract slot without traversing the LLM walker",
                metric.name, metric.source,
            ));
        }
    }

    problems
}
/// Checks extracted metrics against the payload-declared `MetricBounds` and
/// returns one message per violation.
///
/// * `min_count` — when set, a single message is emitted if fewer metrics
///   than that were extracted.
/// * `value_min` / `value_max` — when set, every metric whose value is
///   strictly below the minimum or strictly above the maximum gets its own
///   message (lower-bound message first for a given metric).
///
/// Unset bounds are not checked. Comparisons are plain `<` / `>`, so a NaN
/// value never trips either value bound here.
#[cfg(feature = "llm")]
pub(crate) fn validate_metric_bounds(
    metrics: &[crate::test_support::Metric],
    bounds: &crate::test_support::MetricBounds,
) -> Vec<String> {
    let mut problems = Vec::new();

    let extracted = metrics.len();
    if let Some(required) = bounds.min_count.filter(|&required| extracted < required) {
        problems.push(format!(
            "LlmExtract bounds: extracted {} metric(s), payload requires at least {} — \
             the model produced fewer metrics than the payload declared as a sanity \
             floor. Common causes: a regression in the LLM walker that drops branches \
             of the JSON tree, a payload output that's structurally different from \
             what the prompt template assumes, or a too-tight floor on `min_count`.",
            extracted, required,
        ));
    }

    // The bounds do not change per metric; read them once.
    let floor = bounds.value_min;
    let ceiling = bounds.value_max;

    for metric in metrics {
        if let Some(lo) = floor.filter(|&lo| metric.value < lo) {
            problems.push(format!(
                "LlmExtract bounds: metric '{}' has value {} below payload's declared \
                 lower bound {} — values below the floor are either an extraction \
                 error or a unit-confusion bug. Adjust `value_min` if the floor is \
                 too tight, or fix the payload's output schema if the value should \
                 not have crossed the floor.",
                metric.name, metric.value, lo,
            ));
        }

        if let Some(hi) = ceiling.filter(|&hi| metric.value > hi) {
            problems.push(format!(
                "LlmExtract bounds: metric '{}' has value {} above payload's declared \
                 upper bound {} — values above the ceiling are either an extraction \
                 error or a runaway from a typo'd unit converter. Adjust `value_max` \
                 if the ceiling is too tight, or fix the payload's output if the \
                 value should have stayed bounded.",
                metric.name, metric.value, hi,
            ));
        }
    }

    problems
}