1use std::collections::BTreeMap;
9
10use serde::{Deserialize, Serialize};
11use serde_json::{json, Value};
12
13use crate::llm_config::{self, ProviderDef};
14use crate::value::VmValue;
15
/// Schema version stamped into every [`ToolConformanceReport`].
pub const TOOL_CONFORMANCE_SCHEMA_VERSION: u32 = 1;
/// Name of the single tool advertised to the model during a probe.
pub const TOOL_PROBE_TOOL_NAME: &str = "echo_marker";
/// Marker value used by [`ToolConformanceProbeOptions::new`] unless the caller overrides it.
pub const DEFAULT_TOOL_PROBE_MARKER: &str = "harn_tool_probe_marker";
19
/// Inputs for a live tool-calling conformance probe.
#[derive(Debug, Clone)]
pub struct ToolConformanceProbeOptions {
    /// Provider name; when blank, the provider resolved from `model` is used instead.
    pub provider: String,
    /// Model identifier, resolved through `llm_config::resolve_model_info`.
    pub model: String,
    /// Optional base URL override; when absent the provider configuration supplies it.
    pub base_url: Option<String>,
    /// Modes to probe; an empty list means both non-streaming and streaming.
    pub modes: Vec<ToolProbeMode>,
    /// Value the model is asked to echo back through the probe tool.
    pub marker: String,
    /// Per-request timeout, in seconds.
    pub timeout_secs: u64,
}
29
30impl ToolConformanceProbeOptions {
31 pub fn new(provider: impl Into<String>, model: impl Into<String>) -> Self {
32 Self {
33 provider: provider.into(),
34 model: model.into(),
35 base_url: None,
36 modes: vec![ToolProbeMode::NonStreaming, ToolProbeMode::Streaming],
37 marker: DEFAULT_TOOL_PROBE_MARKER.to_string(),
38 timeout_secs: 120,
39 }
40 }
41}
42
/// Request style used for a single probe case; serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeMode {
    /// The request is sent with `"stream": false` and the body is parsed as one JSON document.
    NonStreaming,
    /// The request is sent with `"stream": true` and the body is aggregated frame by frame.
    Streaming,
}
49
50impl ToolProbeMode {
51 pub fn as_str(self) -> &'static str {
52 match self {
53 Self::NonStreaming => "non_streaming",
54 Self::Streaming => "streaming",
55 }
56 }
57}
58
/// Outcome category assigned to one probe response; serialized in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeClassification {
    /// A native `tool_calls` entry for the probe tool carried object arguments whose `value` matched the marker.
    StructuredNativeToolCall,
    /// The text tool-call parser extracted a probe-tool call with the matching marker from the content.
    ParseableHarnTextToolCall,
    /// The content contained raw tool-call marker text but nothing executable was extracted.
    RawModelToolTag,
    /// No other failure category applied; the response held no executable tool call.
    ProseOnlyNonTool,
    /// A native probe-tool call had unusable arguments, or the text parser reported errors.
    MalformedJsonArguments,
    /// The response had neither content nor native tool calls.
    EmptySilent,
    /// The provider answered with a non-success HTTP status.
    HttpError,
    /// The request could not be built, sent, or read.
    TransportError,
}
71
/// Tri-state result for one tool-calling capability; serialized in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeStatus {
    /// A probe case demonstrated the capability.
    Pass,
    /// A probe case exercised the capability and it did not work.
    Fail,
    /// No probe case produced evidence either way (the initial state of a summary).
    Unknown,
}
79
80impl ToolProbeStatus {
81 pub fn as_str(&self) -> &'static str {
82 match self {
83 Self::Pass => "pass",
84 Self::Fail => "fail",
85 Self::Unknown => "unknown",
86 }
87 }
88}
89
/// Tool-calling channel selected from probe results; serialized in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeFallbackMode {
    /// Structured native tool calls passed.
    Native,
    /// Native calls did not pass, but a text-format tool call was parsed successfully.
    Text,
    /// Neither native nor text tool calling passed.
    Disabled,
}
97
98impl ToolProbeFallbackMode {
99 pub fn as_str(&self) -> &'static str {
100 match self {
101 Self::Native => "native",
102 Self::Text => "text",
103 Self::Disabled => "disabled",
104 }
105 }
106}
107
/// Full result of a conformance probe: the per-mode cases plus their summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConformanceReport {
    /// Always `TOOL_CONFORMANCE_SCHEMA_VERSION` for reports built by this module.
    pub schema_version: u32,
    /// Provider the probe was run (or the fixture was classified) against.
    pub provider: String,
    /// Model identifier the probe targeted.
    pub model: String,
    /// Base URL recorded for the probe; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub base_url: Option<String>,
    /// Name of the probe tool (`TOOL_PROBE_TOOL_NAME`).
    pub tool_name: String,
    /// Marker value the model was expected to echo.
    pub marker: String,
    /// One entry per probed mode.
    pub cases: Vec<ToolConformanceCase>,
    /// Aggregate verdict derived from `cases`.
    pub tool_calling: ToolCallingConformanceSummary,
}
120
/// Aggregate tool-calling verdict computed across all probe cases.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallingConformanceSummary {
    /// Status of structured native tool calls in non-streaming mode.
    pub native: ToolProbeStatus,
    /// Status of text-format tool calls parsed from response content.
    pub text: ToolProbeStatus,
    /// Status of structured native tool calls in streaming mode.
    pub streaming_native: ToolProbeStatus,
    /// Channel selected from the three statuses above.
    pub fallback_mode: ToolProbeFallbackMode,
    /// First failure reason among the cases; only populated when `fallback_mode` is `Disabled`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
}
130
/// Result of probing one mode (or classifying one fixture).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConformanceCase {
    /// Mode this case was run in.
    pub mode: ToolProbeMode,
    /// Whether the probe tool was called with the expected marker.
    pub ok: bool,
    /// Category assigned to the response.
    pub classification: ToolProbeClassification,
    /// Channel that worked for this case, or `Disabled` when none did.
    pub fallback_mode: ToolProbeFallbackMode,
    /// Short reason for a failed case; `None` for passing cases.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    /// HTTP status of the provider response, when one was received.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub http_status: Option<u16>,
    /// Elapsed time of the probe request in milliseconds, when measured.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub elapsed_ms: Option<u64>,
    /// Number of native tool-call entries found in the response.
    pub native_tool_call_count: usize,
    /// Number of tool calls the text parser extracted from the content.
    pub text_tool_call_count: usize,
    /// Errors reported by the text tool-call parser.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub parser_errors: Vec<String>,
    /// Violations reported by the text tool-call parser.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub protocol_violations: Vec<String>,
    /// Leading excerpt (at most 240 characters) of the response content, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_sample: Option<String>,
}
152
153impl ToolConformanceCase {
154 fn transport_error(mode: ToolProbeMode, message: String, elapsed_ms: Option<u64>) -> Self {
155 Self {
156 mode,
157 ok: false,
158 classification: ToolProbeClassification::TransportError,
159 fallback_mode: ToolProbeFallbackMode::Disabled,
160 failure_reason: Some(message),
161 http_status: None,
162 elapsed_ms,
163 native_tool_call_count: 0,
164 text_tool_call_count: 0,
165 parser_errors: Vec::new(),
166 protocol_violations: Vec::new(),
167 content_sample: None,
168 }
169 }
170
171 fn http_error(
172 mode: ToolProbeMode,
173 status: u16,
174 message: String,
175 elapsed_ms: Option<u64>,
176 ) -> Self {
177 Self {
178 mode,
179 ok: false,
180 classification: ToolProbeClassification::HttpError,
181 fallback_mode: ToolProbeFallbackMode::Disabled,
182 failure_reason: Some(message),
183 http_status: Some(status),
184 elapsed_ms,
185 native_tool_call_count: 0,
186 text_tool_call_count: 0,
187 parser_errors: Vec::new(),
188 protocol_violations: Vec::new(),
189 content_sample: None,
190 }
191 }
192}
193
194pub async fn run_tool_conformance_probe(
195 options: ToolConformanceProbeOptions,
196) -> ToolConformanceReport {
197 let model = llm_config::resolve_model_info(&options.model);
198 let provider = if options.provider.trim().is_empty() {
199 model.provider.clone()
200 } else {
201 options.provider.clone()
202 };
203 let model_id = model.id;
204 let base_url = options.base_url.clone().or_else(|| {
205 llm_config::provider_config(&provider).map(|def| llm_config::resolve_base_url(&def))
206 });
207 let mut cases = Vec::new();
208 for mode in normalized_modes(&options.modes) {
209 cases.push(
210 execute_live_probe_case(
211 &provider,
212 &model_id,
213 base_url.as_deref(),
214 mode,
215 &options.marker,
216 options.timeout_secs,
217 )
218 .await,
219 );
220 }
221 report_from_cases(provider, model_id, base_url, options.marker, cases)
222}
223
224pub fn classify_tool_conformance_fixture(
225 provider: impl Into<String>,
226 model: impl Into<String>,
227 mode: ToolProbeMode,
228 marker: impl Into<String>,
229 raw: &str,
230) -> ToolConformanceReport {
231 let marker = marker.into();
232 let response = serde_json::from_str::<Value>(raw).unwrap_or_else(|_| json!({ "content": raw }));
233 let case = classify_tool_probe_response(mode, &response, &marker, None, None);
234 report_from_cases(provider.into(), model.into(), None, marker, vec![case])
235}
236
237pub fn report_satisfies_required_probe(report: &ToolConformanceReport, requirement: &str) -> bool {
238 match requirement {
239 "tool_probe" | "tool_call_probe" => {
240 report.tool_calling.fallback_mode != ToolProbeFallbackMode::Disabled
241 && report.cases.iter().any(|case| case.ok)
242 }
243 "native_tool_probe" => report.tool_calling.native == ToolProbeStatus::Pass,
244 "streaming_tool_probe" => report.tool_calling.streaming_native == ToolProbeStatus::Pass,
245 _ => false,
246 }
247}
248
249fn normalized_modes(modes: &[ToolProbeMode]) -> Vec<ToolProbeMode> {
250 if modes.is_empty() {
251 return vec![ToolProbeMode::NonStreaming, ToolProbeMode::Streaming];
252 }
253 let mut out = Vec::new();
254 for mode in modes {
255 if !out.contains(mode) {
256 out.push(*mode);
257 }
258 }
259 out
260}
261
262fn report_from_cases(
263 provider: String,
264 model: String,
265 base_url: Option<String>,
266 marker: String,
267 cases: Vec<ToolConformanceCase>,
268) -> ToolConformanceReport {
269 let summary = summarize_cases(&cases);
270 ToolConformanceReport {
271 schema_version: TOOL_CONFORMANCE_SCHEMA_VERSION,
272 provider,
273 model,
274 base_url,
275 tool_name: TOOL_PROBE_TOOL_NAME.to_string(),
276 marker,
277 cases,
278 tool_calling: summary,
279 }
280}
281
282fn summarize_cases(cases: &[ToolConformanceCase]) -> ToolCallingConformanceSummary {
283 let mut native = ToolProbeStatus::Unknown;
284 let mut streaming_native = ToolProbeStatus::Unknown;
285 let mut text = ToolProbeStatus::Unknown;
286
287 for case in cases {
288 if case.classification == ToolProbeClassification::StructuredNativeToolCall {
289 if case.mode == ToolProbeMode::Streaming {
290 streaming_native = if case.ok {
291 ToolProbeStatus::Pass
292 } else {
293 ToolProbeStatus::Fail
294 };
295 } else {
296 native = if case.ok {
297 ToolProbeStatus::Pass
298 } else {
299 ToolProbeStatus::Fail
300 };
301 }
302 } else if case.mode == ToolProbeMode::Streaming
303 && streaming_native == ToolProbeStatus::Unknown
304 {
305 streaming_native = ToolProbeStatus::Fail;
306 } else if case.mode == ToolProbeMode::NonStreaming && native == ToolProbeStatus::Unknown {
307 native = ToolProbeStatus::Fail;
308 }
309
310 if case.classification == ToolProbeClassification::ParseableHarnTextToolCall {
311 text = if case.ok {
312 ToolProbeStatus::Pass
313 } else {
314 ToolProbeStatus::Fail
315 };
316 } else if text == ToolProbeStatus::Unknown && case.text_tool_call_count > 0 {
317 text = ToolProbeStatus::Fail;
318 }
319 }
320
321 let fallback_mode =
322 if native == ToolProbeStatus::Pass || streaming_native == ToolProbeStatus::Pass {
323 ToolProbeFallbackMode::Native
324 } else if text == ToolProbeStatus::Pass {
325 ToolProbeFallbackMode::Text
326 } else {
327 ToolProbeFallbackMode::Disabled
328 };
329
330 let failure_reason = if fallback_mode == ToolProbeFallbackMode::Disabled {
331 cases.iter().find_map(|case| case.failure_reason.clone())
332 } else {
333 None
334 };
335
336 ToolCallingConformanceSummary {
337 native,
338 text,
339 streaming_native,
340 fallback_mode,
341 failure_reason,
342 }
343}
344
345async fn execute_live_probe_case(
346 provider: &str,
347 model: &str,
348 base_url: Option<&str>,
349 mode: ToolProbeMode,
350 marker: &str,
351 timeout_secs: u64,
352) -> ToolConformanceCase {
353 let clock = harn_clock::RealClock::arc();
354 let started_ms = clock.monotonic_ms();
355 let Some(def) = llm_config::provider_config(provider) else {
356 return ToolConformanceCase::transport_error(
357 mode,
358 format!("unknown provider: {provider}"),
359 Some(elapsed_ms(&*clock, started_ms)),
360 );
361 };
362 let base_url = base_url
363 .filter(|value| !value.trim().is_empty())
364 .map(str::to_string)
365 .unwrap_or_else(|| llm_config::resolve_base_url(&def));
366 let url = match chat_url(&def, &base_url) {
367 Ok(url) => url,
368 Err(message) => {
369 return ToolConformanceCase::transport_error(
370 mode,
371 message,
372 Some(elapsed_ms(&*clock, started_ms)),
373 );
374 }
375 };
376 let body = probe_request_body(provider, model, mode, marker);
377 let client = if mode == ToolProbeMode::Streaming {
378 crate::llm::shared_streaming_client().clone()
379 } else {
380 crate::llm::shared_blocking_client().clone()
381 };
382 let api_key = crate::llm::helpers::resolve_api_key(provider).unwrap_or_default();
383 let request = client
384 .post(&url)
385 .header("Content-Type", "application/json")
386 .timeout(std::time::Duration::from_secs(timeout_secs))
387 .json(&body);
388 let mut request = crate::llm::api::apply_auth_headers(request, &api_key, Some(&def));
389 for (name, value) in &def.extra_headers {
390 request = request.header(name.as_str(), value.as_str());
391 }
392
393 let response = match request.send().await {
394 Ok(response) => response,
395 Err(error) => {
396 return ToolConformanceCase::transport_error(
397 mode,
398 format!("provider request failed: {error}"),
399 Some(elapsed_ms(&*clock, started_ms)),
400 );
401 }
402 };
403 let status = response.status();
404 let text = match response.text().await {
405 Ok(text) => text,
406 Err(error) => {
407 return ToolConformanceCase::transport_error(
408 mode,
409 format!("provider response was unreadable: {error}"),
410 Some(elapsed_ms(&*clock, started_ms)),
411 );
412 }
413 };
414 let elapsed = Some(elapsed_ms(&*clock, started_ms));
415 if !status.is_success() {
416 return ToolConformanceCase::http_error(
417 mode,
418 status.as_u16(),
419 sample_failure(&text, "provider returned non-success HTTP status"),
420 elapsed,
421 );
422 }
423 let response_value = if mode == ToolProbeMode::Streaming {
424 aggregate_stream_text(&text, provider)
425 } else {
426 serde_json::from_str::<Value>(&text).unwrap_or_else(|_| json!({ "content": text }))
427 };
428 classify_tool_probe_response(
429 mode,
430 &response_value,
431 marker,
432 Some(status.as_u16()),
433 elapsed,
434 )
435}
436
437fn classify_tool_probe_response(
438 mode: ToolProbeMode,
439 response: &Value,
440 marker: &str,
441 http_status: Option<u16>,
442 elapsed_ms: Option<u64>,
443) -> ToolConformanceCase {
444 let native = extract_native_tool_calls(response);
445 let native_count = native.len();
446 let mut malformed_native = false;
447 for call in &native {
448 if call.name == TOOL_PROBE_TOOL_NAME {
449 match &call.arguments {
450 Some(Value::Object(map))
451 if map.get("value").and_then(Value::as_str) == Some(marker) =>
452 {
453 return ToolConformanceCase {
454 mode,
455 ok: true,
456 classification: ToolProbeClassification::StructuredNativeToolCall,
457 fallback_mode: ToolProbeFallbackMode::Native,
458 failure_reason: None,
459 http_status,
460 elapsed_ms,
461 native_tool_call_count: native_count,
462 text_tool_call_count: 0,
463 parser_errors: Vec::new(),
464 protocol_violations: Vec::new(),
465 content_sample: content_sample(response),
466 };
467 }
468 Some(Value::Object(_)) => {}
469 _ => malformed_native = true,
470 }
471 }
472 }
473
474 let content = extract_content(response);
475 let tools = probe_tool_registry();
476 let parsed = crate::llm::tools::parse_text_tool_calls_with_tools(&content, Some(&tools));
477 let text_count = parsed.calls.len();
478 let text_pass = parsed.calls.iter().any(|call| {
479 call.get("name").and_then(Value::as_str) == Some(TOOL_PROBE_TOOL_NAME)
480 && call
481 .get("arguments")
482 .and_then(|args| args.get("value"))
483 .and_then(Value::as_str)
484 == Some(marker)
485 });
486 if text_pass {
487 return ToolConformanceCase {
488 mode,
489 ok: true,
490 classification: ToolProbeClassification::ParseableHarnTextToolCall,
491 fallback_mode: ToolProbeFallbackMode::Text,
492 failure_reason: None,
493 http_status,
494 elapsed_ms,
495 native_tool_call_count: native_count,
496 text_tool_call_count: text_count,
497 parser_errors: parsed.errors,
498 protocol_violations: parsed.violations,
499 content_sample: sample_content(&content),
500 };
501 }
502
503 let (classification, failure_reason) = if malformed_native || !parsed.errors.is_empty() {
504 (
505 ToolProbeClassification::MalformedJsonArguments,
506 Some(first_non_empty(
507 parsed.errors.first().cloned(),
508 "malformed_tool_arguments",
509 )),
510 )
511 } else if content.trim().is_empty() && native_count == 0 {
512 (
513 ToolProbeClassification::EmptySilent,
514 Some("empty_silent_response".to_string()),
515 )
516 } else if has_raw_model_tool_tag(&content) {
517 (
518 ToolProbeClassification::RawModelToolTag,
519 Some("raw_tool_tag_no_structured_calls".to_string()),
520 )
521 } else {
522 (
523 ToolProbeClassification::ProseOnlyNonTool,
524 Some("no_executable_tool_call".to_string()),
525 )
526 };
527
528 ToolConformanceCase {
529 mode,
530 ok: false,
531 classification,
532 fallback_mode: ToolProbeFallbackMode::Disabled,
533 failure_reason,
534 http_status,
535 elapsed_ms,
536 native_tool_call_count: native_count,
537 text_tool_call_count: text_count,
538 parser_errors: parsed.errors,
539 protocol_violations: parsed.violations,
540 content_sample: sample_content(&content),
541 }
542}
543
544fn chat_url(def: &ProviderDef, base_url: &str) -> Result<String, String> {
545 let endpoint = if def.chat_endpoint.trim().is_empty() {
546 "/v1/chat/completions"
547 } else {
548 def.chat_endpoint.as_str()
549 };
550 let url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
551 endpoint.to_string()
552 } else if endpoint.starts_with('/') {
553 format!("{}{}", base_url.trim_end_matches('/'), endpoint)
554 } else {
555 format!("{}/{}", base_url.trim_end_matches('/'), endpoint)
556 };
557 reqwest::Url::parse(&url)
558 .map(|_| url.clone())
559 .map_err(|error| format!("invalid provider chat URL '{url}': {error}"))
560}
561
562fn probe_request_body(provider: &str, model: &str, mode: ToolProbeMode, marker: &str) -> Value {
563 let prompt = format!(
564 "Call the {TOOL_PROBE_TOOL_NAME} tool exactly once with value {marker:?}. Do not answer in prose."
565 );
566 let tool = json!({
567 "type": "function",
568 "function": {
569 "name": TOOL_PROBE_TOOL_NAME,
570 "description": "Echo the probe marker exactly.",
571 "parameters": {
572 "type": "object",
573 "properties": {
574 "value": {
575 "type": "string",
576 "description": "The marker value to echo."
577 }
578 },
579 "required": ["value"],
580 "additionalProperties": false
581 }
582 }
583 });
584 let mut body = json!({
585 "model": model,
586 "messages": [{"role": "user", "content": prompt}],
587 "tools": [tool],
588 "stream": mode == ToolProbeMode::Streaming,
589 "temperature": 0,
590 });
591 if !crate::llm::provider::provider_uses_ollama_messages(provider, model) {
592 body["tool_choice"] = json!({
593 "type": "function",
594 "function": {"name": TOOL_PROBE_TOOL_NAME}
595 });
596 }
597 body
598}
599
/// A tool call extracted from a provider's native `tool_calls` array.
#[derive(Debug)]
struct NativeToolCall {
    /// Tool name taken from `function.name` or the entry's own `name`.
    name: String,
    /// Decoded arguments; `None` when they were present but unusable
    /// (unparseable JSON string, or neither a string nor an object).
    arguments: Option<Value>,
}
605
606fn extract_native_tool_calls(response: &Value) -> Vec<NativeToolCall> {
607 let mut calls = Vec::new();
608 visit_native_tool_call_arrays(response, &mut calls);
609 calls
610}
611
612fn visit_native_tool_call_arrays(value: &Value, calls: &mut Vec<NativeToolCall>) {
613 match value {
614 Value::Object(map) => {
615 if let Some(tool_calls) = map.get("tool_calls").and_then(Value::as_array) {
616 for item in tool_calls {
617 if let Some(call) = parse_native_tool_call(item) {
618 calls.push(call);
619 }
620 }
621 }
622 for child in map.values() {
623 visit_native_tool_call_arrays(child, calls);
624 }
625 }
626 Value::Array(items) => {
627 for item in items {
628 visit_native_tool_call_arrays(item, calls);
629 }
630 }
631 _ => {}
632 }
633}
634
635fn parse_native_tool_call(item: &Value) -> Option<NativeToolCall> {
636 let obj = item.as_object()?;
637 let function = obj.get("function").and_then(Value::as_object);
638 let name = function
639 .and_then(|function| function.get("name"))
640 .or_else(|| obj.get("name"))
641 .and_then(Value::as_str)?
642 .to_string();
643 let raw_args = function
644 .and_then(|function| function.get("arguments"))
645 .or_else(|| obj.get("arguments"));
646 let arguments = match raw_args {
647 Some(Value::String(raw)) => serde_json::from_str::<Value>(raw).ok(),
648 Some(value @ Value::Object(_)) => Some(value.clone()),
649 Some(_) => None,
650 None => Some(json!({})),
651 };
652 Some(NativeToolCall { name, arguments })
653}
654
655fn extract_content(response: &Value) -> String {
656 let mut parts = Vec::new();
657 visit_content(response, &mut parts);
658 parts
659 .into_iter()
660 .filter(|part| !part.trim().is_empty())
661 .collect::<Vec<_>>()
662 .join("\n")
663}
664
665fn visit_content(value: &Value, parts: &mut Vec<String>) {
666 match value {
667 Value::Object(map) => {
668 for key in ["content", "response", "text"] {
669 if let Some(text) = map.get(key).and_then(Value::as_str) {
670 parts.push(text.to_string());
671 }
672 }
673 for child in map.values() {
674 visit_content(child, parts);
675 }
676 }
677 Value::Array(items) => {
678 for item in items {
679 visit_content(item, parts);
680 }
681 }
682 _ => {}
683 }
684}
685
686fn aggregate_stream_text(text: &str, _provider: &str) -> Value {
687 let mut content = String::new();
688 let mut calls: BTreeMap<String, PartialStreamCall> = BTreeMap::new();
689 let mut frames = Vec::new();
690 for raw_line in text.lines() {
691 let line = raw_line.trim();
692 if line.is_empty() {
693 continue;
694 }
695 let payload = line.strip_prefix("data:").map(str::trim).unwrap_or(line);
696 if payload == "[DONE]" {
697 continue;
698 }
699 let Ok(frame) = serde_json::from_str::<Value>(payload) else {
700 continue;
701 };
702 collect_stream_content_and_calls(&frame, &mut content, &mut calls);
703 frames.push(frame);
704 }
705 let tool_calls: Vec<Value> = calls
706 .into_values()
707 .map(|call| {
708 json!({
709 "id": call.id.unwrap_or_else(|| "stream_tool".to_string()),
710 "type": "function",
711 "function": {
712 "name": call.name.unwrap_or_default(),
713 "arguments": call.arguments,
714 }
715 })
716 })
717 .collect();
718 json!({
719 "content": content,
720 "tool_calls": tool_calls,
721 "frames": frames,
722 })
723}
724
/// A tool call being reassembled from streamed deltas.
#[derive(Debug, Default)]
struct PartialStreamCall {
    /// Most recent `id` seen for this call, if any.
    id: Option<String>,
    /// Most recent tool name seen for this call, if any.
    name: Option<String>,
    /// Argument text: string deltas are appended, an object delta replaces it.
    arguments: String,
}
731
732fn collect_stream_content_and_calls(
733 frame: &Value,
734 content: &mut String,
735 calls: &mut BTreeMap<String, PartialStreamCall>,
736) {
737 if let Some(text) = frame
738 .pointer("/message/content")
739 .or_else(|| frame.pointer("/choices/0/delta/content"))
740 .or_else(|| frame.pointer("/choices/0/message/content"))
741 .or_else(|| frame.get("response"))
742 .and_then(Value::as_str)
743 {
744 content.push_str(text);
745 }
746 for item in frame
747 .pointer("/message/tool_calls")
748 .or_else(|| frame.pointer("/choices/0/delta/tool_calls"))
749 .or_else(|| frame.pointer("/choices/0/message/tool_calls"))
750 .and_then(Value::as_array)
751 .into_iter()
752 .flatten()
753 {
754 let key = item
755 .get("index")
756 .and_then(Value::as_u64)
757 .map(|index| index.to_string())
758 .or_else(|| item.get("id").and_then(Value::as_str).map(str::to_string))
759 .unwrap_or_else(|| calls.len().to_string());
760 let slot = calls.entry(key).or_default();
761 if let Some(id) = item.get("id").and_then(Value::as_str) {
762 slot.id = Some(id.to_string());
763 }
764 if let Some(name) = item
765 .pointer("/function/name")
766 .or_else(|| item.get("name"))
767 .and_then(Value::as_str)
768 {
769 slot.name = Some(name.to_string());
770 }
771 if let Some(arguments) = item
772 .pointer("/function/arguments")
773 .or_else(|| item.get("arguments"))
774 {
775 match arguments {
776 Value::String(delta) => slot.arguments.push_str(delta),
777 Value::Object(_) => slot.arguments = arguments.to_string(),
778 _ => {}
779 }
780 }
781 }
782}
783
784fn probe_tool_registry() -> VmValue {
785 let mut value_param = BTreeMap::new();
786 value_param.insert("type".to_string(), vm_str("string"));
787 value_param.insert(
788 "description".to_string(),
789 vm_str("The marker value to echo."),
790 );
791 let mut params = BTreeMap::new();
792 params.insert(
793 "value".to_string(),
794 VmValue::Dict(std::sync::Arc::new(value_param)),
795 );
796 let tool = vm_dict(&[
797 ("name", vm_str(TOOL_PROBE_TOOL_NAME)),
798 ("description", vm_str("Echo the probe marker exactly.")),
799 ("parameters", VmValue::Dict(std::sync::Arc::new(params))),
800 ]);
801 vm_dict(&[("tools", VmValue::List(std::sync::Arc::new(vec![tool])))])
802}
803
804fn vm_str(value: &str) -> VmValue {
805 VmValue::String(std::sync::Arc::from(value))
806}
807
808fn vm_dict(pairs: &[(&str, VmValue)]) -> VmValue {
809 let mut map = BTreeMap::new();
810 for (key, value) in pairs {
811 map.insert((*key).to_string(), value.clone());
812 }
813 VmValue::Dict(std::sync::Arc::new(map))
814}
815
/// Reports whether `content` contains any known raw tool-call marker,
/// compared ASCII case-insensitively.
fn has_raw_model_tool_tag(content: &str) -> bool {
    const RAW_TOOL_MARKERS: [&str; 6] = [
        "<tool_call",
        "<toolcall",
        "tool_code:",
        "tool_call:",
        "call:",
        "<function",
    ];
    let lowered = content.to_ascii_lowercase();
    RAW_TOOL_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
825
826fn content_sample(response: &Value) -> Option<String> {
827 sample_content(&extract_content(response))
828}
829
/// Returns the first 240 characters of `content` after trimming, or `None`
/// when the trimmed content is empty.
fn sample_content(content: &str) -> Option<String> {
    let trimmed = content.trim();
    (!trimmed.is_empty()).then(|| trimmed.chars().take(240).collect())
}
838
/// Formats a failure message: `fallback` alone when `text` is blank,
/// otherwise `fallback` followed by the first 240 characters of the
/// trimmed `text`.
fn sample_failure(text: &str, fallback: &str) -> String {
    match text.trim() {
        "" => fallback.to_string(),
        trimmed => {
            let excerpt: String = trimmed.chars().take(240).collect();
            format!("{fallback}: {excerpt}")
        }
    }
}
850
/// Returns `value` unchanged when it is present and not blank, otherwise
/// `fallback`.
fn first_non_empty(value: Option<String>, fallback: &str) -> String {
    match value {
        Some(candidate) if !candidate.trim().is_empty() => candidate,
        _ => fallback.to_string(),
    }
}
856
857fn elapsed_ms(clock: &dyn harn_clock::Clock, started_ms: i64) -> u64 {
858 clock.monotonic_ms().saturating_sub(started_ms).max(0) as u64
859}
860
#[cfg(test)]
mod tests {
    use super::*;

    /// An OpenAI-style native tool call with the right marker passes natively.
    #[test]
    fn classify_openai_native_tool_call_as_pass() {
        let fixture = r#"{"choices":[{"message":{"tool_calls":[{"id":"call_1","type":"function","function":{"name":"echo_marker","arguments":"{\"value\":\"harn_tool_probe_marker\"}"}}]}}]}"#;
        let report = classify_tool_conformance_fixture(
            "local",
            "model",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            fixture,
        );
        let summary = &report.tool_calling;
        assert_eq!(summary.native, ToolProbeStatus::Pass);
        assert_eq!(summary.fallback_mode, ToolProbeFallbackMode::Native);
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::StructuredNativeToolCall
        );
    }

    /// A `<tool_call>` JSON blob in the content falls back to text tool calling.
    #[test]
    fn classify_gemma_raw_json_tool_call_content_as_text_fallback() {
        let fixture = r#"{"message":{"content":"<tool_call>{\"name\":\"echo_marker\",\"arguments\":{\"value\":\"harn_tool_probe_marker\"}}</tool_call>"}}"#;
        let report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            fixture,
        );
        let summary = &report.tool_calling;
        assert_eq!(summary.native, ToolProbeStatus::Fail);
        assert_eq!(summary.text, ToolProbeStatus::Pass);
        assert_eq!(summary.fallback_mode, ToolProbeFallbackMode::Text);
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::ParseableHarnTextToolCall
        );
    }

    /// A `call:` prefixed invocation in the content falls back to text tool calling.
    #[test]
    fn classify_qwen_call_colon_marker_as_text_fallback() {
        let fixture = r#"{"content":"call:echo_marker{ value: \"harn_tool_probe_marker\" }"}"#;
        let report = classify_tool_conformance_fixture(
            "llamacpp",
            "qwen",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            fixture,
        );
        let summary = &report.tool_calling;
        assert_eq!(summary.text, ToolProbeStatus::Pass);
        assert_eq!(summary.fallback_mode, ToolProbeFallbackMode::Text);
    }

    /// Plain prose without any tool call disables tool calling.
    #[test]
    fn classify_prose_only_as_disabled() {
        let fixture =
            r#"{"message":{"content":"The comment has been added. I will now verify it."}}"#;
        let report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            fixture,
        );
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Disabled
        );
        let case = &report.cases[0];
        assert_eq!(
            case.classification,
            ToolProbeClassification::ProseOnlyNonTool
        );
        assert_eq!(
            case.failure_reason.as_deref(),
            Some("no_executable_tool_call")
        );
    }

    /// Argument deltas split across SSE frames are reassembled into one call.
    #[test]
    fn aggregates_openai_streaming_tool_call_deltas() {
        let raw = "data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"id\":\"call_1\",\"function\":{\"name\":\"echo_marker\",\"arguments\":\"{\\\"value\\\":\"}}]}}]}\n\
                   data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"harn_tool_probe_marker\\\"}\"}}]}}]}\n\
                   data: [DONE]\n";
        let aggregated = aggregate_stream_text(raw, "local");
        let case = classify_tool_probe_response(
            ToolProbeMode::Streaming,
            &aggregated,
            DEFAULT_TOOL_PROBE_MARKER,
            None,
            None,
        );
        assert!(case.ok, "{case:?}");
        assert_eq!(
            case.classification,
            ToolProbeClassification::StructuredNativeToolCall
        );
    }

    /// A text-fallback pass satisfies `tool_probe` but not `native_tool_probe`.
    #[test]
    fn report_satisfies_tool_probe_when_text_fallback_passes() {
        let fixture = r#"{"content":"echo_marker({ value: \"harn_tool_probe_marker\" })"}"#;
        let report = classify_tool_conformance_fixture(
            "llamacpp",
            "qwen",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            fixture,
        );
        let satisfies_any = report_satisfies_required_probe(&report, "tool_probe");
        let satisfies_native = report_satisfies_required_probe(&report, "native_tool_probe");
        assert!(satisfies_any);
        assert!(!satisfies_native);
    }
}