1use std::collections::BTreeMap;
9use std::rc::Rc;
10
11use serde::{Deserialize, Serialize};
12use serde_json::{json, Value};
13
14use crate::llm_config::{self, ProviderDef};
15use crate::value::VmValue;
16
/// Schema version stamped into every [`ToolConformanceReport`].
pub const TOOL_CONFORMANCE_SCHEMA_VERSION: u32 = 1;
/// Name of the single tool offered to the model during a probe.
pub const TOOL_PROBE_TOOL_NAME: &str = "echo_marker";
/// Marker value used by [`ToolConformanceProbeOptions::new`].
pub const DEFAULT_TOOL_PROBE_MARKER: &str = "harn_tool_probe_marker";
20
/// Inputs for a live tool-calling conformance probe.
#[derive(Debug, Clone)]
pub struct ToolConformanceProbeOptions {
    /// Provider name. When blank, the provider resolved for `model` is used instead.
    pub provider: String,
    /// Model identifier or alias; it is resolved through `llm_config` before use.
    pub model: String,
    /// Explicit base URL. When `None`, the provider's configured base URL is used.
    pub base_url: Option<String>,
    /// Modes to probe. An empty list means both modes; duplicates are probed once.
    pub modes: Vec<ToolProbeMode>,
    /// Value the model is asked to pass back through the probe tool.
    pub marker: String,
    /// Timeout applied to each probe request, in seconds.
    pub timeout_secs: u64,
}
30
31impl ToolConformanceProbeOptions {
32 pub fn new(provider: impl Into<String>, model: impl Into<String>) -> Self {
33 Self {
34 provider: provider.into(),
35 model: model.into(),
36 base_url: None,
37 modes: vec![ToolProbeMode::NonStreaming, ToolProbeMode::Streaming],
38 marker: DEFAULT_TOOL_PROBE_MARKER.to_string(),
39 timeout_secs: 120,
40 }
41 }
42}
43
/// How the probe request is sent; serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeMode {
    /// The request is sent with `"stream": false`.
    NonStreaming,
    /// The request is sent with `"stream": true` and the frames are aggregated.
    Streaming,
}
50
51impl ToolProbeMode {
52 pub fn as_str(self) -> &'static str {
53 match self {
54 Self::NonStreaming => "non_streaming",
55 Self::Streaming => "streaming",
56 }
57 }
58}
59
/// Outcome category assigned to a single probe response; serialized in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeClassification {
    /// A native `tool_calls` entry named the probe tool and carried the expected marker.
    StructuredNativeToolCall,
    /// The text tool-call parser found a probe-tool call with the expected marker in the content.
    ParseableHarnTextToolCall,
    /// The content contains a raw tool-tag marker but no executable call was recovered.
    RawModelToolTag,
    /// There was a response, but nothing in it was recognised as a tool call.
    ProseOnlyNonTool,
    /// A native probe-tool call had non-object arguments, or the text parser reported errors.
    MalformedJsonArguments,
    /// No content and no native tool calls were returned.
    EmptySilent,
    /// The provider answered with a non-success HTTP status.
    HttpError,
    /// The request could not be built, sent, or its body read.
    TransportError,
}
72
/// Tri-state result for one tool-calling capability; serialized in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeStatus {
    /// A probe case demonstrated the capability.
    Pass,
    /// A probe case exercised the capability and it did not work.
    Fail,
    /// No probe case provided evidence either way.
    Unknown,
}
80
81impl ToolProbeStatus {
82 pub fn as_str(&self) -> &'static str {
83 match self {
84 Self::Pass => "pass",
85 Self::Fail => "fail",
86 Self::Unknown => "unknown",
87 }
88 }
89}
90
/// Tool-calling mode a probe result supports; serialized in snake_case.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeFallbackMode {
    /// Structured native tool calls worked.
    Native,
    /// Only text-encoded tool calls worked.
    Text,
    /// Neither native nor text tool calls worked.
    Disabled,
}
98
99impl ToolProbeFallbackMode {
100 pub fn as_str(&self) -> &'static str {
101 match self {
102 Self::Native => "native",
103 Self::Text => "text",
104 Self::Disabled => "disabled",
105 }
106 }
107}
108
/// Full result of a conformance probe: the per-mode cases plus a summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConformanceReport {
    /// Report schema version; `report_from_cases` sets it to `TOOL_CONFORMANCE_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// Provider the probe ran against.
    pub provider: String,
    /// Model the probe ran against.
    pub model: String,
    /// Base URL used for the probe; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub base_url: Option<String>,
    /// Name of the probe tool; `report_from_cases` sets it to `TOOL_PROBE_TOOL_NAME`.
    pub tool_name: String,
    /// Marker value the model was expected to echo.
    pub marker: String,
    /// One classified case per probed response.
    pub cases: Vec<ToolConformanceCase>,
    /// Summary derived from `cases`.
    pub tool_calling: ToolCallingConformanceSummary,
}
121
/// Capability summary computed from all probe cases.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallingConformanceSummary {
    /// Status of native tool calls in non-streaming mode.
    pub native: ToolProbeStatus,
    /// Status of text-encoded tool calls, in any mode.
    pub text: ToolProbeStatus,
    /// Status of native tool calls in streaming mode.
    pub streaming_native: ToolProbeStatus,
    /// `Native` if either native status passed, else `Text` if text passed, else `Disabled`.
    pub fallback_mode: ToolProbeFallbackMode,
    /// First failure reason found among the cases; only set when `fallback_mode` is `Disabled`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
}
131
/// Classified result of one probe response.
///
/// Optional fields and empty lists are omitted from serialized output.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConformanceCase {
    /// Mode the probe was sent in.
    pub mode: ToolProbeMode,
    /// `true` when the model called the probe tool with the expected marker.
    pub ok: bool,
    /// Category assigned to the response.
    pub classification: ToolProbeClassification,
    /// Tool-calling mode this case supports on its own.
    pub fallback_mode: ToolProbeFallbackMode,
    /// Short reason for a failed case; `None` when `ok` is `true`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    /// HTTP status of the response, when one was received.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub http_status: Option<u16>,
    /// Elapsed time for the probe in milliseconds, when it was measured.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub elapsed_ms: Option<u64>,
    /// Number of native tool-call entries found in the response.
    pub native_tool_call_count: usize,
    /// Number of tool calls recovered from the text content.
    pub text_tool_call_count: usize,
    /// Errors reported by the text tool-call parser.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub parser_errors: Vec<String>,
    /// Protocol violations reported by the text tool-call parser.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub protocol_violations: Vec<String>,
    /// Leading excerpt of the response content, when there was any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_sample: Option<String>,
}
153
154impl ToolConformanceCase {
155 fn transport_error(mode: ToolProbeMode, message: String, elapsed_ms: Option<u64>) -> Self {
156 Self {
157 mode,
158 ok: false,
159 classification: ToolProbeClassification::TransportError,
160 fallback_mode: ToolProbeFallbackMode::Disabled,
161 failure_reason: Some(message),
162 http_status: None,
163 elapsed_ms,
164 native_tool_call_count: 0,
165 text_tool_call_count: 0,
166 parser_errors: Vec::new(),
167 protocol_violations: Vec::new(),
168 content_sample: None,
169 }
170 }
171
172 fn http_error(
173 mode: ToolProbeMode,
174 status: u16,
175 message: String,
176 elapsed_ms: Option<u64>,
177 ) -> Self {
178 Self {
179 mode,
180 ok: false,
181 classification: ToolProbeClassification::HttpError,
182 fallback_mode: ToolProbeFallbackMode::Disabled,
183 failure_reason: Some(message),
184 http_status: Some(status),
185 elapsed_ms,
186 native_tool_call_count: 0,
187 text_tool_call_count: 0,
188 parser_errors: Vec::new(),
189 protocol_violations: Vec::new(),
190 content_sample: None,
191 }
192 }
193}
194
195pub async fn run_tool_conformance_probe(
196 options: ToolConformanceProbeOptions,
197) -> ToolConformanceReport {
198 let model = llm_config::resolve_model_info(&options.model);
199 let provider = if options.provider.trim().is_empty() {
200 model.provider.clone()
201 } else {
202 options.provider.clone()
203 };
204 let model_id = model.id;
205 let base_url = options.base_url.clone().or_else(|| {
206 llm_config::provider_config(&provider).map(|def| llm_config::resolve_base_url(&def))
207 });
208 let mut cases = Vec::new();
209 for mode in normalized_modes(&options.modes) {
210 cases.push(
211 execute_live_probe_case(
212 &provider,
213 &model_id,
214 base_url.as_deref(),
215 mode,
216 &options.marker,
217 options.timeout_secs,
218 )
219 .await,
220 );
221 }
222 report_from_cases(provider, model_id, base_url, options.marker, cases)
223}
224
225pub fn classify_tool_conformance_fixture(
226 provider: impl Into<String>,
227 model: impl Into<String>,
228 mode: ToolProbeMode,
229 marker: impl Into<String>,
230 raw: &str,
231) -> ToolConformanceReport {
232 let marker = marker.into();
233 let response = serde_json::from_str::<Value>(raw).unwrap_or_else(|_| json!({ "content": raw }));
234 let case = classify_tool_probe_response(mode, &response, &marker, None, None);
235 report_from_cases(provider.into(), model.into(), None, marker, vec![case])
236}
237
238pub fn report_satisfies_required_probe(report: &ToolConformanceReport, requirement: &str) -> bool {
239 match requirement {
240 "tool_probe" | "tool_call_probe" => {
241 report.tool_calling.fallback_mode != ToolProbeFallbackMode::Disabled
242 && report.cases.iter().any(|case| case.ok)
243 }
244 "native_tool_probe" => report.tool_calling.native == ToolProbeStatus::Pass,
245 "streaming_tool_probe" => report.tool_calling.streaming_native == ToolProbeStatus::Pass,
246 _ => false,
247 }
248}
249
250fn normalized_modes(modes: &[ToolProbeMode]) -> Vec<ToolProbeMode> {
251 if modes.is_empty() {
252 return vec![ToolProbeMode::NonStreaming, ToolProbeMode::Streaming];
253 }
254 let mut out = Vec::new();
255 for mode in modes {
256 if !out.contains(mode) {
257 out.push(*mode);
258 }
259 }
260 out
261}
262
263fn report_from_cases(
264 provider: String,
265 model: String,
266 base_url: Option<String>,
267 marker: String,
268 cases: Vec<ToolConformanceCase>,
269) -> ToolConformanceReport {
270 let summary = summarize_cases(&cases);
271 ToolConformanceReport {
272 schema_version: TOOL_CONFORMANCE_SCHEMA_VERSION,
273 provider,
274 model,
275 base_url,
276 tool_name: TOOL_PROBE_TOOL_NAME.to_string(),
277 marker,
278 cases,
279 tool_calling: summary,
280 }
281}
282
283fn summarize_cases(cases: &[ToolConformanceCase]) -> ToolCallingConformanceSummary {
284 let mut native = ToolProbeStatus::Unknown;
285 let mut streaming_native = ToolProbeStatus::Unknown;
286 let mut text = ToolProbeStatus::Unknown;
287
288 for case in cases {
289 if case.classification == ToolProbeClassification::StructuredNativeToolCall {
290 if case.mode == ToolProbeMode::Streaming {
291 streaming_native = if case.ok {
292 ToolProbeStatus::Pass
293 } else {
294 ToolProbeStatus::Fail
295 };
296 } else {
297 native = if case.ok {
298 ToolProbeStatus::Pass
299 } else {
300 ToolProbeStatus::Fail
301 };
302 }
303 } else if case.mode == ToolProbeMode::Streaming
304 && streaming_native == ToolProbeStatus::Unknown
305 {
306 streaming_native = ToolProbeStatus::Fail;
307 } else if case.mode == ToolProbeMode::NonStreaming && native == ToolProbeStatus::Unknown {
308 native = ToolProbeStatus::Fail;
309 }
310
311 if case.classification == ToolProbeClassification::ParseableHarnTextToolCall {
312 text = if case.ok {
313 ToolProbeStatus::Pass
314 } else {
315 ToolProbeStatus::Fail
316 };
317 } else if text == ToolProbeStatus::Unknown && case.text_tool_call_count > 0 {
318 text = ToolProbeStatus::Fail;
319 }
320 }
321
322 let fallback_mode =
323 if native == ToolProbeStatus::Pass || streaming_native == ToolProbeStatus::Pass {
324 ToolProbeFallbackMode::Native
325 } else if text == ToolProbeStatus::Pass {
326 ToolProbeFallbackMode::Text
327 } else {
328 ToolProbeFallbackMode::Disabled
329 };
330
331 let failure_reason = if fallback_mode == ToolProbeFallbackMode::Disabled {
332 cases.iter().find_map(|case| case.failure_reason.clone())
333 } else {
334 None
335 };
336
337 ToolCallingConformanceSummary {
338 native,
339 text,
340 streaming_native,
341 fallback_mode,
342 failure_reason,
343 }
344}
345
/// Sends one live probe request in `mode` and classifies the provider's reply.
///
/// This function never returns an error. An unknown provider, an invalid chat
/// URL, a failed send, and an unreadable body all become `TransportError`
/// cases; a non-success HTTP status becomes an `HttpError` case carrying an
/// excerpt of the body.
///
/// `base_url` overrides the provider's configured base URL unless it is `None`
/// or blank. `timeout_secs` is applied as the request timeout.
async fn execute_live_probe_case(
    provider: &str,
    model: &str,
    base_url: Option<&str>,
    mode: ToolProbeMode,
    marker: &str,
    timeout_secs: u64,
) -> ToolConformanceCase {
    // Start timing before any validation so early failures still report elapsed time.
    let clock = harn_clock::RealClock::arc();
    let started_ms = clock.monotonic_ms();
    let Some(def) = llm_config::provider_config(provider) else {
        return ToolConformanceCase::transport_error(
            mode,
            format!("unknown provider: {provider}"),
            Some(elapsed_ms(&*clock, started_ms)),
        );
    };
    // A blank override is treated the same as no override.
    let base_url = base_url
        .filter(|value| !value.trim().is_empty())
        .map(str::to_string)
        .unwrap_or_else(|| llm_config::resolve_base_url(&def));
    let url = match chat_url(&def, &base_url) {
        Ok(url) => url,
        Err(message) => {
            return ToolConformanceCase::transport_error(
                mode,
                message,
                Some(elapsed_ms(&*clock, started_ms)),
            );
        }
    };
    let body = probe_request_body(provider, model, mode, marker);
    // Streaming and non-streaming probes use different shared HTTP clients.
    let client = if mode == ToolProbeMode::Streaming {
        crate::llm::shared_streaming_client().clone()
    } else {
        crate::llm::shared_blocking_client().clone()
    };
    // A missing API key is sent as an empty string rather than failing the probe.
    let api_key = crate::llm::helpers::resolve_api_key(provider)
        .map(|value| value.to_string())
        .unwrap_or_default();
    let request = client
        .post(&url)
        .header("Content-Type", "application/json")
        .timeout(std::time::Duration::from_secs(timeout_secs))
        .json(&body);
    let mut request = crate::llm::api::apply_auth_headers(request, &api_key, Some(&def));
    // Provider-specific extra headers are added after the auth headers.
    for (name, value) in &def.extra_headers {
        request = request.header(name.as_str(), value.as_str());
    }

    let response = match request.send().await {
        Ok(response) => response,
        Err(error) => {
            return ToolConformanceCase::transport_error(
                mode,
                format!("provider request failed: {error}"),
                Some(elapsed_ms(&*clock, started_ms)),
            );
        }
    };
    let status = response.status();
    // The whole body is buffered as text, in streaming mode too; streamed
    // frames are aggregated afterwards rather than consumed incrementally.
    let text = match response.text().await {
        Ok(text) => text,
        Err(error) => {
            return ToolConformanceCase::transport_error(
                mode,
                format!("provider response was unreadable: {error}"),
                Some(elapsed_ms(&*clock, started_ms)),
            );
        }
    };
    let elapsed = Some(elapsed_ms(&*clock, started_ms));
    if !status.is_success() {
        return ToolConformanceCase::http_error(
            mode,
            status.as_u16(),
            sample_failure(&text, "provider returned non-success HTTP status"),
            elapsed,
        );
    }
    // Non-streaming bodies that are not valid JSON are classified as plain content.
    let response_value = if mode == ToolProbeMode::Streaming {
        aggregate_stream_text(&text, provider)
    } else {
        serde_json::from_str::<Value>(&text).unwrap_or_else(|_| json!({ "content": text }))
    };
    classify_tool_probe_response(
        mode,
        &response_value,
        marker,
        Some(status.as_u16()),
        elapsed,
    )
}
439
/// Classifies a response value against the probe's expected tool call.
///
/// Checks are applied in this order:
/// 1. A native tool call named `TOOL_PROBE_TOOL_NAME` whose arguments object
///    has `value == marker` passes as `StructuredNativeToolCall`.
/// 2. Otherwise, a text tool call parsed from the content with the same name
///    and `value` passes as `ParseableHarnTextToolCall`.
/// 3. Otherwise the case fails and is classified, in priority order, as
///    `MalformedJsonArguments`, `EmptySilent`, `RawModelToolTag`, or
///    `ProseOnlyNonTool`.
///
/// `http_status` and `elapsed_ms` are copied into the returned case unchanged.
fn classify_tool_probe_response(
    mode: ToolProbeMode,
    response: &Value,
    marker: &str,
    http_status: Option<u16>,
    elapsed_ms: Option<u64>,
) -> ToolConformanceCase {
    let native = extract_native_tool_calls(response);
    let native_count = native.len();
    // Set when a probe-tool call carried arguments that are not a JSON object.
    let mut malformed_native = false;
    for call in &native {
        // Calls to other tool names are counted but otherwise ignored.
        if call.name == TOOL_PROBE_TOOL_NAME {
            match &call.arguments {
                Some(Value::Object(map))
                    if map.get("value").and_then(Value::as_str) == Some(marker) =>
                {
                    return ToolConformanceCase {
                        mode,
                        ok: true,
                        classification: ToolProbeClassification::StructuredNativeToolCall,
                        fallback_mode: ToolProbeFallbackMode::Native,
                        failure_reason: None,
                        http_status,
                        elapsed_ms,
                        native_tool_call_count: native_count,
                        text_tool_call_count: 0,
                        parser_errors: Vec::new(),
                        protocol_violations: Vec::new(),
                        content_sample: content_sample(response),
                    };
                }
                // Well-formed object with a missing or wrong `value`: not malformed, just not a pass.
                Some(Value::Object(_)) => {}
                _ => malformed_native = true,
            }
        }
    }

    // No native pass: look for a text-encoded call in the response content.
    let content = extract_content(response);
    let tools = probe_tool_registry();
    let parsed = crate::llm::tools::parse_text_tool_calls_with_tools(&content, Some(&tools));
    let text_count = parsed.calls.len();
    let text_pass = parsed.calls.iter().any(|call| {
        call.get("name").and_then(Value::as_str) == Some(TOOL_PROBE_TOOL_NAME)
            && call
                .get("arguments")
                .and_then(|args| args.get("value"))
                .and_then(Value::as_str)
                == Some(marker)
    });
    if text_pass {
        return ToolConformanceCase {
            mode,
            ok: true,
            classification: ToolProbeClassification::ParseableHarnTextToolCall,
            fallback_mode: ToolProbeFallbackMode::Text,
            failure_reason: None,
            http_status,
            elapsed_ms,
            native_tool_call_count: native_count,
            text_tool_call_count: text_count,
            parser_errors: parsed.errors,
            protocol_violations: parsed.violations,
            content_sample: sample_content(&content),
        };
    }

    // Failure classification; the first matching branch wins.
    let (classification, failure_reason) = if malformed_native || !parsed.errors.is_empty() {
        (
            ToolProbeClassification::MalformedJsonArguments,
            // Prefer the parser's first error message; fall back to a fixed reason code.
            Some(first_non_empty(
                parsed.errors.first().cloned(),
                "malformed_tool_arguments",
            )),
        )
    } else if content.trim().is_empty() && native_count == 0 {
        (
            ToolProbeClassification::EmptySilent,
            Some("empty_silent_response".to_string()),
        )
    } else if has_raw_model_tool_tag(&content) {
        (
            ToolProbeClassification::RawModelToolTag,
            Some("raw_tool_tag_no_structured_calls".to_string()),
        )
    } else {
        (
            ToolProbeClassification::ProseOnlyNonTool,
            Some("no_executable_tool_call".to_string()),
        )
    };

    ToolConformanceCase {
        mode,
        ok: false,
        classification,
        fallback_mode: ToolProbeFallbackMode::Disabled,
        failure_reason,
        http_status,
        elapsed_ms,
        native_tool_call_count: native_count,
        text_tool_call_count: text_count,
        parser_errors: parsed.errors,
        protocol_violations: parsed.violations,
        content_sample: sample_content(&content),
    }
}
546
547fn chat_url(def: &ProviderDef, base_url: &str) -> Result<String, String> {
548 let endpoint = if def.chat_endpoint.trim().is_empty() {
549 "/v1/chat/completions"
550 } else {
551 def.chat_endpoint.as_str()
552 };
553 let url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
554 endpoint.to_string()
555 } else if endpoint.starts_with('/') {
556 format!("{}{}", base_url.trim_end_matches('/'), endpoint)
557 } else {
558 format!("{}/{}", base_url.trim_end_matches('/'), endpoint)
559 };
560 reqwest::Url::parse(&url)
561 .map(|_| url.clone())
562 .map_err(|error| format!("invalid provider chat URL '{url}': {error}"))
563}
564
565fn probe_request_body(provider: &str, model: &str, mode: ToolProbeMode, marker: &str) -> Value {
566 let prompt = format!(
567 "Call the {TOOL_PROBE_TOOL_NAME} tool exactly once with value {marker:?}. Do not answer in prose."
568 );
569 let tool = json!({
570 "type": "function",
571 "function": {
572 "name": TOOL_PROBE_TOOL_NAME,
573 "description": "Echo the probe marker exactly.",
574 "parameters": {
575 "type": "object",
576 "properties": {
577 "value": {
578 "type": "string",
579 "description": "The marker value to echo."
580 }
581 },
582 "required": ["value"],
583 "additionalProperties": false
584 }
585 }
586 });
587 let mut body = json!({
588 "model": model,
589 "messages": [{"role": "user", "content": prompt}],
590 "tools": [tool],
591 "stream": mode == ToolProbeMode::Streaming,
592 "temperature": 0,
593 });
594 if !crate::llm::provider::provider_uses_ollama_messages(provider, model) {
595 body["tool_choice"] = json!({
596 "type": "function",
597 "function": {"name": TOOL_PROBE_TOOL_NAME}
598 });
599 }
600 body
601}
602
/// A tool call found in a `tool_calls` array of a provider response.
#[derive(Debug)]
struct NativeToolCall {
    /// Tool name taken from the call entry.
    name: String,
    /// Decoded arguments; `None` when they were present but could not be decoded.
    arguments: Option<Value>,
}
608
609fn extract_native_tool_calls(response: &Value) -> Vec<NativeToolCall> {
610 let mut calls = Vec::new();
611 visit_native_tool_call_arrays(response, &mut calls);
612 calls
613}
614
615fn visit_native_tool_call_arrays(value: &Value, calls: &mut Vec<NativeToolCall>) {
616 match value {
617 Value::Object(map) => {
618 if let Some(tool_calls) = map.get("tool_calls").and_then(Value::as_array) {
619 for item in tool_calls {
620 if let Some(call) = parse_native_tool_call(item) {
621 calls.push(call);
622 }
623 }
624 }
625 for child in map.values() {
626 visit_native_tool_call_arrays(child, calls);
627 }
628 }
629 Value::Array(items) => {
630 for item in items {
631 visit_native_tool_call_arrays(item, calls);
632 }
633 }
634 _ => {}
635 }
636}
637
638fn parse_native_tool_call(item: &Value) -> Option<NativeToolCall> {
639 let obj = item.as_object()?;
640 let function = obj.get("function").and_then(Value::as_object);
641 let name = function
642 .and_then(|function| function.get("name"))
643 .or_else(|| obj.get("name"))
644 .and_then(Value::as_str)?
645 .to_string();
646 let raw_args = function
647 .and_then(|function| function.get("arguments"))
648 .or_else(|| obj.get("arguments"));
649 let arguments = match raw_args {
650 Some(Value::String(raw)) => serde_json::from_str::<Value>(raw).ok(),
651 Some(value @ Value::Object(_)) => Some(value.clone()),
652 Some(_) => None,
653 None => Some(json!({})),
654 };
655 Some(NativeToolCall { name, arguments })
656}
657
658fn extract_content(response: &Value) -> String {
659 let mut parts = Vec::new();
660 visit_content(response, &mut parts);
661 parts
662 .into_iter()
663 .filter(|part| !part.trim().is_empty())
664 .collect::<Vec<_>>()
665 .join("\n")
666}
667
668fn visit_content(value: &Value, parts: &mut Vec<String>) {
669 match value {
670 Value::Object(map) => {
671 for key in ["content", "response", "text"] {
672 if let Some(text) = map.get(key).and_then(Value::as_str) {
673 parts.push(text.to_string());
674 }
675 }
676 for child in map.values() {
677 visit_content(child, parts);
678 }
679 }
680 Value::Array(items) => {
681 for item in items {
682 visit_content(item, parts);
683 }
684 }
685 _ => {}
686 }
687}
688
689fn aggregate_stream_text(text: &str, _provider: &str) -> Value {
690 let mut content = String::new();
691 let mut calls: BTreeMap<String, PartialStreamCall> = BTreeMap::new();
692 let mut frames = Vec::new();
693 for raw_line in text.lines() {
694 let line = raw_line.trim();
695 if line.is_empty() {
696 continue;
697 }
698 let payload = line.strip_prefix("data:").map(str::trim).unwrap_or(line);
699 if payload == "[DONE]" {
700 continue;
701 }
702 let Ok(frame) = serde_json::from_str::<Value>(payload) else {
703 continue;
704 };
705 collect_stream_content_and_calls(&frame, &mut content, &mut calls);
706 frames.push(frame);
707 }
708 let tool_calls: Vec<Value> = calls
709 .into_values()
710 .map(|call| {
711 json!({
712 "id": call.id.unwrap_or_else(|| "stream_tool".to_string()),
713 "type": "function",
714 "function": {
715 "name": call.name.unwrap_or_default(),
716 "arguments": call.arguments,
717 }
718 })
719 })
720 .collect();
721 json!({
722 "content": content,
723 "tool_calls": tool_calls,
724 "frames": frames,
725 })
726}
727
/// Accumulator for one tool call assembled from streaming deltas.
#[derive(Debug, Default)]
struct PartialStreamCall {
    /// Most recent call id seen for this slot, if any.
    id: Option<String>,
    /// Most recent tool name seen for this slot, if any.
    name: Option<String>,
    /// Argument text: string deltas are appended, an object replaces it with its JSON text.
    arguments: String,
}
734
735fn collect_stream_content_and_calls(
736 frame: &Value,
737 content: &mut String,
738 calls: &mut BTreeMap<String, PartialStreamCall>,
739) {
740 if let Some(text) = frame
741 .pointer("/message/content")
742 .or_else(|| frame.pointer("/choices/0/delta/content"))
743 .or_else(|| frame.pointer("/choices/0/message/content"))
744 .or_else(|| frame.get("response"))
745 .and_then(Value::as_str)
746 {
747 content.push_str(text);
748 }
749 for item in frame
750 .pointer("/message/tool_calls")
751 .or_else(|| frame.pointer("/choices/0/delta/tool_calls"))
752 .or_else(|| frame.pointer("/choices/0/message/tool_calls"))
753 .and_then(Value::as_array)
754 .into_iter()
755 .flatten()
756 {
757 let key = item
758 .get("index")
759 .and_then(Value::as_u64)
760 .map(|index| index.to_string())
761 .or_else(|| item.get("id").and_then(Value::as_str).map(str::to_string))
762 .unwrap_or_else(|| calls.len().to_string());
763 let slot = calls.entry(key).or_default();
764 if let Some(id) = item.get("id").and_then(Value::as_str) {
765 slot.id = Some(id.to_string());
766 }
767 if let Some(name) = item
768 .pointer("/function/name")
769 .or_else(|| item.get("name"))
770 .and_then(Value::as_str)
771 {
772 slot.name = Some(name.to_string());
773 }
774 if let Some(arguments) = item
775 .pointer("/function/arguments")
776 .or_else(|| item.get("arguments"))
777 {
778 match arguments {
779 Value::String(delta) => slot.arguments.push_str(delta),
780 Value::Object(_) => slot.arguments = arguments.to_string(),
781 _ => {}
782 }
783 }
784 }
785}
786
787fn probe_tool_registry() -> VmValue {
788 let mut value_param = BTreeMap::new();
789 value_param.insert("type".to_string(), vm_str("string"));
790 value_param.insert(
791 "description".to_string(),
792 vm_str("The marker value to echo."),
793 );
794 let mut params = BTreeMap::new();
795 params.insert("value".to_string(), VmValue::Dict(Rc::new(value_param)));
796 let tool = vm_dict(&[
797 ("name", vm_str(TOOL_PROBE_TOOL_NAME)),
798 ("description", vm_str("Echo the probe marker exactly.")),
799 ("parameters", VmValue::Dict(Rc::new(params))),
800 ]);
801 vm_dict(&[("tools", VmValue::List(Rc::new(vec![tool])))])
802}
803
804fn vm_str(value: &str) -> VmValue {
805 VmValue::String(Rc::from(value))
806}
807
808fn vm_dict(pairs: &[(&str, VmValue)]) -> VmValue {
809 let mut map = BTreeMap::new();
810 for (key, value) in pairs {
811 map.insert((*key).to_string(), value.clone());
812 }
813 VmValue::Dict(Rc::new(map))
814}
815
/// Reports whether `content` contains any of the known raw tool-call markers,
/// compared ASCII case-insensitively.
fn has_raw_model_tool_tag(content: &str) -> bool {
    const RAW_TAG_MARKERS: [&str; 6] = [
        "<tool_call",
        "<toolcall",
        "tool_code:",
        "tool_call:",
        "call:",
        "<function",
    ];
    let lowered = content.to_ascii_lowercase();
    RAW_TAG_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
825
826fn content_sample(response: &Value) -> Option<String> {
827 sample_content(&extract_content(response))
828}
829
/// Returns the first 240 characters of `content` after trimming, or `None`
/// when nothing but whitespace is left.
fn sample_content(content: &str) -> Option<String> {
    match content.trim() {
        "" => None,
        body => Some(body.chars().take(240).collect()),
    }
}
838
/// Builds a failure message: `fallback` alone when `text` is blank, otherwise
/// `fallback` followed by `": "` and the first 240 characters of the trimmed text.
fn sample_failure(text: &str, fallback: &str) -> String {
    let body = text.trim();
    if body.is_empty() {
        return fallback.to_string();
    }
    let excerpt: String = body.chars().take(240).collect();
    format!("{fallback}: {excerpt}")
}
850
/// Returns `value` unchanged when it holds non-whitespace text, otherwise
/// `fallback`.
fn first_non_empty(value: Option<String>, fallback: &str) -> String {
    match value {
        Some(candidate) if !candidate.trim().is_empty() => candidate,
        _ => fallback.to_string(),
    }
}
856
857fn elapsed_ms(clock: &dyn harn_clock::Clock, started_ms: i64) -> u64 {
858 clock.monotonic_ms().saturating_sub(started_ms).max(0) as u64
859}
860
/// Fixture-based tests for response classification and stream aggregation.
#[cfg(test)]
mod tests {
    use super::*;

    /// An OpenAI-style `tool_calls` entry with the expected marker passes natively.
    #[test]
    fn classify_openai_native_tool_call_as_pass() {
        let report = classify_tool_conformance_fixture(
            "local",
            "model",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"choices":[{"message":{"tool_calls":[{"id":"call_1","type":"function","function":{"name":"echo_marker","arguments":"{\"value\":\"harn_tool_probe_marker\"}"}}]}}]}"#,
        );
        assert_eq!(report.tool_calling.native, ToolProbeStatus::Pass);
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Native
        );
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::StructuredNativeToolCall
        );
    }

    /// A `<tool_call>` JSON block in message content fails native but passes as text.
    #[test]
    fn classify_gemma_raw_json_tool_call_content_as_text_fallback() {
        let report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"message":{"content":"<tool_call>{\"name\":\"echo_marker\",\"arguments\":{\"value\":\"harn_tool_probe_marker\"}}</tool_call>"}}"#,
        );
        assert_eq!(report.tool_calling.native, ToolProbeStatus::Fail);
        assert_eq!(report.tool_calling.text, ToolProbeStatus::Pass);
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Text
        );
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::ParseableHarnTextToolCall
        );
    }

    /// A `call:name{...}` form in content is accepted as a text tool call.
    #[test]
    fn classify_qwen_call_colon_marker_as_text_fallback() {
        let report = classify_tool_conformance_fixture(
            "llamacpp",
            "qwen",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"content":"call:echo_marker{ value: \"harn_tool_probe_marker\" }"}"#,
        );
        assert_eq!(report.tool_calling.text, ToolProbeStatus::Pass);
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Text
        );
    }

    /// Plain prose with no tool call disables tool calling and reports the reason.
    #[test]
    fn classify_prose_only_as_disabled() {
        let report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"message":{"content":"The comment has been added. I will now verify it."}}"#,
        );
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Disabled
        );
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::ProseOnlyNonTool
        );
        assert_eq!(
            report.cases[0].failure_reason.as_deref(),
            Some("no_executable_tool_call")
        );
    }

    /// Argument fragments split across two streaming deltas are joined into one valid call.
    #[test]
    fn aggregates_openai_streaming_tool_call_deltas() {
        let raw = "data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"id\":\"call_1\",\"function\":{\"name\":\"echo_marker\",\"arguments\":\"{\\\"value\\\":\"}}]}}]}\n\
                   data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"harn_tool_probe_marker\\\"}\"}}]}}]}\n\
                   data: [DONE]\n";
        let response = aggregate_stream_text(raw, "local");
        let case = classify_tool_probe_response(
            ToolProbeMode::Streaming,
            &response,
            DEFAULT_TOOL_PROBE_MARKER,
            None,
            None,
        );
        assert!(case.ok, "{case:?}");
        assert_eq!(
            case.classification,
            ToolProbeClassification::StructuredNativeToolCall
        );
    }

    /// A text-only pass satisfies `tool_probe` but not `native_tool_probe`.
    #[test]
    fn report_satisfies_tool_probe_when_text_fallback_passes() {
        let report = classify_tool_conformance_fixture(
            "llamacpp",
            "qwen",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"content":"echo_marker({ value: \"harn_tool_probe_marker\" })"}"#,
        );
        assert!(report_satisfies_required_probe(&report, "tool_probe"));
        assert!(!report_satisfies_required_probe(
            &report,
            "native_tool_probe"
        ));
    }
}