1use std::collections::BTreeMap;
9use std::rc::Rc;
10
11use serde::{Deserialize, Serialize};
12use serde_json::{json, Value};
13
14use crate::llm_config::{self, ProviderDef};
15use crate::value::VmValue;
16
/// Version number written into the `schema_version` field of every conformance report.
pub const TOOL_CONFORMANCE_SCHEMA_VERSION: u32 = 1;
/// Name of the single function tool offered to the model and expected back in its tool call.
pub const TOOL_PROBE_TOOL_NAME: &str = "echo_marker";
/// Marker value used by `ToolConformanceProbeOptions::new` when building default options.
pub const DEFAULT_TOOL_PROBE_MARKER: &str = "harn_tool_probe_marker";
20
/// Inputs for a live tool-calling conformance probe.
#[derive(Debug, Clone)]
pub struct ToolConformanceProbeOptions {
    /// Provider name; when blank, the provider resolved from `model` is used instead.
    pub provider: String,
    /// Model name handed to `llm_config::resolve_model_info`.
    pub model: String,
    /// Optional base URL override for the provider endpoint.
    pub base_url: Option<String>,
    /// Probe modes to run; an empty list means both non-streaming and streaming.
    pub modes: Vec<ToolProbeMode>,
    /// Marker string the model is asked to echo back through the probe tool.
    pub marker: String,
    /// Per-request timeout, in seconds.
    pub timeout_secs: u64,
}
30
31impl ToolConformanceProbeOptions {
32 pub fn new(provider: impl Into<String>, model: impl Into<String>) -> Self {
33 Self {
34 provider: provider.into(),
35 model: model.into(),
36 base_url: None,
37 modes: vec![ToolProbeMode::NonStreaming, ToolProbeMode::Streaming],
38 marker: DEFAULT_TOOL_PROBE_MARKER.to_string(),
39 timeout_secs: 120,
40 }
41 }
42}
43
/// How the probe request is issued to the provider.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeMode {
    /// Request sent with `"stream": false`; the body is parsed as a single JSON document.
    NonStreaming,
    /// Request sent with `"stream": true`; the body is aggregated frame by frame.
    Streaming,
}
50
51impl ToolProbeMode {
52 pub fn as_str(self) -> &'static str {
53 match self {
54 Self::NonStreaming => "non_streaming",
55 Self::Streaming => "streaming",
56 }
57 }
58}
59
/// Outcome category assigned to a single probe response.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeClassification {
    /// A native tool call named the probe tool with a `value` argument equal to the marker.
    StructuredNativeToolCall,
    /// The text tool-call parser recovered a matching probe call from the response content.
    ParseableHarnTextToolCall,
    /// The content contains a raw tool tag but no matching call could be extracted.
    RawModelToolTag,
    /// Content (or native calls) were present but none amounted to an executable probe call.
    ProseOnlyNonTool,
    /// A native probe call had non-object arguments, or the text parser reported errors.
    MalformedJsonArguments,
    /// The response had no content and no native tool calls.
    EmptySilent,
    /// The provider answered with a non-success HTTP status.
    HttpError,
    /// The request could not be prepared, sent, or its body read.
    TransportError,
}
72
/// Tri-state verdict for one tool-calling capability.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeStatus {
    /// The capability was exercised and worked.
    Pass,
    /// The capability was exercised and did not work.
    Fail,
    /// No case provided evidence either way.
    Unknown,
}
80
81impl ToolProbeStatus {
82 pub fn as_str(&self) -> &'static str {
83 match self {
84 Self::Pass => "pass",
85 Self::Fail => "fail",
86 Self::Unknown => "unknown",
87 }
88 }
89}
90
/// Tool-calling mode the probe results support.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolProbeFallbackMode {
    /// Structured native tool calls worked.
    Native,
    /// Only text-encoded tool calls could be parsed.
    Text,
    /// Neither native nor text tool calls worked.
    Disabled,
}
98
99impl ToolProbeFallbackMode {
100 pub fn as_str(&self) -> &'static str {
101 match self {
102 Self::Native => "native",
103 Self::Text => "text",
104 Self::Disabled => "disabled",
105 }
106 }
107}
108
/// Full result of a conformance probe: the per-mode cases plus a derived summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConformanceReport {
    /// Report format version (`TOOL_CONFORMANCE_SCHEMA_VERSION`).
    pub schema_version: u32,
    /// Provider the probe was run or classified for.
    pub provider: String,
    /// Model the probe was run or classified for.
    pub model: String,
    /// Base URL associated with the probe; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub base_url: Option<String>,
    /// Name of the probe tool (`TOOL_PROBE_TOOL_NAME`).
    pub tool_name: String,
    /// Marker value the model was expected to echo.
    pub marker: String,
    /// One entry per classified probe response.
    pub cases: Vec<ToolConformanceCase>,
    /// Summary derived from `cases`.
    pub tool_calling: ToolCallingConformanceSummary,
}
121
/// Aggregate verdicts across all cases of a report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallingConformanceSummary {
    /// Verdict for native tool calls in non-streaming mode.
    pub native: ToolProbeStatus,
    /// Verdict for text-encoded tool calls.
    pub text: ToolProbeStatus,
    /// Verdict for native tool calls in streaming mode.
    pub streaming_native: ToolProbeStatus,
    /// `Native` if either native verdict passed, else `Text` if text passed, else `Disabled`.
    pub fallback_mode: ToolProbeFallbackMode,
    /// First failure reason found among the cases; only set when `fallback_mode` is `Disabled`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
}
131
/// Classification of a single probe response for one mode.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolConformanceCase {
    /// Mode the probe was issued in.
    pub mode: ToolProbeMode,
    /// Whether the response contained a usable probe tool call (native or text).
    pub ok: bool,
    /// Outcome category for this response.
    pub classification: ToolProbeClassification,
    /// Tool-calling mode this single case supports.
    pub fallback_mode: ToolProbeFallbackMode,
    /// Short reason code or message when the case failed; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub failure_reason: Option<String>,
    /// HTTP status of the provider response, when one was received.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub http_status: Option<u16>,
    /// Elapsed time of the probe in milliseconds, when measured.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub elapsed_ms: Option<u64>,
    /// Number of native tool calls extracted from the response.
    pub native_tool_call_count: usize,
    /// Number of tool calls recovered by the text parser.
    pub text_tool_call_count: usize,
    /// Errors reported by the text tool-call parser; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub parser_errors: Vec<String>,
    /// Violations reported by the text tool-call parser; omitted when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub protocol_violations: Vec<String>,
    /// Trimmed excerpt of the response content (at most 240 characters); omitted when empty.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_sample: Option<String>,
}
153
154impl ToolConformanceCase {
155 fn transport_error(mode: ToolProbeMode, message: String, elapsed_ms: Option<u64>) -> Self {
156 Self {
157 mode,
158 ok: false,
159 classification: ToolProbeClassification::TransportError,
160 fallback_mode: ToolProbeFallbackMode::Disabled,
161 failure_reason: Some(message),
162 http_status: None,
163 elapsed_ms,
164 native_tool_call_count: 0,
165 text_tool_call_count: 0,
166 parser_errors: Vec::new(),
167 protocol_violations: Vec::new(),
168 content_sample: None,
169 }
170 }
171
172 fn http_error(
173 mode: ToolProbeMode,
174 status: u16,
175 message: String,
176 elapsed_ms: Option<u64>,
177 ) -> Self {
178 Self {
179 mode,
180 ok: false,
181 classification: ToolProbeClassification::HttpError,
182 fallback_mode: ToolProbeFallbackMode::Disabled,
183 failure_reason: Some(message),
184 http_status: Some(status),
185 elapsed_ms,
186 native_tool_call_count: 0,
187 text_tool_call_count: 0,
188 parser_errors: Vec::new(),
189 protocol_violations: Vec::new(),
190 content_sample: None,
191 }
192 }
193}
194
195pub async fn run_tool_conformance_probe(
196 options: ToolConformanceProbeOptions,
197) -> ToolConformanceReport {
198 let model = llm_config::resolve_model_info(&options.model);
199 let provider = if options.provider.trim().is_empty() {
200 model.provider.clone()
201 } else {
202 options.provider.clone()
203 };
204 let model_id = model.id;
205 let base_url = options.base_url.clone().or_else(|| {
206 llm_config::provider_config(&provider).map(|def| llm_config::resolve_base_url(&def))
207 });
208 let mut cases = Vec::new();
209 for mode in normalized_modes(&options.modes) {
210 cases.push(
211 execute_live_probe_case(
212 &provider,
213 &model_id,
214 base_url.as_deref(),
215 mode,
216 &options.marker,
217 options.timeout_secs,
218 )
219 .await,
220 );
221 }
222 report_from_cases(provider, model_id, base_url, options.marker, cases)
223}
224
225pub fn classify_tool_conformance_fixture(
226 provider: impl Into<String>,
227 model: impl Into<String>,
228 mode: ToolProbeMode,
229 marker: impl Into<String>,
230 raw: &str,
231) -> ToolConformanceReport {
232 let marker = marker.into();
233 let response = serde_json::from_str::<Value>(raw).unwrap_or_else(|_| json!({ "content": raw }));
234 let case = classify_tool_probe_response(mode, &response, &marker, None, None);
235 report_from_cases(provider.into(), model.into(), None, marker, vec![case])
236}
237
238pub fn report_satisfies_required_probe(report: &ToolConformanceReport, requirement: &str) -> bool {
239 match requirement {
240 "tool_probe" | "tool_call_probe" => {
241 report.tool_calling.fallback_mode != ToolProbeFallbackMode::Disabled
242 && report.cases.iter().any(|case| case.ok)
243 }
244 "native_tool_probe" => report.tool_calling.native == ToolProbeStatus::Pass,
245 "streaming_tool_probe" => report.tool_calling.streaming_native == ToolProbeStatus::Pass,
246 _ => false,
247 }
248}
249
250fn normalized_modes(modes: &[ToolProbeMode]) -> Vec<ToolProbeMode> {
251 if modes.is_empty() {
252 return vec![ToolProbeMode::NonStreaming, ToolProbeMode::Streaming];
253 }
254 let mut out = Vec::new();
255 for mode in modes {
256 if !out.contains(mode) {
257 out.push(*mode);
258 }
259 }
260 out
261}
262
263fn report_from_cases(
264 provider: String,
265 model: String,
266 base_url: Option<String>,
267 marker: String,
268 cases: Vec<ToolConformanceCase>,
269) -> ToolConformanceReport {
270 let summary = summarize_cases(&cases);
271 ToolConformanceReport {
272 schema_version: TOOL_CONFORMANCE_SCHEMA_VERSION,
273 provider,
274 model,
275 base_url,
276 tool_name: TOOL_PROBE_TOOL_NAME.to_string(),
277 marker,
278 cases,
279 tool_calling: summary,
280 }
281}
282
283fn summarize_cases(cases: &[ToolConformanceCase]) -> ToolCallingConformanceSummary {
284 let mut native = ToolProbeStatus::Unknown;
285 let mut streaming_native = ToolProbeStatus::Unknown;
286 let mut text = ToolProbeStatus::Unknown;
287
288 for case in cases {
289 if case.classification == ToolProbeClassification::StructuredNativeToolCall {
290 if case.mode == ToolProbeMode::Streaming {
291 streaming_native = if case.ok {
292 ToolProbeStatus::Pass
293 } else {
294 ToolProbeStatus::Fail
295 };
296 } else {
297 native = if case.ok {
298 ToolProbeStatus::Pass
299 } else {
300 ToolProbeStatus::Fail
301 };
302 }
303 } else if case.mode == ToolProbeMode::Streaming
304 && streaming_native == ToolProbeStatus::Unknown
305 {
306 streaming_native = ToolProbeStatus::Fail;
307 } else if case.mode == ToolProbeMode::NonStreaming && native == ToolProbeStatus::Unknown {
308 native = ToolProbeStatus::Fail;
309 }
310
311 if case.classification == ToolProbeClassification::ParseableHarnTextToolCall {
312 text = if case.ok {
313 ToolProbeStatus::Pass
314 } else {
315 ToolProbeStatus::Fail
316 };
317 } else if text == ToolProbeStatus::Unknown && case.text_tool_call_count > 0 {
318 text = ToolProbeStatus::Fail;
319 }
320 }
321
322 let fallback_mode =
323 if native == ToolProbeStatus::Pass || streaming_native == ToolProbeStatus::Pass {
324 ToolProbeFallbackMode::Native
325 } else if text == ToolProbeStatus::Pass {
326 ToolProbeFallbackMode::Text
327 } else {
328 ToolProbeFallbackMode::Disabled
329 };
330
331 let failure_reason = if fallback_mode == ToolProbeFallbackMode::Disabled {
332 cases.iter().find_map(|case| case.failure_reason.clone())
333 } else {
334 None
335 };
336
337 ToolCallingConformanceSummary {
338 native,
339 text,
340 streaming_native,
341 fallback_mode,
342 failure_reason,
343 }
344}
345
/// Sends one probe request to the provider in the given `mode` and classifies the reply.
///
/// Failures before a body is available (unknown provider, invalid chat URL, send error,
/// unreadable body) become transport-error cases; a non-success HTTP status becomes an
/// HTTP-error case. Every returned case carries the elapsed time measured from the start
/// of this function.
async fn execute_live_probe_case(
    provider: &str,
    model: &str,
    base_url: Option<&str>,
    mode: ToolProbeMode,
    marker: &str,
    timeout_secs: u64,
) -> ToolConformanceCase {
    let clock = harn_clock::RealClock::arc();
    let started_ms = clock.monotonic_ms();
    let Some(def) = llm_config::provider_config(provider) else {
        return ToolConformanceCase::transport_error(
            mode,
            format!("unknown provider: {provider}"),
            Some(elapsed_ms(&*clock, started_ms)),
        );
    };
    // A missing or blank override falls back to the provider's resolved base URL.
    let base_url = base_url
        .filter(|value| !value.trim().is_empty())
        .map(str::to_string)
        .unwrap_or_else(|| llm_config::resolve_base_url(&def));
    let url = match chat_url(&def, &base_url) {
        Ok(url) => url,
        Err(message) => {
            return ToolConformanceCase::transport_error(
                mode,
                message,
                Some(elapsed_ms(&*clock, started_ms)),
            );
        }
    };
    let body = probe_request_body(provider, model, mode, marker);
    // Streaming and non-streaming probes use different shared HTTP clients.
    let client = if mode == ToolProbeMode::Streaming {
        crate::llm::shared_streaming_client().clone()
    } else {
        crate::llm::shared_blocking_client().clone()
    };
    // When no API key can be resolved, the default (empty) value is passed on.
    let api_key = crate::llm::helpers::resolve_api_key(provider).unwrap_or_default();
    let request = client
        .post(&url)
        .header("Content-Type", "application/json")
        .timeout(std::time::Duration::from_secs(timeout_secs))
        .json(&body);
    let mut request = crate::llm::api::apply_auth_headers(request, &api_key, Some(&def));
    // Provider-specific extra headers are added after the auth headers.
    for (name, value) in &def.extra_headers {
        request = request.header(name.as_str(), value.as_str());
    }

    let response = match request.send().await {
        Ok(response) => response,
        Err(error) => {
            return ToolConformanceCase::transport_error(
                mode,
                format!("provider request failed: {error}"),
                Some(elapsed_ms(&*clock, started_ms)),
            );
        }
    };
    // Capture the status first: reading the body consumes the response.
    let status = response.status();
    let text = match response.text().await {
        Ok(text) => text,
        Err(error) => {
            return ToolConformanceCase::transport_error(
                mode,
                format!("provider response was unreadable: {error}"),
                Some(elapsed_ms(&*clock, started_ms)),
            );
        }
    };
    // Elapsed time covers the whole exchange, including reading the full body.
    let elapsed = Some(elapsed_ms(&*clock, started_ms));
    if !status.is_success() {
        return ToolConformanceCase::http_error(
            mode,
            status.as_u16(),
            sample_failure(&text, "provider returned non-success HTTP status"),
            elapsed,
        );
    }
    // Streaming bodies are aggregated line by line; other bodies are parsed as one JSON
    // document, falling back to treating the raw text as plain content.
    let response_value = if mode == ToolProbeMode::Streaming {
        aggregate_stream_text(&text, provider)
    } else {
        serde_json::from_str::<Value>(&text).unwrap_or_else(|_| json!({ "content": text }))
    };
    classify_tool_probe_response(
        mode,
        &response_value,
        marker,
        Some(status.as_u16()),
        elapsed,
    )
}
437
438fn classify_tool_probe_response(
439 mode: ToolProbeMode,
440 response: &Value,
441 marker: &str,
442 http_status: Option<u16>,
443 elapsed_ms: Option<u64>,
444) -> ToolConformanceCase {
445 let native = extract_native_tool_calls(response);
446 let native_count = native.len();
447 let mut malformed_native = false;
448 for call in &native {
449 if call.name == TOOL_PROBE_TOOL_NAME {
450 match &call.arguments {
451 Some(Value::Object(map))
452 if map.get("value").and_then(Value::as_str) == Some(marker) =>
453 {
454 return ToolConformanceCase {
455 mode,
456 ok: true,
457 classification: ToolProbeClassification::StructuredNativeToolCall,
458 fallback_mode: ToolProbeFallbackMode::Native,
459 failure_reason: None,
460 http_status,
461 elapsed_ms,
462 native_tool_call_count: native_count,
463 text_tool_call_count: 0,
464 parser_errors: Vec::new(),
465 protocol_violations: Vec::new(),
466 content_sample: content_sample(response),
467 };
468 }
469 Some(Value::Object(_)) => {}
470 _ => malformed_native = true,
471 }
472 }
473 }
474
475 let content = extract_content(response);
476 let tools = probe_tool_registry();
477 let parsed = crate::llm::tools::parse_text_tool_calls_with_tools(&content, Some(&tools));
478 let text_count = parsed.calls.len();
479 let text_pass = parsed.calls.iter().any(|call| {
480 call.get("name").and_then(Value::as_str) == Some(TOOL_PROBE_TOOL_NAME)
481 && call
482 .get("arguments")
483 .and_then(|args| args.get("value"))
484 .and_then(Value::as_str)
485 == Some(marker)
486 });
487 if text_pass {
488 return ToolConformanceCase {
489 mode,
490 ok: true,
491 classification: ToolProbeClassification::ParseableHarnTextToolCall,
492 fallback_mode: ToolProbeFallbackMode::Text,
493 failure_reason: None,
494 http_status,
495 elapsed_ms,
496 native_tool_call_count: native_count,
497 text_tool_call_count: text_count,
498 parser_errors: parsed.errors,
499 protocol_violations: parsed.violations,
500 content_sample: sample_content(&content),
501 };
502 }
503
504 let (classification, failure_reason) = if malformed_native || !parsed.errors.is_empty() {
505 (
506 ToolProbeClassification::MalformedJsonArguments,
507 Some(first_non_empty(
508 parsed.errors.first().cloned(),
509 "malformed_tool_arguments",
510 )),
511 )
512 } else if content.trim().is_empty() && native_count == 0 {
513 (
514 ToolProbeClassification::EmptySilent,
515 Some("empty_silent_response".to_string()),
516 )
517 } else if has_raw_model_tool_tag(&content) {
518 (
519 ToolProbeClassification::RawModelToolTag,
520 Some("raw_tool_tag_no_structured_calls".to_string()),
521 )
522 } else {
523 (
524 ToolProbeClassification::ProseOnlyNonTool,
525 Some("no_executable_tool_call".to_string()),
526 )
527 };
528
529 ToolConformanceCase {
530 mode,
531 ok: false,
532 classification,
533 fallback_mode: ToolProbeFallbackMode::Disabled,
534 failure_reason,
535 http_status,
536 elapsed_ms,
537 native_tool_call_count: native_count,
538 text_tool_call_count: text_count,
539 parser_errors: parsed.errors,
540 protocol_violations: parsed.violations,
541 content_sample: sample_content(&content),
542 }
543}
544
545fn chat_url(def: &ProviderDef, base_url: &str) -> Result<String, String> {
546 let endpoint = if def.chat_endpoint.trim().is_empty() {
547 "/v1/chat/completions"
548 } else {
549 def.chat_endpoint.as_str()
550 };
551 let url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
552 endpoint.to_string()
553 } else if endpoint.starts_with('/') {
554 format!("{}{}", base_url.trim_end_matches('/'), endpoint)
555 } else {
556 format!("{}/{}", base_url.trim_end_matches('/'), endpoint)
557 };
558 reqwest::Url::parse(&url)
559 .map(|_| url.clone())
560 .map_err(|error| format!("invalid provider chat URL '{url}': {error}"))
561}
562
563fn probe_request_body(provider: &str, model: &str, mode: ToolProbeMode, marker: &str) -> Value {
564 let prompt = format!(
565 "Call the {TOOL_PROBE_TOOL_NAME} tool exactly once with value {marker:?}. Do not answer in prose."
566 );
567 let tool = json!({
568 "type": "function",
569 "function": {
570 "name": TOOL_PROBE_TOOL_NAME,
571 "description": "Echo the probe marker exactly.",
572 "parameters": {
573 "type": "object",
574 "properties": {
575 "value": {
576 "type": "string",
577 "description": "The marker value to echo."
578 }
579 },
580 "required": ["value"],
581 "additionalProperties": false
582 }
583 }
584 });
585 let mut body = json!({
586 "model": model,
587 "messages": [{"role": "user", "content": prompt}],
588 "tools": [tool],
589 "stream": mode == ToolProbeMode::Streaming,
590 "temperature": 0,
591 });
592 if !crate::llm::provider::provider_uses_ollama_messages(provider, model) {
593 body["tool_choice"] = json!({
594 "type": "function",
595 "function": {"name": TOOL_PROBE_TOOL_NAME}
596 });
597 }
598 body
599}
600
/// A tool call extracted from a `tool_calls` array in a provider response.
#[derive(Debug)]
struct NativeToolCall {
    /// Function name of the call.
    name: String,
    /// Parsed arguments; `None` when they were present but could not be interpreted
    /// (an unparseable JSON string, or a value that is neither a string nor an object).
    arguments: Option<Value>,
}
606
607fn extract_native_tool_calls(response: &Value) -> Vec<NativeToolCall> {
608 let mut calls = Vec::new();
609 visit_native_tool_call_arrays(response, &mut calls);
610 calls
611}
612
613fn visit_native_tool_call_arrays(value: &Value, calls: &mut Vec<NativeToolCall>) {
614 match value {
615 Value::Object(map) => {
616 if let Some(tool_calls) = map.get("tool_calls").and_then(Value::as_array) {
617 for item in tool_calls {
618 if let Some(call) = parse_native_tool_call(item) {
619 calls.push(call);
620 }
621 }
622 }
623 for child in map.values() {
624 visit_native_tool_call_arrays(child, calls);
625 }
626 }
627 Value::Array(items) => {
628 for item in items {
629 visit_native_tool_call_arrays(item, calls);
630 }
631 }
632 _ => {}
633 }
634}
635
636fn parse_native_tool_call(item: &Value) -> Option<NativeToolCall> {
637 let obj = item.as_object()?;
638 let function = obj.get("function").and_then(Value::as_object);
639 let name = function
640 .and_then(|function| function.get("name"))
641 .or_else(|| obj.get("name"))
642 .and_then(Value::as_str)?
643 .to_string();
644 let raw_args = function
645 .and_then(|function| function.get("arguments"))
646 .or_else(|| obj.get("arguments"));
647 let arguments = match raw_args {
648 Some(Value::String(raw)) => serde_json::from_str::<Value>(raw).ok(),
649 Some(value @ Value::Object(_)) => Some(value.clone()),
650 Some(_) => None,
651 None => Some(json!({})),
652 };
653 Some(NativeToolCall { name, arguments })
654}
655
656fn extract_content(response: &Value) -> String {
657 let mut parts = Vec::new();
658 visit_content(response, &mut parts);
659 parts
660 .into_iter()
661 .filter(|part| !part.trim().is_empty())
662 .collect::<Vec<_>>()
663 .join("\n")
664}
665
666fn visit_content(value: &Value, parts: &mut Vec<String>) {
667 match value {
668 Value::Object(map) => {
669 for key in ["content", "response", "text"] {
670 if let Some(text) = map.get(key).and_then(Value::as_str) {
671 parts.push(text.to_string());
672 }
673 }
674 for child in map.values() {
675 visit_content(child, parts);
676 }
677 }
678 Value::Array(items) => {
679 for item in items {
680 visit_content(item, parts);
681 }
682 }
683 _ => {}
684 }
685}
686
687fn aggregate_stream_text(text: &str, _provider: &str) -> Value {
688 let mut content = String::new();
689 let mut calls: BTreeMap<String, PartialStreamCall> = BTreeMap::new();
690 let mut frames = Vec::new();
691 for raw_line in text.lines() {
692 let line = raw_line.trim();
693 if line.is_empty() {
694 continue;
695 }
696 let payload = line.strip_prefix("data:").map(str::trim).unwrap_or(line);
697 if payload == "[DONE]" {
698 continue;
699 }
700 let Ok(frame) = serde_json::from_str::<Value>(payload) else {
701 continue;
702 };
703 collect_stream_content_and_calls(&frame, &mut content, &mut calls);
704 frames.push(frame);
705 }
706 let tool_calls: Vec<Value> = calls
707 .into_values()
708 .map(|call| {
709 json!({
710 "id": call.id.unwrap_or_else(|| "stream_tool".to_string()),
711 "type": "function",
712 "function": {
713 "name": call.name.unwrap_or_default(),
714 "arguments": call.arguments,
715 }
716 })
717 })
718 .collect();
719 json!({
720 "content": content,
721 "tool_calls": tool_calls,
722 "frames": frames,
723 })
724}
725
/// Accumulator for one tool call whose pieces arrive across several stream frames.
#[derive(Debug, Default)]
struct PartialStreamCall {
    /// Call id, once a frame has supplied one.
    id: Option<String>,
    /// Function name, once a frame has supplied one.
    name: Option<String>,
    /// Argument text accumulated so far (concatenated string deltas, or a serialized object).
    arguments: String,
}
732
733fn collect_stream_content_and_calls(
734 frame: &Value,
735 content: &mut String,
736 calls: &mut BTreeMap<String, PartialStreamCall>,
737) {
738 if let Some(text) = frame
739 .pointer("/message/content")
740 .or_else(|| frame.pointer("/choices/0/delta/content"))
741 .or_else(|| frame.pointer("/choices/0/message/content"))
742 .or_else(|| frame.get("response"))
743 .and_then(Value::as_str)
744 {
745 content.push_str(text);
746 }
747 for item in frame
748 .pointer("/message/tool_calls")
749 .or_else(|| frame.pointer("/choices/0/delta/tool_calls"))
750 .or_else(|| frame.pointer("/choices/0/message/tool_calls"))
751 .and_then(Value::as_array)
752 .into_iter()
753 .flatten()
754 {
755 let key = item
756 .get("index")
757 .and_then(Value::as_u64)
758 .map(|index| index.to_string())
759 .or_else(|| item.get("id").and_then(Value::as_str).map(str::to_string))
760 .unwrap_or_else(|| calls.len().to_string());
761 let slot = calls.entry(key).or_default();
762 if let Some(id) = item.get("id").and_then(Value::as_str) {
763 slot.id = Some(id.to_string());
764 }
765 if let Some(name) = item
766 .pointer("/function/name")
767 .or_else(|| item.get("name"))
768 .and_then(Value::as_str)
769 {
770 slot.name = Some(name.to_string());
771 }
772 if let Some(arguments) = item
773 .pointer("/function/arguments")
774 .or_else(|| item.get("arguments"))
775 {
776 match arguments {
777 Value::String(delta) => slot.arguments.push_str(delta),
778 Value::Object(_) => slot.arguments = arguments.to_string(),
779 _ => {}
780 }
781 }
782 }
783}
784
785fn probe_tool_registry() -> VmValue {
786 let mut value_param = BTreeMap::new();
787 value_param.insert("type".to_string(), vm_str("string"));
788 value_param.insert(
789 "description".to_string(),
790 vm_str("The marker value to echo."),
791 );
792 let mut params = BTreeMap::new();
793 params.insert("value".to_string(), VmValue::Dict(Rc::new(value_param)));
794 let tool = vm_dict(&[
795 ("name", vm_str(TOOL_PROBE_TOOL_NAME)),
796 ("description", vm_str("Echo the probe marker exactly.")),
797 ("parameters", VmValue::Dict(Rc::new(params))),
798 ]);
799 vm_dict(&[("tools", VmValue::List(Rc::new(vec![tool])))])
800}
801
802fn vm_str(value: &str) -> VmValue {
803 VmValue::String(Rc::from(value))
804}
805
806fn vm_dict(pairs: &[(&str, VmValue)]) -> VmValue {
807 let mut map = BTreeMap::new();
808 for (key, value) in pairs {
809 map.insert((*key).to_string(), value.clone());
810 }
811 VmValue::Dict(Rc::new(map))
812}
813
/// Reports whether `content` contains, ASCII case-insensitively, any of the raw
/// tool-call markers checked below.
fn has_raw_model_tool_tag(content: &str) -> bool {
    // `tool_call:` is already covered by `call:`; it stays listed for explicitness.
    const RAW_TOOL_MARKERS: [&str; 6] = [
        "<tool_call",
        "<toolcall",
        "tool_code:",
        "tool_call:",
        "call:",
        "<function",
    ];
    let haystack = content.to_ascii_lowercase();
    RAW_TOOL_MARKERS
        .iter()
        .any(|marker| haystack.contains(*marker))
}
823
824fn content_sample(response: &Value) -> Option<String> {
825 sample_content(&extract_content(response))
826}
827
/// Returns the first 240 characters of `content` after trimming surrounding
/// whitespace, or `None` when nothing remains after trimming.
fn sample_content(content: &str) -> Option<String> {
    Some(content.trim())
        .filter(|trimmed| !trimmed.is_empty())
        .map(|trimmed| trimmed.chars().take(240).collect())
}
836
/// Builds a failure message: `fallback` alone when `text` is blank, otherwise
/// `fallback` followed by `": "` and the first 240 characters of the trimmed `text`.
fn sample_failure(text: &str, fallback: &str) -> String {
    let excerpt: String = text.trim().chars().take(240).collect();
    if excerpt.is_empty() {
        fallback.to_owned()
    } else {
        format!("{fallback}: {excerpt}")
    }
}
848
/// Returns `value` unchanged when it is present and not blank; otherwise returns
/// `fallback` as an owned string.
fn first_non_empty(value: Option<String>, fallback: &str) -> String {
    match value {
        Some(candidate) if !candidate.trim().is_empty() => candidate,
        _ => fallback.to_owned(),
    }
}
854
855fn elapsed_ms(clock: &dyn harn_clock::Clock, started_ms: i64) -> u64 {
856 clock.monotonic_ms().saturating_sub(started_ms).max(0) as u64
857}
858
// Fixture-based tests: each feeds a recorded provider response through the classifier
// and checks the resulting classification and summary.
#[cfg(test)]
mod tests {
    use super::*;

    // A tool call under `choices[].message.tool_calls` with JSON-string arguments
    // carrying the marker is a native pass.
    #[test]
    fn classify_openai_native_tool_call_as_pass() {
        let report = classify_tool_conformance_fixture(
            "local",
            "model",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"choices":[{"message":{"tool_calls":[{"id":"call_1","type":"function","function":{"name":"echo_marker","arguments":"{\"value\":\"harn_tool_probe_marker\"}"}}]}}]}"#,
        );
        assert_eq!(report.tool_calling.native, ToolProbeStatus::Pass);
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Native
        );
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::StructuredNativeToolCall
        );
    }

    // A `<tool_call>` JSON payload inside message content fails native but passes as text.
    #[test]
    fn classify_gemma_raw_json_tool_call_content_as_text_fallback() {
        let report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"message":{"content":"<tool_call>{\"name\":\"echo_marker\",\"arguments\":{\"value\":\"harn_tool_probe_marker\"}}</tool_call>"}}"#,
        );
        assert_eq!(report.tool_calling.native, ToolProbeStatus::Fail);
        assert_eq!(report.tool_calling.text, ToolProbeStatus::Pass);
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Text
        );
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::ParseableHarnTextToolCall
        );
    }

    // A `call:name{...}` form in content is accepted as a text tool call.
    #[test]
    fn classify_qwen_call_colon_marker_as_text_fallback() {
        let report = classify_tool_conformance_fixture(
            "llamacpp",
            "qwen",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"content":"call:echo_marker{ value: \"harn_tool_probe_marker\" }"}"#,
        );
        assert_eq!(report.tool_calling.text, ToolProbeStatus::Pass);
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Text
        );
    }

    // Plain prose with no tool call disables tool calling and records the reason code.
    #[test]
    fn classify_prose_only_as_disabled() {
        let report = classify_tool_conformance_fixture(
            "ollama",
            "gemma4:26b",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"message":{"content":"The comment has been added. I will now verify it."}}"#,
        );
        assert_eq!(
            report.tool_calling.fallback_mode,
            ToolProbeFallbackMode::Disabled
        );
        assert_eq!(
            report.cases[0].classification,
            ToolProbeClassification::ProseOnlyNonTool
        );
        assert_eq!(
            report.cases[0].failure_reason.as_deref(),
            Some("no_executable_tool_call")
        );
    }

    // Argument fragments split across two `data:` frames with the same index are
    // stitched back into one valid native call.
    #[test]
    fn aggregates_openai_streaming_tool_call_deltas() {
        let raw = "data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"id\":\"call_1\",\"function\":{\"name\":\"echo_marker\",\"arguments\":\"{\\\"value\\\":\"}}]}}]}\n\
                   data: {\"choices\":[{\"delta\":{\"tool_calls\":[{\"index\":0,\"function\":{\"arguments\":\"\\\"harn_tool_probe_marker\\\"}\"}}]}}]}\n\
                   data: [DONE]\n";
        let response = aggregate_stream_text(raw, "local");
        let case = classify_tool_probe_response(
            ToolProbeMode::Streaming,
            &response,
            DEFAULT_TOOL_PROBE_MARKER,
            None,
            None,
        );
        assert!(case.ok, "{case:?}");
        assert_eq!(
            case.classification,
            ToolProbeClassification::StructuredNativeToolCall
        );
    }

    // A text-only pass satisfies the generic `tool_probe` requirement but not the
    // native-specific one.
    #[test]
    fn report_satisfies_tool_probe_when_text_fallback_passes() {
        let report = classify_tool_conformance_fixture(
            "llamacpp",
            "qwen",
            ToolProbeMode::NonStreaming,
            DEFAULT_TOOL_PROBE_MARKER,
            r#"{"content":"echo_marker({ value: \"harn_tool_probe_marker\" })"}"#,
        );
        assert!(report_satisfies_required_probe(&report, "tool_probe"));
        assert!(!report_satisfies_required_probe(
            &report,
            "native_tool_probe"
        ));
    }
}