1pub mod add_bos;
4pub mod llama_chat_message;
5pub mod llama_chat_template;
6pub mod llama_lora_adapter;
7pub mod llama_split_mode_parse_error;
8pub mod params;
9pub mod rope_type;
10pub mod split_mode;
11pub mod vocab_type;
12pub mod vocab_type_from_int_error;
13
14use std::ffi::{CStr, CString, c_char};
15use std::num::NonZeroU16;
16use std::os::raw::c_int;
17use std::path::Path;
18use std::ptr;
19use std::ptr::NonNull;
20use std::sync::Arc;
21use std::sync::OnceLock;
22
23use toktrie::ApproximateTokEnv;
24use toktrie::TokRxInfo;
25use toktrie::TokTrie;
26
27use llama_cpp_bindings_types::ParsedChatMessage;
28use llama_cpp_bindings_types::ParsedToolCall;
29use llama_cpp_bindings_types::ReasoningMarkers;
30use llama_cpp_bindings_types::ToolCallArguments;
31use llama_cpp_bindings_types::ToolCallMarkers;
32
33use crate::chat_message_parse_outcome::ChatMessageParseOutcome;
34use crate::llama_backend::LlamaBackend;
35use crate::llama_token_attrs::LlamaTokenAttrs;
36use crate::llama_token_attrs_from_int_error::LlamaTokenAttrsFromIntError;
37use crate::raw_chat_message::RawChatMessage;
38use crate::resolved_tool_call_markers::ResolvedToolCallMarkers;
39use crate::sampled_token::SampledToken;
40use crate::sampled_token_classifier::SampledTokenClassifier;
41use crate::streaming_markers::StreamingMarkers;
42use crate::token::LlamaToken;
43use crate::tool_call_format;
44use crate::tool_call_format::ToolCallFormatOutcome;
45use crate::tool_call_template_overrides;
46use crate::{
47 ApplyChatTemplateError, ChatTemplateError, LlamaLoraAdapterInitError, LlamaModelLoadError,
48 MarkerDetectionError, MetaValError, ParseChatMessageError, StringToTokenError,
49 TokenToStringError,
50};
51
52pub use add_bos::AddBos;
53pub use llama_chat_message::LlamaChatMessage;
54pub use llama_chat_template::LlamaChatTemplate;
55pub use llama_lora_adapter::LlamaLoraAdapter;
56pub use rope_type::RopeType;
57pub use vocab_type::VocabType;
58pub use vocab_type_from_int_error::VocabTypeFromIntError;
59
60use params::LlamaModelParams;
61
62fn truncated_buffer_to_string(
63 mut buffer: Vec<u8>,
64 length: usize,
65) -> Result<String, ApplyChatTemplateError> {
66 buffer.truncate(length);
67
68 Ok(String::from_utf8(buffer)?)
69}
70
71fn validate_string_length_for_tokenizer(length: usize) -> Result<c_int, StringToTokenError> {
72 Ok(c_int::try_from(length)?)
73}
74
75fn cstring_with_validated_len(str: &str) -> Result<(CString, c_int), StringToTokenError> {
76 let c_string = CString::new(str)?;
77 let len = validate_string_length_for_tokenizer(c_string.as_bytes().len())?;
78 Ok((c_string, len))
79}
80
/// Handle to a llama.cpp model, held as a non-null raw pointer.
pub struct LlamaModel {
    /// Non-null pointer to the underlying `llama_model`; handed to the FFI calls in this file.
    pub model: NonNull<llama_cpp_bindings_sys::llama_model>,
    /// Token environment cache, filled on first use by `approximate_tok_env`.
    tok_env: OnceLock<Arc<ApproximateTokEnv>>,
}
87
88impl std::fmt::Debug for LlamaModel {
89 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
90 f.debug_struct("LlamaModel")
91 .field("model", &self.model)
92 .finish_non_exhaustive()
93 }
94}
95
// SAFETY(review): asserts that the raw `llama_model` pointer may be moved to another thread.
// This relies on llama.cpp guarantees that are not visible in this file — TODO confirm.
unsafe impl Send for LlamaModel {}

// SAFETY(review): asserts that `&LlamaModel` may be shared between threads.
// This relies on llama.cpp guarantees that are not visible in this file — TODO confirm.
unsafe impl Sync for LlamaModel {}
99
100impl LlamaModel {
101 #[must_use]
103 pub fn vocab_ptr(&self) -> *const llama_cpp_bindings_sys::llama_vocab {
104 unsafe { llama_cpp_bindings_sys::llama_model_get_vocab(self.model.as_ptr()) }
105 }
106
107 pub fn n_ctx_train(&self) -> Result<u32, std::num::TryFromIntError> {
113 let n_ctx_train = unsafe { llama_cpp_bindings_sys::llama_n_ctx_train(self.model.as_ptr()) };
114
115 u32::try_from(n_ctx_train)
116 }
117
118 pub fn tokens(
120 &self,
121 decode_special: bool,
122 ) -> impl Iterator<Item = (LlamaToken, Result<String, TokenToStringError>)> + '_ {
123 (0..self.n_vocab())
124 .map(LlamaToken::new)
125 .map(move |llama_token| {
126 let mut decoder = encoding_rs::UTF_8.new_decoder();
127 (
128 llama_token,
129 self.token_to_piece(
130 &SampledToken::Content(llama_token),
131 &mut decoder,
132 decode_special,
133 None,
134 ),
135 )
136 })
137 }
138
139 #[must_use]
141 pub fn token_bos(&self) -> LlamaToken {
142 let token = unsafe { llama_cpp_bindings_sys::llama_token_bos(self.vocab_ptr()) };
143 LlamaToken(token)
144 }
145
146 #[must_use]
148 pub fn token_eos(&self) -> LlamaToken {
149 let token = unsafe { llama_cpp_bindings_sys::llama_token_eos(self.vocab_ptr()) };
150 LlamaToken(token)
151 }
152
153 #[must_use]
155 pub fn token_nl(&self) -> LlamaToken {
156 let token = unsafe { llama_cpp_bindings_sys::llama_token_nl(self.vocab_ptr()) };
157 LlamaToken(token)
158 }
159
160 #[must_use]
162 pub fn is_eog_token(&self, token: &SampledToken) -> bool {
163 let (SampledToken::Content(LlamaToken(id))
164 | SampledToken::Reasoning(LlamaToken(id))
165 | SampledToken::ToolCall(LlamaToken(id))
166 | SampledToken::Undeterminable(LlamaToken(id))) = *token;
167
168 unsafe { llama_cpp_bindings_sys::llama_token_is_eog(self.vocab_ptr(), id) }
169 }
170
171 #[must_use]
173 pub fn decode_start_token(&self) -> LlamaToken {
174 let token =
175 unsafe { llama_cpp_bindings_sys::llama_model_decoder_start_token(self.model.as_ptr()) };
176 LlamaToken(token)
177 }
178
179 #[must_use]
181 pub fn token_sep(&self) -> LlamaToken {
182 let token = unsafe { llama_cpp_bindings_sys::llama_vocab_sep(self.vocab_ptr()) };
183 LlamaToken(token)
184 }
185
186 pub fn str_to_token(
206 &self,
207 str: &str,
208 add_bos: AddBos,
209 ) -> Result<Vec<LlamaToken>, StringToTokenError> {
210 let add_bos = match add_bos {
211 AddBos::Always => true,
212 AddBos::Never => false,
213 };
214
215 let tokens_estimation = std::cmp::max(8, (str.len() / 2) + usize::from(add_bos));
216 let mut buffer: Vec<LlamaToken> = Vec::with_capacity(tokens_estimation);
217
218 let (c_string, c_string_len) = cstring_with_validated_len(str)?;
219 let buffer_capacity = c_int::try_from(buffer.capacity())?;
220
221 let size = invoke_rs_tokenize(
222 self.vocab_ptr(),
223 c_string.as_ptr(),
224 c_string_len,
225 buffer
226 .as_mut_ptr()
227 .cast::<llama_cpp_bindings_sys::llama_token>(),
228 buffer_capacity,
229 add_bos,
230 )?;
231
232 let size = if size.is_negative() {
233 buffer.reserve_exact(usize::try_from(-size)?);
234 invoke_rs_tokenize(
235 self.vocab_ptr(),
236 c_string.as_ptr(),
237 c_string_len,
238 buffer
239 .as_mut_ptr()
240 .cast::<llama_cpp_bindings_sys::llama_token>(),
241 -size,
242 add_bos,
243 )?
244 } else {
245 size
246 };
247
248 let size = usize::try_from(size)?;
249
250 unsafe { buffer.set_len(size) }
252
253 Ok(buffer)
254 }
255
256 pub fn token_attr(
262 &self,
263 LlamaToken(id): LlamaToken,
264 ) -> Result<LlamaTokenAttrs, LlamaTokenAttrsFromIntError> {
265 let token_type =
266 unsafe { llama_cpp_bindings_sys::llama_token_get_attr(self.vocab_ptr(), id) };
267
268 LlamaTokenAttrs::try_from(token_type)
269 }
270
271 pub fn token_to_piece(
287 &self,
288 token: &SampledToken,
289 decoder: &mut encoding_rs::Decoder,
290 special: bool,
291 lstrip: Option<NonZeroU16>,
292 ) -> Result<String, TokenToStringError> {
293 let (SampledToken::Content(inner)
294 | SampledToken::Reasoning(inner)
295 | SampledToken::ToolCall(inner)
296 | SampledToken::Undeterminable(inner)) = *token;
297 let bytes = match self.token_to_piece_bytes(inner, 8, special, lstrip) {
298 Err(TokenToStringError::InsufficientBufferSpace(required_size)) => {
299 let buffer_size: usize = (-required_size).try_into()?;
300
301 self.token_to_piece_bytes(inner, buffer_size, special, lstrip)
302 }
303 other => other,
304 }?;
305
306 let mut output_piece = String::with_capacity(bytes.len());
307 let (_result, _decoded_size, _had_replacements) =
308 decoder.decode_to_string(&bytes, &mut output_piece, false);
309
310 Ok(output_piece)
311 }
312
313 pub fn token_to_piece_bytes(
325 &self,
326 token: LlamaToken,
327 buffer_size: usize,
328 special: bool,
329 lstrip: Option<NonZeroU16>,
330 ) -> Result<Vec<u8>, TokenToStringError> {
331 let mut buffer: Vec<u8> = vec![0u8; buffer_size];
332 let buffer_len = c_int::try_from(buffer.len())?;
333 let lstrip = lstrip.map_or(0, |strip_count| i32::from(strip_count.get()));
334 let size = unsafe {
335 llama_cpp_bindings_sys::llama_token_to_piece(
336 self.vocab_ptr(),
337 token.0,
338 buffer.as_mut_ptr().cast::<c_char>(),
339 buffer_len,
340 lstrip,
341 special,
342 )
343 };
344
345 match size {
346 0 => Err(TokenToStringError::UnknownTokenType),
347 error_code if error_code.is_negative() => {
348 Err(TokenToStringError::InsufficientBufferSpace(error_code))
349 }
350 size => {
351 let written = usize::try_from(size)?;
352 buffer.truncate(written);
353
354 Ok(buffer)
355 }
356 }
357 }
358
359 #[must_use]
364 pub fn n_vocab(&self) -> i32 {
365 unsafe { llama_cpp_bindings_sys::llama_n_vocab(self.vocab_ptr()) }
366 }
367
368 pub fn vocab_type(&self) -> Result<VocabType, VocabTypeFromIntError> {
374 let vocab_type = unsafe { llama_cpp_bindings_sys::llama_vocab_type(self.vocab_ptr()) };
375
376 VocabType::try_from(vocab_type)
377 }
378
379 #[must_use]
382 pub fn n_embd(&self) -> c_int {
383 unsafe { llama_cpp_bindings_sys::llama_n_embd(self.model.as_ptr()) }
384 }
385
386 #[must_use]
388 pub fn size(&self) -> u64 {
389 unsafe { llama_cpp_bindings_sys::llama_model_size(self.model.as_ptr()) }
390 }
391
392 #[must_use]
394 pub fn n_params(&self) -> u64 {
395 unsafe { llama_cpp_bindings_sys::llama_model_n_params(self.model.as_ptr()) }
396 }
397
398 #[must_use]
400 pub fn is_recurrent(&self) -> bool {
401 unsafe { llama_cpp_bindings_sys::llama_model_is_recurrent(self.model.as_ptr()) }
402 }
403
404 pub fn n_layer(&self) -> Result<u32, std::num::TryFromIntError> {
410 u32::try_from(unsafe { llama_cpp_bindings_sys::llama_model_n_layer(self.model.as_ptr()) })
411 }
412
413 pub fn n_head(&self) -> Result<u32, std::num::TryFromIntError> {
419 u32::try_from(unsafe { llama_cpp_bindings_sys::llama_model_n_head(self.model.as_ptr()) })
420 }
421
422 pub fn n_head_kv(&self) -> Result<u32, std::num::TryFromIntError> {
428 u32::try_from(unsafe { llama_cpp_bindings_sys::llama_model_n_head_kv(self.model.as_ptr()) })
429 }
430
431 #[must_use]
435 pub fn is_hybrid(&self) -> bool {
436 unsafe { llama_cpp_bindings_sys::llama_model_is_hybrid(self.model.as_ptr()) }
437 }
438
439 pub fn meta_val_str(&self, key: &str) -> Result<String, MetaValError> {
444 let key_cstring = CString::new(key)?;
445 let key_ptr = key_cstring.as_ptr();
446
447 extract_meta_string(
448 |buf_ptr, buf_len| unsafe {
449 llama_cpp_bindings_sys::llama_model_meta_val_str(
450 self.model.as_ptr(),
451 key_ptr,
452 buf_ptr,
453 buf_len,
454 )
455 },
456 256,
457 )
458 }
459
460 #[must_use]
462 pub fn meta_count(&self) -> i32 {
463 unsafe { llama_cpp_bindings_sys::llama_model_meta_count(self.model.as_ptr()) }
464 }
465
466 pub fn meta_key_by_index(&self, index: i32) -> Result<String, MetaValError> {
471 extract_meta_string(
472 |buf_ptr, buf_len| unsafe {
473 llama_cpp_bindings_sys::llama_model_meta_key_by_index(
474 self.model.as_ptr(),
475 index,
476 buf_ptr,
477 buf_len,
478 )
479 },
480 256,
481 )
482 }
483
484 pub fn meta_val_str_by_index(&self, index: i32) -> Result<String, MetaValError> {
489 extract_meta_string(
490 |buf_ptr, buf_len| unsafe {
491 llama_cpp_bindings_sys::llama_model_meta_val_str_by_index(
492 self.model.as_ptr(),
493 index,
494 buf_ptr,
495 buf_len,
496 )
497 },
498 256,
499 )
500 }
501
502 #[must_use]
504 pub fn rope_type(&self) -> Option<RopeType> {
505 let raw = unsafe { llama_cpp_bindings_sys::llama_model_rope_type(self.model.as_ptr()) };
506
507 rope_type::rope_type_from_raw(raw)
508 }
509
510 pub fn chat_template(
528 &self,
529 name: Option<&str>,
530 ) -> Result<LlamaChatTemplate, ChatTemplateError> {
531 let name_cstr = name.map(CString::new);
532 let name_ptr = match name_cstr {
533 Some(Ok(name)) => name.as_ptr(),
534 _ => ptr::null(),
535 };
536 let result = unsafe {
537 llama_cpp_bindings_sys::llama_model_chat_template(self.model.as_ptr(), name_ptr)
538 };
539
540 if result.is_null() {
541 Err(ChatTemplateError::MissingTemplate)
542 } else {
543 let chat_template_cstr = unsafe { CStr::from_ptr(result) };
544
545 Ok(LlamaChatTemplate(chat_template_cstr.to_owned()))
546 }
547 }
548
549 pub fn load_from_file(
559 _: &LlamaBackend,
560 path: impl AsRef<Path>,
561 params: &LlamaModelParams,
562 ) -> Result<Self, LlamaModelLoadError> {
563 let path = path.as_ref();
564
565 let path_str = path
566 .to_str()
567 .ok_or_else(|| LlamaModelLoadError::PathToStrError(path.to_path_buf()))?;
568
569 if !path.exists() {
570 return Err(LlamaModelLoadError::FileNotFound(path.to_path_buf()));
571 }
572
573 let cstr = CString::new(path_str)?;
574 let mut out_model: *mut llama_cpp_bindings_sys::llama_model = ptr::null_mut();
575 let mut out_error: *mut c_char = ptr::null_mut();
576 let status = unsafe {
577 llama_cpp_bindings_sys::llama_rs_load_model_from_file(
578 cstr.as_ptr(),
579 params.params,
580 &raw mut out_model,
581 &raw mut out_error,
582 )
583 };
584 match status {
585 llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_OK => {
586 let model = NonNull::new(out_model)
587 .ok_or(LlamaModelLoadError::Unloadable)?;
588 Ok(Self {
589 model,
590 tok_env: OnceLock::new(),
591 })
592 }
593 llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL => {
594 if path.exists() {
595 Err(LlamaModelLoadError::Unloadable)
596 } else {
597 Err(LlamaModelLoadError::FileNotFound(path.to_path_buf()))
598 }
599 }
600 llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED => {
601 Err(LlamaModelLoadError::NotEnoughMemory)
602 }
603 llama_cpp_bindings_sys::LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION => {
604 let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
605 Err(LlamaModelLoadError::Reported { message })
606 }
607 other => unreachable!(
608 "llama_rs_load_model_from_file returned unrecognized status {other}"
609 ),
610 }
611 }
612
613 pub fn lora_adapter_init(
619 &self,
620 path: impl AsRef<Path>,
621 ) -> Result<LlamaLoraAdapter, LlamaLoraAdapterInitError> {
622 let path = path.as_ref();
623
624 let path_str = path
625 .to_str()
626 .ok_or_else(|| LlamaLoraAdapterInitError::PathToStrError(path.to_path_buf()))?;
627
628 if !path.exists() {
629 return Err(LlamaLoraAdapterInitError::FileNotFound(path.to_path_buf()));
630 }
631
632 let cstr = CString::new(path_str)?;
633 let raw_adapter = unsafe {
634 llama_cpp_bindings_sys::llama_adapter_lora_init(self.model.as_ptr(), cstr.as_ptr())
635 };
636
637 let Some(adapter) = NonNull::new(raw_adapter) else {
638 return Err(LlamaLoraAdapterInitError::Unloadable);
639 };
640
641 Ok(LlamaLoraAdapter {
642 lora_adapter: adapter,
643 })
644 }
645
646 pub fn apply_chat_template(
664 &self,
665 tmpl: &LlamaChatTemplate,
666 chat: &[LlamaChatMessage],
667 add_ass: bool,
668 ) -> Result<String, ApplyChatTemplateError> {
669 let message_length = chat.iter().fold(0, |acc, chat_message| {
670 acc + chat_message.role.to_bytes().len() + chat_message.content.to_bytes().len()
671 });
672 let mut buff: Vec<u8> = vec![0; message_length * 2];
673
674 let chat: Vec<llama_cpp_bindings_sys::llama_chat_message> = chat
675 .iter()
676 .map(|chat_message| llama_cpp_bindings_sys::llama_chat_message {
677 role: chat_message.role.as_ptr(),
678 content: chat_message.content.as_ptr(),
679 })
680 .collect();
681
682 let tmpl_ptr = tmpl.0.as_ptr();
683
684 let buff_len: i32 = buff.len().try_into()?;
685
686 let res = unsafe {
687 llama_cpp_bindings_sys::llama_chat_apply_template(
688 tmpl_ptr,
689 chat.as_ptr(),
690 chat.len(),
691 add_ass,
692 buff.as_mut_ptr().cast::<c_char>(),
693 buff_len,
694 )
695 };
696
697 if res > buff_len {
698 let required_size: usize = res.try_into()?;
699 buff.resize(required_size, 0);
700
701 let new_buff_len: i32 = buff.len().try_into()?;
702
703 let res = unsafe {
704 llama_cpp_bindings_sys::llama_chat_apply_template(
705 tmpl_ptr,
706 chat.as_ptr(),
707 chat.len(),
708 add_ass,
709 buff.as_mut_ptr().cast::<c_char>(),
710 new_buff_len,
711 )
712 };
713 let final_size: usize = res.try_into()?;
714
715 return truncated_buffer_to_string(buff, final_size);
716 }
717
718 let final_size: usize = res.try_into()?;
719
720 truncated_buffer_to_string(buff, final_size)
721 }
722
723 pub fn sampled_token_classifier(&self) -> SampledTokenClassifier<'_> {
735 let markers = match self.streaming_markers() {
736 Ok(markers) => markers,
737 Err(detection_error) => {
738 log::warn!(
739 "streaming markers detection failed; classifier will run blind: {detection_error}",
740 );
741 StreamingMarkers::default()
742 }
743 };
744
745 SampledTokenClassifier::new(self, markers)
746 }
747
748 pub fn streaming_markers(&self) -> Result<StreamingMarkers, MarkerDetectionError> {
757 let (reasoning_open_str, reasoning_close_str) =
758 invoke_detect_reasoning_markers(self.model.as_ptr())?;
759
760 let tool_call_haystack = invoke_compute_tool_call_haystack(self.model.as_ptr())?;
761
762 let autoparser_pair = tool_call_haystack.as_deref().and_then(
763 crate::extract_tool_call_markers_from_haystack::extract_tool_call_markers_from_haystack,
764 );
765
766 let (autoparser_open, autoparser_close) = match autoparser_pair {
767 Some(crate::tool_call_marker_pair::ToolCallMarkerPair { open, close }) => {
768 (Some(open), Some(close))
769 }
770 None => (None, None),
771 };
772
773 let resolved_tool_call_markers =
774 self.resolve_tool_call_marker_strings(autoparser_open, autoparser_close);
775
776 Ok(StreamingMarkers {
777 reasoning_open: self.tokenize_marker(reasoning_open_str.as_deref()),
778 reasoning_close: self.tokenize_marker(reasoning_close_str.as_deref()),
779 tool_call_open: self.tokenize_marker(resolved_tool_call_markers.open.as_deref()),
780 tool_call_close: self.tokenize_marker(resolved_tool_call_markers.close.as_deref()),
781 })
782 }
783
784 fn resolve_tool_call_marker_strings(
788 &self,
789 autoparser_open: Option<String>,
790 autoparser_close: Option<String>,
791 ) -> ResolvedToolCallMarkers {
792 if autoparser_open
793 .as_deref()
794 .is_some_and(|raw| !raw.trim().is_empty())
795 {
796 return ResolvedToolCallMarkers {
797 open: autoparser_open,
798 close: autoparser_close,
799 };
800 }
801 let Some(markers) = self.tool_call_markers() else {
802 return ResolvedToolCallMarkers {
803 open: autoparser_open,
804 close: autoparser_close,
805 };
806 };
807 let close = if markers.close.is_empty() {
808 None
809 } else {
810 Some(markers.close)
811 };
812 ResolvedToolCallMarkers {
813 open: Some(markers.open),
814 close,
815 }
816 }
817
818 pub fn reasoning_markers(&self) -> Result<Option<ReasoningMarkers>, MarkerDetectionError> {
821 let (open, close) = invoke_detect_reasoning_markers(self.model.as_ptr())?;
822
823 match (open, close) {
824 (Some(open), Some(close)) if !open.is_empty() && !close.is_empty() => {
825 Ok(Some(ReasoningMarkers { open, close }))
826 }
827 _ => Ok(None),
828 }
829 }
830
831 #[must_use]
837 pub fn tool_call_markers(&self) -> Option<ToolCallMarkers> {
838 let template = match self.chat_template(None) {
839 Ok(template) => template,
840 Err(error) => {
841 log::debug!(
842 "tool-call markers unavailable: chat template missing or invalid: {error}",
843 );
844 return None;
845 }
846 };
847 let template_str = match template.to_str() {
848 Ok(template_str) => template_str,
849 Err(error) => {
850 log::debug!(
851 "tool-call markers unavailable: chat template is not valid UTF-8: {error}",
852 );
853 return None;
854 }
855 };
856 tool_call_template_overrides::detect(template_str)
857 }
858
859 fn tokenize_marker(&self, marker: Option<&str>) -> Option<Vec<LlamaToken>> {
860 let marker = marker?.trim();
861 if marker.is_empty() {
862 return None;
863 }
864 match self.str_to_token(marker, AddBos::Never) {
865 Ok(tokens) if !tokens.is_empty() => Some(tokens),
866 Ok(_) => None,
867 Err(tokenize_error) => {
868 log::debug!(
869 "marker {marker:?} failed to tokenise; classifier will ignore it: {tokenize_error}",
870 );
871 None
872 }
873 }
874 }
875
876 pub fn parse_chat_message(
903 &self,
904 tools_json: &str,
905 input: &str,
906 is_partial: bool,
907 ) -> Result<ChatMessageParseOutcome, ParseChatMessageError> {
908 let tools_value: serde_json::Value =
909 serde_json::from_str(tools_json).map_err(ParseChatMessageError::ToolsJsonInvalid)?;
910 if !tools_value.is_array() {
911 return Err(ParseChatMessageError::ToolsJsonNotArray);
912 }
913
914 let reasoning_markers = self.reasoning_markers().ok().flatten();
915
916 for candidate in tool_call_template_overrides::known_marker_candidates() {
917 if let ToolCallFormatOutcome::Parsed(calls) =
918 tool_call_format::try_parse(input, &candidate)
919 {
920 let split =
921 split_reasoning_prefix(input, reasoning_markers.as_ref(), &candidate.open);
922 let mut parsed = ParsedChatMessage::new(split.content, split.reasoning, calls);
923 synthesize_missing_tool_call_ids(&mut parsed.tool_calls);
924 return Ok(ChatMessageParseOutcome::Recognized(parsed));
925 }
926 }
927
928 match self.parse_chat_message_via_ffi(tools_json, input, is_partial) {
929 Ok(mut parsed) => {
930 synthesize_missing_tool_call_ids(&mut parsed.tool_calls);
931 Ok(ChatMessageParseOutcome::Recognized(parsed))
932 }
933 Err(ParseChatMessageError::ParseFailed { message }) => {
934 Ok(ChatMessageParseOutcome::Unrecognized(RawChatMessage {
935 tools_json: tools_json.to_owned(),
936 text: input.to_owned(),
937 is_partial,
938 ffi_error_message: message,
939 }))
940 }
941 Err(other) => Err(other),
942 }
943 }
944
    /// Parses `input` with the vendored parser behind `llama_rs_parse_chat_message`.
    ///
    /// The function owns three FFI resources and releases all of them before returning:
    /// the parsed-chat `handle`, the parse error string `out_error`, and the error string
    /// `free_error` produced while freeing the handle.
    ///
    /// # Errors
    ///
    /// - `ToolsSerialization` when `tools_json` or `input` contains an interior NUL byte.
    /// - `NoChatTemplate`, `NoVocab`, `NotEnoughMemory` for the matching status codes.
    /// - `ParseFailed` with the message of a C++ exception thrown by the parser.
    /// - `DestructorFailed` when parsing succeeded but freeing the handle threw.
    /// - Any error produced by `collect_parsed_chat_message`.
    ///
    /// # Panics
    ///
    /// Panics when either FFI call returns a status code this function does not know.
    fn parse_chat_message_via_ffi(
        &self,
        tools_json: &str,
        input: &str,
        is_partial: bool,
    ) -> Result<ParsedChatMessage, ParseChatMessageError> {
        let tools_cstring = CString::new(tools_json)
            .map_err(|err| ParseChatMessageError::ToolsSerialization(err.to_string()))?;
        // An interior NUL in `input` is reported through the same variant as one in `tools_json`.
        let input_cstring = CString::new(input)
            .map_err(|err| ParseChatMessageError::ToolsSerialization(err.to_string()))?;

        // Out-parameters filled by the FFI call below.
        let mut handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat = ptr::null_mut();
        let mut out_error: *mut c_char = ptr::null_mut();

        let status = unsafe {
            llama_cpp_bindings_sys::llama_rs_parse_chat_message(
                self.model.as_ptr(),
                tools_cstring.as_ptr(),
                input_cstring.as_ptr(),
                i32::from(is_partial),
                &raw mut handle,
                &raw mut out_error,
            )
        };

        // Translate the status into a result; the handle is freed afterwards in every case.
        let parsed = match status {
            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_OK => {
                collect_parsed_chat_message(handle)
            }
            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE => {
                Err(ParseChatMessageError::NoChatTemplate)
            }
            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB => {
                Err(ParseChatMessageError::NoVocab)
            }
            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED => {
                Err(ParseChatMessageError::NotEnoughMemory)
            }
            llama_cpp_bindings_sys::LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION => {
                let message =
                    unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
                // The error string was consumed above; null the pointer so the frees below
                // do not release it a second time.
                out_error = ptr::null_mut();
                Err(ParseChatMessageError::ParseFailed { message })
            }
            other => {
                unreachable!("llama_rs_parse_chat_message returned unrecognized status {other}")
            }
        };

        // Release the handle regardless of the parse outcome.
        let mut free_error: *mut c_char = ptr::null_mut();
        let free_status = unsafe {
            llama_cpp_bindings_sys::llama_rs_parsed_chat_free(handle, &raw mut free_error)
        };
        // Every arm releases `out_error`.
        // NOTE(review): this passes a null pointer to `llama_rs_string_free` on most paths;
        // presumably that is a no-op — confirm against the C wrapper.
        match (parsed, free_status) {
            (Ok(value), llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_OK) => {
                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
                Ok(value)
            }
            (
                Ok(_),
                llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION,
            ) => {
                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
                let message =
                    unsafe { crate::ffi_error_reader::read_and_free_cpp_error(free_error) };
                Err(ParseChatMessageError::DestructorFailed { message })
            }
            (
                Ok(_),
                llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED,
            ) => {
                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
                Err(ParseChatMessageError::NotEnoughMemory)
            }
            (Ok(_), other) => {
                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) };
                unreachable!("llama_rs_parsed_chat_free returned unrecognized status {other}")
            }
            // A parse error takes precedence over whatever happened while freeing the handle.
            (Err(parse_err), _) => {
                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
                unsafe { llama_cpp_bindings_sys::llama_rs_string_free(free_error) };
                Err(parse_err)
            }
        }
    }
1031
1032 pub fn diagnose_tool_call_synthetic_renders(
1042 &self,
1043 ) -> Result<(String, String), MarkerDetectionError> {
1044 let (no_tools, with_tools) =
1045 invoke_diagnose_tool_call_synthetic_renders(self.model.as_ptr())?;
1046
1047 Ok((no_tools.unwrap_or_default(), with_tools.unwrap_or_default()))
1048 }
1049}
1050
1051impl LlamaModel {
1052 pub fn approximate_tok_env(&self) -> Arc<ApproximateTokEnv> {
1057 Arc::clone(self.tok_env.get_or_init(|| build_approximate_tok_env(self)))
1058 }
1059}
1060
1061fn build_approximate_tok_env(model: &LlamaModel) -> Arc<ApproximateTokEnv> {
1062 let n_vocab = model.n_vocab().cast_unsigned();
1063 let tok_eos = {
1064 let eot = unsafe { llama_cpp_bindings_sys::llama_vocab_eot(model.vocab_ptr()) };
1065 if eot == -1 {
1066 model.token_eos().0.cast_unsigned()
1067 } else {
1068 eot.cast_unsigned()
1069 }
1070 };
1071 let info = TokRxInfo::new(n_vocab, tok_eos);
1072
1073 let mut words = Vec::with_capacity(n_vocab as usize);
1074
1075 for token_id in 0..n_vocab.cast_signed() {
1076 let token = LlamaToken(token_id);
1077 let bytes = model
1078 .token_to_piece_bytes(token, 32, false, None)
1079 .unwrap_or_default();
1080 if bytes.is_empty() {
1081 let special_bytes = model
1082 .token_to_piece_bytes(token, 32, true, None)
1083 .unwrap_or_default();
1084 if special_bytes.is_empty() {
1085 words.push(vec![]);
1086 } else {
1087 let mut marked = Vec::with_capacity(special_bytes.len() + 1);
1088 marked.push(0xFF);
1089 marked.extend(special_bytes);
1090 words.push(marked);
1091 }
1092 } else {
1093 words.push(bytes);
1094 }
1095 }
1096
1097 let trie = TokTrie::from(&info, &words);
1098 Arc::new(ApproximateTokEnv::new(trie))
1099}
1100
1101fn collect_parsed_chat_message(
1102 handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
1103) -> Result<ParsedChatMessage, ParseChatMessageError> {
1104 if handle.is_null() {
1105 return Ok(ParsedChatMessage::default());
1106 }
1107
1108 let content = read_parsed_chat_content(handle)?;
1109 let reasoning_content = read_parsed_chat_reasoning_content(handle)?;
1110 let count = read_parsed_chat_tool_call_count(handle)?;
1111
1112 let mut tool_calls = Vec::with_capacity(count);
1113 for index in 0..count {
1114 let id = read_parsed_chat_tool_call_id(handle, index)?;
1115 let name = read_parsed_chat_tool_call_name(handle, index)?;
1116 let arguments_json = read_parsed_chat_tool_call_arguments(handle, index)?;
1117
1118 let arguments = ToolCallArguments::from_string(arguments_json);
1119 tool_calls.push(ParsedToolCall::new(id, name, arguments));
1120 }
1121
1122 Ok(ParsedChatMessage::new(
1123 content,
1124 reasoning_content,
1125 tool_calls,
1126 ))
1127}
1128
1129fn read_parsed_chat_content(
1130 handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
1131) -> Result<String, ParseChatMessageError> {
1132 let mut out_string: *mut c_char = ptr::null_mut();
1133 let mut out_error: *mut c_char = ptr::null_mut();
1134 let status = unsafe {
1135 llama_cpp_bindings_sys::llama_rs_parsed_chat_content(
1136 handle,
1137 &raw mut out_string,
1138 &raw mut out_error,
1139 )
1140 };
1141 match status {
1142 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_OK => {
1143 consume_accessor_string(out_string)
1144 }
1145 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED => {
1146 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1147 Err(ParseChatMessageError::NotEnoughMemory)
1148 }
1149 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION => {
1150 let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1151 Err(ParseChatMessageError::Reported { message })
1152 }
1153 other => unreachable!("llama_rs_parsed_chat_content returned unrecognized status {other}"),
1154 }
1155}
1156
1157fn read_parsed_chat_reasoning_content(
1158 handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
1159) -> Result<String, ParseChatMessageError> {
1160 let mut out_string: *mut c_char = ptr::null_mut();
1161 let mut out_error: *mut c_char = ptr::null_mut();
1162 let status = unsafe {
1163 llama_cpp_bindings_sys::llama_rs_parsed_chat_reasoning_content(
1164 handle,
1165 &raw mut out_string,
1166 &raw mut out_error,
1167 )
1168 };
1169 match status {
1170 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK => {
1171 consume_accessor_string(out_string)
1172 }
1173 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED => {
1174 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1175 Err(ParseChatMessageError::NotEnoughMemory)
1176 }
1177 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION => {
1178 let message =
1179 unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1180 Err(ParseChatMessageError::Reported { message })
1181 }
1182 other => unreachable!(
1183 "llama_rs_parsed_chat_reasoning_content returned unrecognized status {other}"
1184 ),
1185 }
1186}
1187
1188fn read_parsed_chat_tool_call_count(
1189 handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
1190) -> Result<usize, ParseChatMessageError> {
1191 let mut out_count: usize = 0;
1192 let mut out_error: *mut c_char = ptr::null_mut();
1193 let status = unsafe {
1194 llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_count(
1195 handle,
1196 &raw mut out_count,
1197 &raw mut out_error,
1198 )
1199 };
1200 match status {
1201 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK => Ok(out_count),
1202 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED => {
1203 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1204 Err(ParseChatMessageError::NotEnoughMemory)
1205 }
1206 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION => {
1207 let message =
1208 unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1209 Err(ParseChatMessageError::Reported { message })
1210 }
1211 other => unreachable!(
1212 "llama_rs_parsed_chat_tool_call_count returned unrecognized status {other}"
1213 ),
1214 }
1215}
1216
1217fn read_parsed_chat_tool_call_id(
1218 handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
1219 index: usize,
1220) -> Result<String, ParseChatMessageError> {
1221 let mut out_string: *mut c_char = ptr::null_mut();
1222 let mut out_error: *mut c_char = ptr::null_mut();
1223 let status = unsafe {
1224 llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_id(
1225 handle,
1226 index,
1227 &raw mut out_string,
1228 &raw mut out_error,
1229 )
1230 };
1231 match status {
1232 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK => {
1233 consume_accessor_string(out_string)
1234 }
1235 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS => {
1236 Err(ParseChatMessageError::ToolCallIdIndexOutOfBounds { index })
1237 }
1238 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED => {
1239 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1240 Err(ParseChatMessageError::NotEnoughMemory)
1241 }
1242 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION => {
1243 let message =
1244 unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1245 Err(ParseChatMessageError::Reported { message })
1246 }
1247 other => unreachable!(
1248 "llama_rs_parsed_chat_tool_call_id returned unrecognized status {other}"
1249 ),
1250 }
1251}
1252
1253fn read_parsed_chat_tool_call_name(
1254 handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
1255 index: usize,
1256) -> Result<String, ParseChatMessageError> {
1257 let mut out_string: *mut c_char = ptr::null_mut();
1258 let mut out_error: *mut c_char = ptr::null_mut();
1259 let status = unsafe {
1260 llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_name(
1261 handle,
1262 index,
1263 &raw mut out_string,
1264 &raw mut out_error,
1265 )
1266 };
1267 match status {
1268 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK => {
1269 consume_accessor_string(out_string)
1270 }
1271 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS => {
1272 Err(ParseChatMessageError::ToolCallNameIndexOutOfBounds { index })
1273 }
1274 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED => {
1275 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1276 Err(ParseChatMessageError::NotEnoughMemory)
1277 }
1278 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION => {
1279 let message =
1280 unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1281 Err(ParseChatMessageError::Reported { message })
1282 }
1283 other => unreachable!(
1284 "llama_rs_parsed_chat_tool_call_name returned unrecognized status {other}"
1285 ),
1286 }
1287}
1288
1289fn read_parsed_chat_tool_call_arguments(
1290 handle: *mut llama_cpp_bindings_sys::llama_rs_parsed_chat,
1291 index: usize,
1292) -> Result<String, ParseChatMessageError> {
1293 let mut out_string: *mut c_char = ptr::null_mut();
1294 let mut out_error: *mut c_char = ptr::null_mut();
1295 let status = unsafe {
1296 llama_cpp_bindings_sys::llama_rs_parsed_chat_tool_call_arguments(
1297 handle,
1298 index,
1299 &raw mut out_string,
1300 &raw mut out_error,
1301 )
1302 };
1303 match status {
1304 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK => {
1305 consume_accessor_string(out_string)
1306 }
1307 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS => {
1308 Err(ParseChatMessageError::ToolCallArgumentsIndexOutOfBounds { index })
1309 }
1310 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED => {
1311 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1312 Err(ParseChatMessageError::NotEnoughMemory)
1313 }
1314 llama_cpp_bindings_sys::LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION => {
1315 let message =
1316 unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1317 Err(ParseChatMessageError::Reported { message })
1318 }
1319 other => unreachable!(
1320 "llama_rs_parsed_chat_tool_call_arguments returned unrecognized status {other}"
1321 ),
1322 }
1323}
1324
1325fn consume_accessor_string(ptr: *mut c_char) -> Result<String, ParseChatMessageError> {
1326 if ptr.is_null() {
1327 return Ok(String::new());
1328 }
1329 let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec();
1330 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(ptr) };
1331 Ok(String::from_utf8(bytes)?)
1332}
1333
/// Result of separating a model output into its reasoning span and its visible content.
struct ReasoningSplit {
    /// Text found between the reasoning open and close markers; empty when no complete
    /// reasoning span was located.
    reasoning: String,
    /// Text that remains for the caller once the reasoning span has been removed.
    content: String,
}
1338
1339fn split_reasoning_prefix(
1340 input: &str,
1341 reasoning_markers: Option<&ReasoningMarkers>,
1342 tool_call_open: &str,
1343) -> ReasoningSplit {
1344 let content_only = || ReasoningSplit {
1345 reasoning: String::new(),
1346 content: prefix_before(input, tool_call_open),
1347 };
1348
1349 let Some(reasoning_markers) = reasoning_markers else {
1350 return content_only();
1351 };
1352 let Some(open_pos) = input.find(&reasoning_markers.open) else {
1353 return content_only();
1354 };
1355
1356 let after_open = &input[open_pos + reasoning_markers.open.len()..];
1357 let Some(close_offset) = after_open.find(&reasoning_markers.close) else {
1358 return content_only();
1359 };
1360
1361 let reasoning = after_open[..close_offset].to_owned();
1362 let after_close = &after_open[close_offset + reasoning_markers.close.len()..];
1363
1364 ReasoningSplit {
1365 reasoning,
1366 content: prefix_before(after_close, tool_call_open),
1367 }
1368}
1369
/// Returns an owned copy of `text` up to, but not including, the first occurrence of `marker`.
///
/// When `marker` does not occur, the whole of `text` is returned.
fn prefix_before(text: &str, marker: &str) -> String {
    let head = text
        .split_once(marker)
        .map_or(text, |(before_marker, _)| before_marker);

    head.to_owned()
}
1374
1375fn synthesize_missing_tool_call_ids(tool_calls: &mut [ParsedToolCall]) {
1376 for (index, call) in tool_calls.iter_mut().enumerate() {
1377 if call.id.is_empty() {
1378 call.id = format!("call_{index}");
1379 }
1380 }
1381}
1382
1383fn invoke_detect_reasoning_markers(
1384 model: *const llama_cpp_bindings_sys::llama_model,
1385) -> Result<(Option<String>, Option<String>), MarkerDetectionError> {
1386 let mut out_open: *mut c_char = ptr::null_mut();
1387 let mut out_close: *mut c_char = ptr::null_mut();
1388 let mut out_error: *mut c_char = ptr::null_mut();
1389
1390 let status = unsafe {
1391 llama_cpp_bindings_sys::llama_rs_detect_reasoning_markers(
1392 model,
1393 &raw mut out_open,
1394 &raw mut out_close,
1395 &raw mut out_error,
1396 )
1397 };
1398
1399 let parsed = match status {
1400 llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_OK => {
1401 collect_optional_cstr_pair(out_open, out_close)
1402 }
1403 llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED => {
1404 Err(MarkerDetectionError::NotEnoughMemory)
1405 }
1406 llama_cpp_bindings_sys::LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION => {
1407 let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1408 Err(MarkerDetectionError::ReasoningMarkerDetectionFailed { message })
1409 }
1410 other => unreachable!(
1411 "llama_rs_detect_reasoning_markers returned unrecognized status {other}"
1412 ),
1413 };
1414
1415 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_open) };
1416 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_close) };
1417 if !matches!(
1418 parsed,
1419 Err(MarkerDetectionError::ReasoningMarkerDetectionFailed { .. })
1420 ) {
1421 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1422 }
1423
1424 parsed
1425}
1426
1427fn invoke_compute_tool_call_haystack(
1428 model: *const llama_cpp_bindings_sys::llama_model,
1429) -> Result<Option<String>, MarkerDetectionError> {
1430 let mut out_haystack: *mut c_char = ptr::null_mut();
1431 let mut out_error: *mut c_char = ptr::null_mut();
1432
1433 let status = unsafe {
1434 llama_cpp_bindings_sys::llama_rs_compute_tool_call_haystack(
1435 model,
1436 &raw mut out_haystack,
1437 &raw mut out_error,
1438 )
1439 };
1440
1441 let parsed = match status {
1442 llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK => {
1443 read_optional_owned_cstr(out_haystack)
1444 }
1445 llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED => {
1446 Err(MarkerDetectionError::NotEnoughMemory)
1447 }
1448 llama_cpp_bindings_sys::LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION => {
1449 let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1450 Err(MarkerDetectionError::ToolCallHaystackComputationFailed { message })
1451 }
1452 other => unreachable!(
1453 "llama_rs_compute_tool_call_haystack returned unrecognized status {other}"
1454 ),
1455 };
1456
1457 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_haystack) };
1458 if !matches!(
1459 parsed,
1460 Err(MarkerDetectionError::ToolCallHaystackComputationFailed { .. })
1461 ) {
1462 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1463 }
1464
1465 parsed
1466}
1467
1468fn invoke_diagnose_tool_call_synthetic_renders(
1469 model: *const llama_cpp_bindings_sys::llama_model,
1470) -> Result<(Option<String>, Option<String>), MarkerDetectionError> {
1471 let mut out_no_tools: *mut c_char = ptr::null_mut();
1472 let mut out_with_tools: *mut c_char = ptr::null_mut();
1473 let mut out_error: *mut c_char = ptr::null_mut();
1474
1475 let status = unsafe {
1476 llama_cpp_bindings_sys::llama_rs_diagnose_tool_call_synthetic_renders(
1477 model,
1478 &raw mut out_no_tools,
1479 &raw mut out_with_tools,
1480 &raw mut out_error,
1481 )
1482 };
1483
1484 let parsed = match status {
1485 llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK => {
1486 collect_optional_cstr_pair(out_no_tools, out_with_tools)
1487 }
1488 llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED => {
1489 Err(MarkerDetectionError::NotEnoughMemory)
1490 }
1491 llama_cpp_bindings_sys::LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION => {
1492 let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1493 Err(MarkerDetectionError::ToolCallSyntheticRenderDiagnosisFailed { message })
1494 }
1495 other => unreachable!(
1496 "llama_rs_diagnose_tool_call_synthetic_renders returned unrecognized status {other}"
1497 ),
1498 };
1499
1500 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_no_tools) };
1501 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_with_tools) };
1502 if !matches!(
1503 parsed,
1504 Err(MarkerDetectionError::ToolCallSyntheticRenderDiagnosisFailed { .. })
1505 ) {
1506 unsafe { llama_cpp_bindings_sys::llama_rs_string_free(out_error) };
1507 }
1508
1509 parsed
1510}
1511
1512fn read_optional_owned_cstr(ptr: *const c_char) -> Result<Option<String>, MarkerDetectionError> {
1513 if ptr.is_null() {
1514 return Ok(None);
1515 }
1516
1517 let bytes = unsafe { CStr::from_ptr(ptr) }.to_bytes().to_vec();
1518
1519 Ok(Some(String::from_utf8(bytes)?))
1520}
1521
1522fn invoke_rs_tokenize(
1523 vocab: *const llama_cpp_bindings_sys::llama_vocab,
1524 text: *const c_char,
1525 text_len: c_int,
1526 tokens: *mut llama_cpp_bindings_sys::llama_token,
1527 n_tokens_max: c_int,
1528 add_bos: bool,
1529) -> Result<c_int, StringToTokenError> {
1530 let mut out_count: i32 = 0;
1531 let mut out_error: *mut c_char = ptr::null_mut();
1532 let status = unsafe {
1533 llama_cpp_bindings_sys::llama_rs_tokenize(
1534 vocab,
1535 text,
1536 text_len,
1537 tokens,
1538 n_tokens_max,
1539 add_bos,
1540 true,
1541 &raw mut out_count,
1542 &raw mut out_error,
1543 )
1544 };
1545 match status {
1546 llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_OK => Ok(out_count),
1547 llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED => {
1548 Err(StringToTokenError::NotEnoughMemory)
1549 }
1550 llama_cpp_bindings_sys::LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION => {
1551 let message = unsafe { crate::ffi_error_reader::read_and_free_cpp_error(out_error) };
1552 Err(StringToTokenError::Reported { message })
1553 }
1554 other => unreachable!("llama_rs_tokenize returned unrecognized status {other}"),
1555 }
1556}
1557
1558fn collect_optional_cstr_pair(
1559 first_ptr: *const c_char,
1560 second_ptr: *const c_char,
1561) -> Result<(Option<String>, Option<String>), MarkerDetectionError> {
1562 let first = read_optional_owned_cstr(first_ptr)?;
1563 let second = read_optional_owned_cstr(second_ptr)?;
1564 Ok((first, second))
1565}
1566
1567fn extract_meta_string<TCFunction>(
1568 c_function: TCFunction,
1569 capacity: usize,
1570) -> Result<String, MetaValError>
1571where
1572 TCFunction: Fn(*mut c_char, usize) -> i32,
1573{
1574 let mut buffer = vec![0u8; capacity];
1575 let result = c_function(buffer.as_mut_ptr().cast::<c_char>(), buffer.len());
1576
1577 if result < 0 {
1578 return Err(MetaValError::NegativeReturn(result));
1579 }
1580
1581 let returned_len = result.cast_unsigned() as usize;
1582
1583 if returned_len >= capacity {
1584 return extract_meta_string(c_function, returned_len + 1);
1585 }
1586
1587 if buffer.get(returned_len) != Some(&0) {
1588 return Err(MetaValError::NegativeReturn(-1));
1589 }
1590
1591 buffer.truncate(returned_len);
1592
1593 Ok(String::from_utf8(buffer)?)
1594}
1595
1596impl Drop for LlamaModel {
1597 fn drop(&mut self) {
1598 unsafe { llama_cpp_bindings_sys::llama_free_model(self.model.as_ptr()) }
1599 }
1600}
1601
#[cfg(test)]
mod extract_meta_string_tests {
    use super::extract_meta_string;
    use crate::MetaValError;

    /// Copies `bytes` to the front of the buffer handed to an `extract_meta_string` callback.
    fn write_prefix(buf_ptr: *mut std::ffi::c_char, buf_len: usize, bytes: &[u8]) {
        // SAFETY: `extract_meta_string` passes a pointer to a live buffer together with that
        // buffer's exact length.
        let buffer = unsafe { std::slice::from_raw_parts_mut(buf_ptr.cast::<u8>(), buf_len) };
        buffer[..bytes.len()].copy_from_slice(bytes);
    }

    /// A reported length whose following byte is not NUL must be rejected.
    #[test]
    fn returns_error_when_null_terminator_missing() {
        let unterminated = |buf_ptr, buf_len| {
            write_prefix(buf_ptr, buf_len, b"abc");
            2
        };

        let outcome = extract_meta_string(unterminated, 4);

        assert_eq!(outcome.unwrap_err(), MetaValError::NegativeReturn(-1));
    }

    /// A negative status from the callback is surfaced unchanged.
    #[test]
    fn returns_error_for_negative_return_value() {
        let outcome = extract_meta_string(|_buf_ptr, _buf_len| -5, 4);

        assert_eq!(outcome.unwrap_err(), MetaValError::NegativeReturn(-5));
    }

    /// Properly terminated but non-UTF-8 bytes produce the UTF-8 conversion error.
    #[test]
    fn returns_error_for_invalid_utf8_data() {
        let invalid_bytes = |buf_ptr, buf_len| {
            write_prefix(buf_ptr, buf_len, &[0xFF, 0xFE, 0]);
            2
        };

        let outcome = extract_meta_string(invalid_bytes, 4);

        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().to_string().contains("FromUtf8Error"));
    }

    /// A reported length beyond the initial capacity triggers a second call that succeeds.
    #[test]
    fn triggers_buffer_resize_when_returned_len_exceeds_capacity() {
        let initial_capacity: usize = 4;
        let length_exceeding_initial_capacity = 10;
        let written_length = 2;
        let invocations = std::cell::Cell::new(0);

        let outcome = extract_meta_string(
            |buf_ptr, buf_len| {
                let previous_calls = invocations.get();
                invocations.set(previous_calls + 1);

                if previous_calls == 0 {
                    return length_exceeding_initial_capacity;
                }

                write_prefix(buf_ptr, buf_len, b"hi\0");
                written_length
            },
            initial_capacity,
        );

        assert_eq!(outcome.unwrap(), "hi");
    }

    /// Interior NUL bytes cannot be converted into a C string.
    #[test]
    fn cstring_with_validated_len_null_byte_returns_error() {
        let outcome = super::cstring_with_validated_len("null\0byte");

        assert!(outcome.is_err());
    }

    /// A length of `usize::MAX` is rejected by the tokenizer length validation.
    #[test]
    fn validate_string_length_overflow_returns_error() {
        let outcome = super::validate_string_length_for_tokenizer(usize::MAX);

        assert!(outcome.is_err());
    }

    /// Invalid UTF-8 bytes are rejected when converting a truncated buffer to a string.
    #[test]
    fn truncated_buffer_to_string_with_invalid_utf8_returns_error() {
        let invalid_utf8 = vec![0xff, 0xfe, 0xfd];

        let outcome = super::truncated_buffer_to_string(invalid_utf8, 3);

        assert!(outcome.is_err());
    }
}