llama_cpp_4/context/params.rs
1//! A safe wrapper around `llama_context_params`.
2use std::fmt::Debug;
3use std::num::NonZeroU32;
4
5/// A rusty wrapper around `llama_context_type`.
6#[repr(u32)]
7#[derive(Copy, Clone, Debug, PartialEq, Eq)]
8pub enum LlamaContextType {
9 /// Default context (standard inference).
10 Default = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_DEFAULT,
11 /// Multi-token-prediction draft context, used as the draft side of speculative decoding.
12 Mtp = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP,
13}
14
15impl From<llama_cpp_sys_4::llama_context_type> for LlamaContextType {
16 fn from(value: llama_cpp_sys_4::llama_context_type) -> Self {
17 match value {
18 llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP => Self::Mtp,
19 _ => Self::Default,
20 }
21 }
22}
23
24impl From<LlamaContextType> for llama_cpp_sys_4::llama_context_type {
25 fn from(value: LlamaContextType) -> Self {
26 value as Self
27 }
28}
29
/// A rusty wrapper around `rope_scaling_type`.
///
/// The discriminants are the raw integer values exchanged with llama.cpp.
///
/// NOTE(review): upstream llama.cpp also appears to define a long-rope scaling
/// type (value `3`); it has no variant here and therefore converts to
/// [`RopeScalingType::Unspecified`] — confirm whether that is intended.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RopeScalingType {
    /// The scaling type is unspecified
    Unspecified = -1,
    /// No scaling
    None = 0,
    /// Linear scaling
    Linear = 1,
    /// Yarn scaling
    Yarn = 2,
}

/// Create a `RopeScalingType` from a `c_int` - returns [`RopeScalingType::Unspecified`] if
/// the value is not recognized.
impl From<i32> for RopeScalingType {
    fn from(value: i32) -> Self {
        // Look the value up through the enum-to-integer conversion so the
        // discriminants declared on the enum stay the single source of truth.
        [Self::None, Self::Linear, Self::Yarn]
            .iter()
            .copied()
            .find(|&candidate| i32::from(candidate) == value)
            .unwrap_or(Self::Unspecified)
    }
}

/// Create a `c_int` from a `RopeScalingType`.
impl From<RopeScalingType> for i32 {
    fn from(value: RopeScalingType) -> Self {
        // `#[repr(i8)]` makes the cast yield the declared discriminant;
        // widening `i8` to `i32` is lossless.
        i32::from(value as i8)
    }
}
68
/// A rusty wrapper around `LLAMA_POOLING_TYPE`.
///
/// The discriminants are the raw integer values exchanged with llama.cpp.
///
/// NOTE(review): upstream llama.cpp also appears to define a rank pooling type
/// (value `4`); it has no variant here and therefore converts to
/// [`LlamaPoolingType::Unspecified`] — confirm whether that is intended.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LlamaPoolingType {
    /// The pooling type is unspecified
    Unspecified = -1,
    /// No pooling
    None = 0,
    /// Mean pooling
    Mean = 1,
    /// CLS pooling
    Cls = 2,
    /// Last pooling
    Last = 3,
}

/// Create a `LlamaPoolingType` from a `c_int`; any value that does not match a
/// known variant yields [`LlamaPoolingType::Unspecified`].
impl From<i32> for LlamaPoolingType {
    fn from(value: i32) -> Self {
        // Search the known variants by their integer value instead of
        // repeating the discriminants in a second table.
        [Self::None, Self::Mean, Self::Cls, Self::Last]
            .iter()
            .copied()
            .find(|&candidate| i32::from(candidate) == value)
            .unwrap_or(Self::Unspecified)
    }
}

/// Create a `c_int` from a `LlamaPoolingType`.
impl From<LlamaPoolingType> for i32 {
    fn from(value: LlamaPoolingType) -> Self {
        // `#[repr(i8)]` makes the cast yield the declared discriminant;
        // widening `i8` to `i32` is lossless.
        i32::from(value as i8)
    }
}
111
/// A safe wrapper around `llama_context_params`.
///
/// Generally this should be created with [`Default::default()`] and then modified with `with_*` methods.
///
/// Besides the raw FFI parameter struct, the wrapper stores one Rust-side flag
/// (`attn_rot_disabled`) alongside it.
///
/// # Examples
///
/// ```rust
/// # use std::num::NonZeroU32;
/// use llama_cpp_4::context::params::LlamaContextParams;
///
/// let ctx_params = LlamaContextParams::default()
///     .with_n_ctx(NonZeroU32::new(2048));
///
/// assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));
/// ```
#[derive(Debug, Clone)]
#[allow(
    missing_docs,
    clippy::struct_excessive_bools,
    clippy::module_name_repetitions
)]
pub struct LlamaContextParams {
    /// The raw `llama_cpp_sys_4` parameter struct that the `with_*` setters
    /// write to and the getters read from.
    pub(crate) context_params: llama_cpp_sys_4::llama_context_params,
    /// When `true`, the `TurboQuant` attention rotation (PR #21038) will be
    /// disabled for any context created from these params.
    pub(crate) attn_rot_disabled: bool,
}
139
/// SAFETY: the wrapped `llama_context_params` holds raw pointers, which is what
/// prevents `Send`/`Sync` from being implemented automatically.
///
/// NOTE(review): the original justification was that those pointers can be
/// neither set nor read through this type. However, `with_cb_eval_user_data`
/// and `with_tensor_capture` do store a caller-supplied raw pointer, so
/// soundness presumably depends on callers only supplying pointers whose
/// pointee may be sent to / shared with other threads — TODO confirm.
unsafe impl Send for LlamaContextParams {}
unsafe impl Sync for LlamaContextParams {}
143
144impl LlamaContextParams {
145 /// Set the side of the context
146 ///
147 /// # Examples
148 ///
149 /// ```rust
150 /// # use std::num::NonZeroU32;
151 /// use llama_cpp_4::context::params::LlamaContextParams;
152 /// let params = LlamaContextParams::default();
153 /// let params = params.with_n_ctx(NonZeroU32::new(2048));
154 /// assert_eq!(params.n_ctx(), NonZeroU32::new(2048));
155 /// ```
156 #[must_use]
157 pub fn with_n_ctx(mut self, n_ctx: Option<NonZeroU32>) -> Self {
158 self.context_params.n_ctx = n_ctx.map_or(0, std::num::NonZeroU32::get);
159 self
160 }
161
162 /// Get the size of the context.
163 ///
164 /// [`None`] if the context size is specified by the model and not the context.
165 ///
166 /// # Examples
167 ///
168 /// ```rust
169 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
170 /// assert_eq!(params.n_ctx(), std::num::NonZeroU32::new(512));
171 #[must_use]
172 pub fn n_ctx(&self) -> Option<NonZeroU32> {
173 NonZeroU32::new(self.context_params.n_ctx)
174 }
175
176 /// Set the `n_batch`
177 ///
178 /// # Examples
179 ///
180 /// ```rust
181 /// # use std::num::NonZeroU32;
182 /// use llama_cpp_4::context::params::LlamaContextParams;
183 /// let params = LlamaContextParams::default()
184 /// .with_n_batch(2048);
185 /// assert_eq!(params.n_batch(), 2048);
186 /// ```
187 #[must_use]
188 pub fn with_n_batch(mut self, n_batch: u32) -> Self {
189 self.context_params.n_batch = n_batch;
190 self
191 }
192
193 /// Get the `n_batch`
194 ///
195 /// # Examples
196 ///
197 /// ```rust
198 /// use llama_cpp_4::context::params::LlamaContextParams;
199 /// let params = LlamaContextParams::default();
200 /// assert_eq!(params.n_batch(), 2048);
201 /// ```
202 #[must_use]
203 pub fn n_batch(&self) -> u32 {
204 self.context_params.n_batch
205 }
206
207 /// Set the `n_ubatch`
208 ///
209 /// # Examples
210 ///
211 /// ```rust
212 /// # use std::num::NonZeroU32;
213 /// use llama_cpp_4::context::params::LlamaContextParams;
214 /// let params = LlamaContextParams::default()
215 /// .with_n_ubatch(512);
216 /// assert_eq!(params.n_ubatch(), 512);
217 /// ```
218 #[must_use]
219 pub fn with_n_ubatch(mut self, n_ubatch: u32) -> Self {
220 self.context_params.n_ubatch = n_ubatch;
221 self
222 }
223
224 /// Get the `n_ubatch`
225 ///
226 /// # Examples
227 ///
228 /// ```rust
229 /// use llama_cpp_4::context::params::LlamaContextParams;
230 /// let params = LlamaContextParams::default();
231 /// assert_eq!(params.n_ubatch(), 512);
232 /// ```
233 #[must_use]
234 pub fn n_ubatch(&self) -> u32 {
235 self.context_params.n_ubatch
236 }
237
238 /// Set the context type (e.g. [`LlamaContextType::Mtp`] to load this context as a
239 /// multi-token-prediction draft head used by upstream's `draft-mtp` speculative decoder).
240 #[must_use]
241 pub fn with_ctx_type(mut self, ctx_type: LlamaContextType) -> Self {
242 self.context_params.ctx_type = ctx_type.into();
243 self
244 }
245
246 /// Get the configured context type.
247 #[must_use]
248 pub fn ctx_type(&self) -> LlamaContextType {
249 self.context_params.ctx_type.into()
250 }
251
252 /// Set the number of recurrent-state snapshots per sequence used for MTP rollback.
253 #[must_use]
254 pub fn with_n_rs_seq(mut self, n_rs_seq: u32) -> Self {
255 self.context_params.n_rs_seq = n_rs_seq;
256 self
257 }
258
259 /// Get the number of recurrent-state snapshots per sequence used for MTP rollback.
260 #[must_use]
261 pub fn n_rs_seq(&self) -> u32 {
262 self.context_params.n_rs_seq
263 }
264
265 /// Set the `flash_attention` parameter
266 ///
267 /// # Examples
268 ///
269 /// ```rust
270 /// use llama_cpp_4::context::params::LlamaContextParams;
271 /// let params = LlamaContextParams::default()
272 /// .with_flash_attention(true);
273 /// assert_eq!(params.flash_attention(), true);
274 /// ```
275 #[must_use]
276 pub fn with_flash_attention(mut self, enabled: bool) -> Self {
277 self.context_params.flash_attn_type = if enabled {
278 llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
279 } else {
280 llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_DISABLED
281 };
282 self
283 }
284
285 /// Get the `flash_attention` parameter
286 ///
287 /// # Examples
288 ///
289 /// ```rust
290 /// use llama_cpp_4::context::params::LlamaContextParams;
291 /// let params = LlamaContextParams::default();
292 /// assert_eq!(params.flash_attention(), false);
293 /// ```
294 #[must_use]
295 pub fn flash_attention(&self) -> bool {
296 self.context_params.flash_attn_type == llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
297 }
298
299 /// Set the `offload_kqv` parameter to control offloading KV cache & KQV ops to GPU
300 ///
301 /// # Examples
302 ///
303 /// ```rust
304 /// use llama_cpp_4::context::params::LlamaContextParams;
305 /// let params = LlamaContextParams::default()
306 /// .with_offload_kqv(false);
307 /// assert_eq!(params.offload_kqv(), false);
308 /// ```
309 #[must_use]
310 pub fn with_offload_kqv(mut self, enabled: bool) -> Self {
311 self.context_params.offload_kqv = enabled;
312 self
313 }
314
315 /// Get the `offload_kqv` parameter
316 ///
317 /// # Examples
318 ///
319 /// ```rust
320 /// use llama_cpp_4::context::params::LlamaContextParams;
321 /// let params = LlamaContextParams::default();
322 /// assert_eq!(params.offload_kqv(), true);
323 /// ```
324 #[must_use]
325 pub fn offload_kqv(&self) -> bool {
326 self.context_params.offload_kqv
327 }
328
329 /// Set the type of rope scaling.
330 ///
331 /// # Examples
332 ///
333 /// ```rust
334 /// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
335 /// let params = LlamaContextParams::default()
336 /// .with_rope_scaling_type(RopeScalingType::Linear);
337 /// assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);
338 /// ```
339 #[must_use]
340 pub fn with_rope_scaling_type(mut self, rope_scaling_type: RopeScalingType) -> Self {
341 self.context_params.rope_scaling_type = i32::from(rope_scaling_type);
342 self
343 }
344
345 /// Get the type of rope scaling.
346 ///
347 /// # Examples
348 ///
349 /// ```rust
350 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
351 /// assert_eq!(params.rope_scaling_type(), llama_cpp_4::context::params::RopeScalingType::Unspecified);
352 /// ```
353 #[must_use]
354 pub fn rope_scaling_type(&self) -> RopeScalingType {
355 RopeScalingType::from(self.context_params.rope_scaling_type)
356 }
357
358 /// Set the rope frequency base.
359 ///
360 /// # Examples
361 ///
362 /// ```rust
363 /// use llama_cpp_4::context::params::LlamaContextParams;
364 /// let params = LlamaContextParams::default()
365 /// .with_rope_freq_base(0.5);
366 /// assert_eq!(params.rope_freq_base(), 0.5);
367 /// ```
368 #[must_use]
369 pub fn with_rope_freq_base(mut self, rope_freq_base: f32) -> Self {
370 self.context_params.rope_freq_base = rope_freq_base;
371 self
372 }
373
374 /// Get the rope frequency base.
375 ///
376 /// # Examples
377 ///
378 /// ```rust
379 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
380 /// assert_eq!(params.rope_freq_base(), 0.0);
381 /// ```
382 #[must_use]
383 pub fn rope_freq_base(&self) -> f32 {
384 self.context_params.rope_freq_base
385 }
386
387 /// Set the rope frequency scale.
388 ///
389 /// # Examples
390 ///
391 /// ```rust
392 /// use llama_cpp_4::context::params::LlamaContextParams;
393 /// let params = LlamaContextParams::default()
394 /// .with_rope_freq_scale(0.5);
395 /// assert_eq!(params.rope_freq_scale(), 0.5);
396 /// ```
397 #[must_use]
398 pub fn with_rope_freq_scale(mut self, rope_freq_scale: f32) -> Self {
399 self.context_params.rope_freq_scale = rope_freq_scale;
400 self
401 }
402
403 /// Get the rope frequency scale.
404 ///
405 /// # Examples
406 ///
407 /// ```rust
408 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
409 /// assert_eq!(params.rope_freq_scale(), 0.0);
410 /// ```
411 #[must_use]
412 pub fn rope_freq_scale(&self) -> f32 {
413 self.context_params.rope_freq_scale
414 }
415
416 /// Get the number of threads.
417 ///
418 /// # Examples
419 ///
420 /// ```rust
421 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
422 /// assert_eq!(params.n_threads(), 4);
423 /// ```
424 #[must_use]
425 pub fn n_threads(&self) -> i32 {
426 self.context_params.n_threads
427 }
428
429 /// Get the number of threads allocated for batches.
430 ///
431 /// # Examples
432 ///
433 /// ```rust
434 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
435 /// assert_eq!(params.n_threads_batch(), 4);
436 /// ```
437 #[must_use]
438 pub fn n_threads_batch(&self) -> i32 {
439 self.context_params.n_threads_batch
440 }
441
442 /// Set the number of threads.
443 ///
444 /// # Examples
445 ///
446 /// ```rust
447 /// use llama_cpp_4::context::params::LlamaContextParams;
448 /// let params = LlamaContextParams::default()
449 /// .with_n_threads(8);
450 /// assert_eq!(params.n_threads(), 8);
451 /// ```
452 #[must_use]
453 pub fn with_n_threads(mut self, n_threads: i32) -> Self {
454 self.context_params.n_threads = n_threads;
455 self
456 }
457
458 /// Set the number of threads allocated for batches.
459 ///
460 /// # Examples
461 ///
462 /// ```rust
463 /// use llama_cpp_4::context::params::LlamaContextParams;
464 /// let params = LlamaContextParams::default()
465 /// .with_n_threads_batch(8);
466 /// assert_eq!(params.n_threads_batch(), 8);
467 /// ```
468 #[must_use]
469 pub fn with_n_threads_batch(mut self, n_threads: i32) -> Self {
470 self.context_params.n_threads_batch = n_threads;
471 self
472 }
473
474 /// Check whether embeddings are enabled
475 ///
476 /// # Examples
477 ///
478 /// ```rust
479 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
480 /// assert!(!params.embeddings());
481 /// ```
482 #[must_use]
483 pub fn embeddings(&self) -> bool {
484 self.context_params.embeddings
485 }
486
487 /// Enable the use of embeddings
488 ///
489 /// # Examples
490 ///
491 /// ```rust
492 /// use llama_cpp_4::context::params::LlamaContextParams;
493 /// let params = LlamaContextParams::default()
494 /// .with_embeddings(true);
495 /// assert!(params.embeddings());
496 /// ```
497 #[must_use]
498 pub fn with_embeddings(mut self, embedding: bool) -> Self {
499 self.context_params.embeddings = embedding;
500 self
501 }
502
503 /// Set the evaluation callback.
504 ///
505 /// # Examples
506 ///
507 /// ```no_run
508 /// extern "C" fn cb_eval_fn(
509 /// t: *mut llama_cpp_sys_4::ggml_tensor,
510 /// ask: bool,
511 /// user_data: *mut std::ffi::c_void,
512 /// ) -> bool {
513 /// false
514 /// }
515 ///
516 /// use llama_cpp_4::context::params::LlamaContextParams;
517 /// let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));
518 /// ```
519 #[must_use]
520 pub fn with_cb_eval(
521 mut self,
522 cb_eval: llama_cpp_sys_4::ggml_backend_sched_eval_callback,
523 ) -> Self {
524 self.context_params.cb_eval = cb_eval;
525 self
526 }
527
528 /// Set the evaluation callback user data.
529 ///
530 /// # Examples
531 ///
532 /// ```no_run
533 /// use llama_cpp_4::context::params::LlamaContextParams;
534 /// let params = LlamaContextParams::default();
535 /// let user_data = std::ptr::null_mut();
536 /// let params = params.with_cb_eval_user_data(user_data);
537 /// ```
538 #[must_use]
539 pub fn with_cb_eval_user_data(mut self, cb_eval_user_data: *mut std::ffi::c_void) -> Self {
540 self.context_params.cb_eval_user_data = cb_eval_user_data;
541 self
542 }
543
544 /// Attach a [`TensorCapture`](super::tensor_capture::TensorCapture) to
545 /// intercept intermediate tensor outputs during `decode()`.
546 ///
547 /// This sets up the `cb_eval` callback to capture tensors matching the
548 /// capture's filter (e.g. specific layer outputs). After `decode()` the
549 /// captured data can be read from the `TensorCapture`.
550 ///
551 /// # Example
552 ///
553 /// ```rust,ignore
554 /// use llama_cpp_4::context::params::LlamaContextParams;
555 /// use llama_cpp_4::context::tensor_capture::TensorCapture;
556 ///
557 /// let mut capture = TensorCapture::for_layers(&[13, 20, 27]);
558 /// let ctx_params = LlamaContextParams::default()
559 /// .with_embeddings(true)
560 /// .with_tensor_capture(&mut capture);
561 /// ```
562 #[must_use]
563 pub fn with_tensor_capture(self, capture: &mut super::tensor_capture::TensorCapture) -> Self {
564 self.with_cb_eval(Some(super::tensor_capture::tensor_capture_callback))
565 .with_cb_eval_user_data(
566 std::ptr::from_mut::<super::tensor_capture::TensorCapture>(capture)
567 .cast::<std::ffi::c_void>(),
568 )
569 }
570
571 /// Set the storage type for the **K** (key) KV cache tensors.
572 ///
573 /// The default is `GgmlType::F16`. Quantized types like `GgmlType::Q5_0`
574 /// or `GgmlType::Q4_0` reduce VRAM usage significantly; combining them with
575 /// `TurboQuant` attention rotation (the default) keeps quality high.
576 ///
577 /// # Examples
578 ///
579 /// ```rust
580 /// use llama_cpp_4::context::params::LlamaContextParams;
581 /// use llama_cpp_4::quantize::GgmlType;
582 /// let params = LlamaContextParams::default()
583 /// .with_cache_type_k(GgmlType::Q5_0);
584 /// ```
585 #[must_use]
586 pub fn with_cache_type_k(mut self, ty: crate::quantize::GgmlType) -> Self {
587 self.context_params.type_k = ty as llama_cpp_sys_4::ggml_type;
588 self
589 }
590
591 /// Get the K-cache storage type.
592 #[must_use]
593 pub fn cache_type_k(&self) -> llama_cpp_sys_4::ggml_type {
594 self.context_params.type_k
595 }
596
597 /// Set the storage type for the **V** (value) KV cache tensors.
598 ///
599 /// See [`with_cache_type_k`](Self::with_cache_type_k) for details.
600 ///
601 /// # Examples
602 ///
603 /// ```rust
604 /// use llama_cpp_4::context::params::LlamaContextParams;
605 /// use llama_cpp_4::quantize::GgmlType;
606 /// let params = LlamaContextParams::default()
607 /// .with_cache_type_v(GgmlType::Q5_0);
608 /// ```
609 #[must_use]
610 pub fn with_cache_type_v(mut self, ty: crate::quantize::GgmlType) -> Self {
611 self.context_params.type_v = ty as llama_cpp_sys_4::ggml_type;
612 self
613 }
614
615 /// Get the V-cache storage type.
616 #[must_use]
617 pub fn cache_type_v(&self) -> llama_cpp_sys_4::ggml_type {
618 self.context_params.type_v
619 }
620
621 /// Control the `TurboQuant` attention-rotation feature (llama.cpp PR #21038).
622 ///
623 /// By default, llama.cpp applies a Hadamard rotation to Q/K/V tensors
624 /// before writing them into the KV cache. This significantly improves
625 /// quantized KV-cache quality at near-zero overhead, and is enabled
626 /// automatically for models whose head dimension is a power of two.
627 ///
628 /// Set `disabled = true` to opt out (equivalent to `LLAMA_ATTN_ROT_DISABLE=1`).
629 /// The env-var is applied just before the context is created and restored
630 /// afterwards, so this is safe to call from a single thread.
631 ///
632 /// # Examples
633 ///
634 /// ```rust
635 /// use llama_cpp_4::context::params::LlamaContextParams;
636 /// // Disable rotation for this context only:
637 /// let params = LlamaContextParams::default().with_attn_rot_disabled(true);
638 /// assert!(params.attn_rot_disabled());
639 /// ```
640 #[must_use]
641 pub fn with_attn_rot_disabled(mut self, disabled: bool) -> Self {
642 self.attn_rot_disabled = disabled;
643 self
644 }
645
646 /// Returns `true` if `TurboQuant` attention rotation is disabled for this context.
647 ///
648 /// ```rust
649 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
650 /// assert!(!params.attn_rot_disabled());
651 /// ```
652 #[must_use]
653 pub fn attn_rot_disabled(&self) -> bool {
654 self.attn_rot_disabled
655 }
656
657 /// Set the type of pooling.
658 ///
659 /// # Examples
660 ///
661 /// ```rust
662 /// use llama_cpp_4::context::params::{LlamaContextParams, LlamaPoolingType};
663 /// let params = LlamaContextParams::default()
664 /// .with_pooling_type(LlamaPoolingType::Last);
665 /// assert_eq!(params.pooling_type(), LlamaPoolingType::Last);
666 /// ```
667 #[must_use]
668 pub fn with_pooling_type(mut self, pooling_type: LlamaPoolingType) -> Self {
669 self.context_params.pooling_type = i32::from(pooling_type);
670 self
671 }
672
673 /// Get the type of pooling.
674 ///
675 /// # Examples
676 ///
677 /// ```rust
678 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
679 /// assert_eq!(params.pooling_type(), llama_cpp_4::context::params::LlamaPoolingType::Unspecified);
680 /// ```
681 #[must_use]
682 pub fn pooling_type(&self) -> LlamaPoolingType {
683 LlamaPoolingType::from(self.context_params.pooling_type)
684 }
685}
686
687/// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)
688/// ```
689/// # use std::num::NonZeroU32;
690/// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
691/// let params = LlamaContextParams::default();
692/// assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
693/// assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);
694/// ```
695impl Default for LlamaContextParams {
696 fn default() -> Self {
697 let context_params = unsafe { llama_cpp_sys_4::llama_context_default_params() };
698 Self {
699 context_params,
700 attn_rot_disabled: false,
701 }
702 }
703}