llama_cpp_4/context/params.rs
1//! A safe wrapper around `llama_context_params`.
2use std::fmt::Debug;
3use std::num::NonZeroU32;
4
5/// A rusty wrapper around `llama_context_type`.
6//
7// Cast the sys constants to `u32` so the discriminants compile on both clang
8// (where bindgen emits `c_uint`) and MSVC (where it emits `c_int`).
9#[repr(u32)]
10#[derive(Copy, Clone, Debug, PartialEq, Eq)]
11pub enum LlamaContextType {
12 /// Default context (standard inference).
13 Default = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_DEFAULT as u32,
14 /// Multi-token-prediction draft context, used as the draft side of speculative decoding.
15 Mtp = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP as u32,
16}
17
18impl From<llama_cpp_sys_4::llama_context_type> for LlamaContextType {
19 fn from(value: llama_cpp_sys_4::llama_context_type) -> Self {
20 if value == llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP {
21 Self::Mtp
22 } else {
23 Self::Default
24 }
25 }
26}
27
28impl From<LlamaContextType> for llama_cpp_sys_4::llama_context_type {
29 fn from(value: LlamaContextType) -> Self {
30 value as u32 as Self
31 }
32}
33
/// Rust-side mirror of llama.cpp's `rope_scaling_type`.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RopeScalingType {
    /// The scaling type is unspecified
    Unspecified = -1,
    /// No scaling
    None = 0,
    /// Linear scaling
    Linear = 1,
    /// Yarn scaling
    Yarn = 2,
}

/// Create a `RopeScalingType` from a `c_int`.
///
/// Any value that does not correspond to `None`, `Linear` or `Yarn` yields
/// [`RopeScalingType::Unspecified`].
impl From<i32> for RopeScalingType {
    fn from(value: i32) -> Self {
        // Every variant that has a dedicated raw value; `Unspecified` is the fallback.
        const KNOWN: [RopeScalingType; 3] = [
            RopeScalingType::None,
            RopeScalingType::Linear,
            RopeScalingType::Yarn,
        ];

        KNOWN
            .iter()
            .copied()
            .find(|&kind| i32::from(kind) == value)
            .unwrap_or(Self::Unspecified)
    }
}

/// Create a `c_int` from a `RopeScalingType`.
impl From<RopeScalingType> for i32 {
    fn from(value: RopeScalingType) -> Self {
        // The enum is `repr(i8)` and its discriminants are exactly the raw values,
        // so reading the discriminant and widening it losslessly is sufficient.
        i32::from(value as i8)
    }
}
72
/// Rust-side mirror of llama.cpp's `LLAMA_POOLING_TYPE`.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LlamaPoolingType {
    /// The pooling type is unspecified
    Unspecified = -1,
    /// No pooling
    None = 0,
    /// Mean pooling
    Mean = 1,
    /// CLS pooling
    Cls = 2,
    /// Last pooling
    Last = 3,
}

/// Create a `LlamaPoolingType` from a `c_int`.
///
/// Any value that does not correspond to `None`, `Mean`, `Cls` or `Last`
/// yields [`LlamaPoolingType::Unspecified`].
impl From<i32> for LlamaPoolingType {
    fn from(value: i32) -> Self {
        // Every variant that has a dedicated raw value; `Unspecified` is the fallback.
        const KNOWN: [LlamaPoolingType; 4] = [
            LlamaPoolingType::None,
            LlamaPoolingType::Mean,
            LlamaPoolingType::Cls,
            LlamaPoolingType::Last,
        ];

        KNOWN
            .iter()
            .copied()
            .find(|&kind| i32::from(kind) == value)
            .unwrap_or(Self::Unspecified)
    }
}

/// Create a `c_int` from a `LlamaPoolingType`.
impl From<LlamaPoolingType> for i32 {
    fn from(value: LlamaPoolingType) -> Self {
        // The enum is `repr(i8)` and its discriminants are exactly the raw values,
        // so reading the discriminant and widening it losslessly is sufficient.
        i32::from(value as i8)
    }
}
115
/// A safe wrapper around `llama_context_params`.
///
/// Generally this should be created with [`Default::default()`] and then modified with the
/// builder-style `with_*` methods, each of which consumes the value and returns the updated one.
///
/// # Examples
///
/// ```rust
/// # use std::num::NonZeroU32;
/// use llama_cpp_4::context::params::LlamaContextParams;
///
/// let ctx_params = LlamaContextParams::default()
///     .with_n_ctx(NonZeroU32::new(2048));
///
/// assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));
/// ```
#[derive(Debug, Clone)]
// NOTE(review): the reason for these lint allowances is not recorded. The struct
// currently has a single `bool` field, so `struct_excessive_bools` may be a
// leftover — confirm before removing.
#[allow(
    missing_docs,
    clippy::struct_excessive_bools,
    clippy::module_name_repetitions
)]
pub struct LlamaContextParams {
    /// The raw FFI parameter struct that the `with_*` setters write into and the
    /// getters read from.
    pub(crate) context_params: llama_cpp_sys_4::llama_context_params,
    /// When `true`, the `TurboQuant` attention rotation (PR #21038) will be
    /// disabled for any context created from these params.
    pub(crate) attn_rot_disabled: bool,
}
143
// SAFETY: the stated justification is that this wrapper does not allow setting or reading the
// raw pointers inside `llama_context_params` that stop it from being automatically `Send`.
// NOTE(review): `with_cb_eval_user_data` and `with_tensor_capture` do store caller-supplied raw
// pointers into `context_params`, so that justification looks stale — confirm that moving the
// params to another thread is still sound for whatever those pointers refer to.
unsafe impl Send for LlamaContextParams {}
// SAFETY: same justification as for `Send`, with the same caveat about the `cb_eval` user-data
// pointer — confirm that sharing the params between threads is sound.
unsafe impl Sync for LlamaContextParams {}
147
148impl LlamaContextParams {
149 /// Set the side of the context
150 ///
151 /// # Examples
152 ///
153 /// ```rust
154 /// # use std::num::NonZeroU32;
155 /// use llama_cpp_4::context::params::LlamaContextParams;
156 /// let params = LlamaContextParams::default();
157 /// let params = params.with_n_ctx(NonZeroU32::new(2048));
158 /// assert_eq!(params.n_ctx(), NonZeroU32::new(2048));
159 /// ```
160 #[must_use]
161 pub fn with_n_ctx(mut self, n_ctx: Option<NonZeroU32>) -> Self {
162 self.context_params.n_ctx = n_ctx.map_or(0, std::num::NonZeroU32::get);
163 self
164 }
165
166 /// Get the size of the context.
167 ///
168 /// [`None`] if the context size is specified by the model and not the context.
169 ///
170 /// # Examples
171 ///
172 /// ```rust
173 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
174 /// assert_eq!(params.n_ctx(), std::num::NonZeroU32::new(512));
175 #[must_use]
176 pub fn n_ctx(&self) -> Option<NonZeroU32> {
177 NonZeroU32::new(self.context_params.n_ctx)
178 }
179
180 /// Set the `n_batch`
181 ///
182 /// # Examples
183 ///
184 /// ```rust
185 /// # use std::num::NonZeroU32;
186 /// use llama_cpp_4::context::params::LlamaContextParams;
187 /// let params = LlamaContextParams::default()
188 /// .with_n_batch(2048);
189 /// assert_eq!(params.n_batch(), 2048);
190 /// ```
191 #[must_use]
192 pub fn with_n_batch(mut self, n_batch: u32) -> Self {
193 self.context_params.n_batch = n_batch;
194 self
195 }
196
197 /// Get the `n_batch`
198 ///
199 /// # Examples
200 ///
201 /// ```rust
202 /// use llama_cpp_4::context::params::LlamaContextParams;
203 /// let params = LlamaContextParams::default();
204 /// assert_eq!(params.n_batch(), 2048);
205 /// ```
206 #[must_use]
207 pub fn n_batch(&self) -> u32 {
208 self.context_params.n_batch
209 }
210
211 /// Set the `n_ubatch`
212 ///
213 /// # Examples
214 ///
215 /// ```rust
216 /// # use std::num::NonZeroU32;
217 /// use llama_cpp_4::context::params::LlamaContextParams;
218 /// let params = LlamaContextParams::default()
219 /// .with_n_ubatch(512);
220 /// assert_eq!(params.n_ubatch(), 512);
221 /// ```
222 #[must_use]
223 pub fn with_n_ubatch(mut self, n_ubatch: u32) -> Self {
224 self.context_params.n_ubatch = n_ubatch;
225 self
226 }
227
228 /// Get the `n_ubatch`
229 ///
230 /// # Examples
231 ///
232 /// ```rust
233 /// use llama_cpp_4::context::params::LlamaContextParams;
234 /// let params = LlamaContextParams::default();
235 /// assert_eq!(params.n_ubatch(), 512);
236 /// ```
237 #[must_use]
238 pub fn n_ubatch(&self) -> u32 {
239 self.context_params.n_ubatch
240 }
241
242 /// Set the context type (e.g. [`LlamaContextType::Mtp`] to load this context as a
243 /// multi-token-prediction draft head used by upstream's `draft-mtp` speculative decoder).
244 #[must_use]
245 pub fn with_ctx_type(mut self, ctx_type: LlamaContextType) -> Self {
246 self.context_params.ctx_type = ctx_type.into();
247 self
248 }
249
250 /// Get the configured context type.
251 #[must_use]
252 pub fn ctx_type(&self) -> LlamaContextType {
253 self.context_params.ctx_type.into()
254 }
255
256 /// Set the number of recurrent-state snapshots per sequence used for MTP rollback.
257 #[must_use]
258 pub fn with_n_rs_seq(mut self, n_rs_seq: u32) -> Self {
259 self.context_params.n_rs_seq = n_rs_seq;
260 self
261 }
262
263 /// Get the number of recurrent-state snapshots per sequence used for MTP rollback.
264 #[must_use]
265 pub fn n_rs_seq(&self) -> u32 {
266 self.context_params.n_rs_seq
267 }
268
269 /// Set the `flash_attention` parameter
270 ///
271 /// # Examples
272 ///
273 /// ```rust
274 /// use llama_cpp_4::context::params::LlamaContextParams;
275 /// let params = LlamaContextParams::default()
276 /// .with_flash_attention(true);
277 /// assert_eq!(params.flash_attention(), true);
278 /// ```
279 #[must_use]
280 pub fn with_flash_attention(mut self, enabled: bool) -> Self {
281 self.context_params.flash_attn_type = if enabled {
282 llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
283 } else {
284 llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_DISABLED
285 };
286 self
287 }
288
289 /// Get the `flash_attention` parameter
290 ///
291 /// # Examples
292 ///
293 /// ```rust
294 /// use llama_cpp_4::context::params::LlamaContextParams;
295 /// let params = LlamaContextParams::default();
296 /// assert_eq!(params.flash_attention(), false);
297 /// ```
298 #[must_use]
299 pub fn flash_attention(&self) -> bool {
300 self.context_params.flash_attn_type == llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
301 }
302
303 /// Set the `offload_kqv` parameter to control offloading KV cache & KQV ops to GPU
304 ///
305 /// # Examples
306 ///
307 /// ```rust
308 /// use llama_cpp_4::context::params::LlamaContextParams;
309 /// let params = LlamaContextParams::default()
310 /// .with_offload_kqv(false);
311 /// assert_eq!(params.offload_kqv(), false);
312 /// ```
313 #[must_use]
314 pub fn with_offload_kqv(mut self, enabled: bool) -> Self {
315 self.context_params.offload_kqv = enabled;
316 self
317 }
318
319 /// Get the `offload_kqv` parameter
320 ///
321 /// # Examples
322 ///
323 /// ```rust
324 /// use llama_cpp_4::context::params::LlamaContextParams;
325 /// let params = LlamaContextParams::default();
326 /// assert_eq!(params.offload_kqv(), true);
327 /// ```
328 #[must_use]
329 pub fn offload_kqv(&self) -> bool {
330 self.context_params.offload_kqv
331 }
332
333 /// Set the type of rope scaling.
334 ///
335 /// # Examples
336 ///
337 /// ```rust
338 /// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
339 /// let params = LlamaContextParams::default()
340 /// .with_rope_scaling_type(RopeScalingType::Linear);
341 /// assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);
342 /// ```
343 #[must_use]
344 pub fn with_rope_scaling_type(mut self, rope_scaling_type: RopeScalingType) -> Self {
345 self.context_params.rope_scaling_type = i32::from(rope_scaling_type);
346 self
347 }
348
349 /// Get the type of rope scaling.
350 ///
351 /// # Examples
352 ///
353 /// ```rust
354 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
355 /// assert_eq!(params.rope_scaling_type(), llama_cpp_4::context::params::RopeScalingType::Unspecified);
356 /// ```
357 #[must_use]
358 pub fn rope_scaling_type(&self) -> RopeScalingType {
359 RopeScalingType::from(self.context_params.rope_scaling_type)
360 }
361
362 /// Set the rope frequency base.
363 ///
364 /// # Examples
365 ///
366 /// ```rust
367 /// use llama_cpp_4::context::params::LlamaContextParams;
368 /// let params = LlamaContextParams::default()
369 /// .with_rope_freq_base(0.5);
370 /// assert_eq!(params.rope_freq_base(), 0.5);
371 /// ```
372 #[must_use]
373 pub fn with_rope_freq_base(mut self, rope_freq_base: f32) -> Self {
374 self.context_params.rope_freq_base = rope_freq_base;
375 self
376 }
377
378 /// Get the rope frequency base.
379 ///
380 /// # Examples
381 ///
382 /// ```rust
383 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
384 /// assert_eq!(params.rope_freq_base(), 0.0);
385 /// ```
386 #[must_use]
387 pub fn rope_freq_base(&self) -> f32 {
388 self.context_params.rope_freq_base
389 }
390
391 /// Set the rope frequency scale.
392 ///
393 /// # Examples
394 ///
395 /// ```rust
396 /// use llama_cpp_4::context::params::LlamaContextParams;
397 /// let params = LlamaContextParams::default()
398 /// .with_rope_freq_scale(0.5);
399 /// assert_eq!(params.rope_freq_scale(), 0.5);
400 /// ```
401 #[must_use]
402 pub fn with_rope_freq_scale(mut self, rope_freq_scale: f32) -> Self {
403 self.context_params.rope_freq_scale = rope_freq_scale;
404 self
405 }
406
407 /// Get the rope frequency scale.
408 ///
409 /// # Examples
410 ///
411 /// ```rust
412 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
413 /// assert_eq!(params.rope_freq_scale(), 0.0);
414 /// ```
415 #[must_use]
416 pub fn rope_freq_scale(&self) -> f32 {
417 self.context_params.rope_freq_scale
418 }
419
420 /// Get the number of threads.
421 ///
422 /// # Examples
423 ///
424 /// ```rust
425 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
426 /// assert_eq!(params.n_threads(), 4);
427 /// ```
428 #[must_use]
429 pub fn n_threads(&self) -> i32 {
430 self.context_params.n_threads
431 }
432
433 /// Get the number of threads allocated for batches.
434 ///
435 /// # Examples
436 ///
437 /// ```rust
438 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
439 /// assert_eq!(params.n_threads_batch(), 4);
440 /// ```
441 #[must_use]
442 pub fn n_threads_batch(&self) -> i32 {
443 self.context_params.n_threads_batch
444 }
445
446 /// Set the number of threads.
447 ///
448 /// # Examples
449 ///
450 /// ```rust
451 /// use llama_cpp_4::context::params::LlamaContextParams;
452 /// let params = LlamaContextParams::default()
453 /// .with_n_threads(8);
454 /// assert_eq!(params.n_threads(), 8);
455 /// ```
456 #[must_use]
457 pub fn with_n_threads(mut self, n_threads: i32) -> Self {
458 self.context_params.n_threads = n_threads;
459 self
460 }
461
462 /// Set the number of threads allocated for batches.
463 ///
464 /// # Examples
465 ///
466 /// ```rust
467 /// use llama_cpp_4::context::params::LlamaContextParams;
468 /// let params = LlamaContextParams::default()
469 /// .with_n_threads_batch(8);
470 /// assert_eq!(params.n_threads_batch(), 8);
471 /// ```
472 #[must_use]
473 pub fn with_n_threads_batch(mut self, n_threads: i32) -> Self {
474 self.context_params.n_threads_batch = n_threads;
475 self
476 }
477
478 /// Check whether embeddings are enabled
479 ///
480 /// # Examples
481 ///
482 /// ```rust
483 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
484 /// assert!(!params.embeddings());
485 /// ```
486 #[must_use]
487 pub fn embeddings(&self) -> bool {
488 self.context_params.embeddings
489 }
490
491 /// Enable the use of embeddings
492 ///
493 /// # Examples
494 ///
495 /// ```rust
496 /// use llama_cpp_4::context::params::LlamaContextParams;
497 /// let params = LlamaContextParams::default()
498 /// .with_embeddings(true);
499 /// assert!(params.embeddings());
500 /// ```
501 #[must_use]
502 pub fn with_embeddings(mut self, embedding: bool) -> Self {
503 self.context_params.embeddings = embedding;
504 self
505 }
506
507 /// Set the evaluation callback.
508 ///
509 /// # Examples
510 ///
511 /// ```no_run
512 /// extern "C" fn cb_eval_fn(
513 /// t: *mut llama_cpp_sys_4::ggml_tensor,
514 /// ask: bool,
515 /// user_data: *mut std::ffi::c_void,
516 /// ) -> bool {
517 /// false
518 /// }
519 ///
520 /// use llama_cpp_4::context::params::LlamaContextParams;
521 /// let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));
522 /// ```
523 #[must_use]
524 pub fn with_cb_eval(
525 mut self,
526 cb_eval: llama_cpp_sys_4::ggml_backend_sched_eval_callback,
527 ) -> Self {
528 self.context_params.cb_eval = cb_eval;
529 self
530 }
531
532 /// Set the evaluation callback user data.
533 ///
534 /// # Examples
535 ///
536 /// ```no_run
537 /// use llama_cpp_4::context::params::LlamaContextParams;
538 /// let params = LlamaContextParams::default();
539 /// let user_data = std::ptr::null_mut();
540 /// let params = params.with_cb_eval_user_data(user_data);
541 /// ```
542 #[must_use]
543 pub fn with_cb_eval_user_data(mut self, cb_eval_user_data: *mut std::ffi::c_void) -> Self {
544 self.context_params.cb_eval_user_data = cb_eval_user_data;
545 self
546 }
547
548 /// Attach a [`TensorCapture`](super::tensor_capture::TensorCapture) to
549 /// intercept intermediate tensor outputs during `decode()`.
550 ///
551 /// This sets up the `cb_eval` callback to capture tensors matching the
552 /// capture's filter (e.g. specific layer outputs). After `decode()` the
553 /// captured data can be read from the `TensorCapture`.
554 ///
555 /// # Example
556 ///
557 /// ```rust,ignore
558 /// use llama_cpp_4::context::params::LlamaContextParams;
559 /// use llama_cpp_4::context::tensor_capture::TensorCapture;
560 ///
561 /// let mut capture = TensorCapture::for_layers(&[13, 20, 27]);
562 /// let ctx_params = LlamaContextParams::default()
563 /// .with_embeddings(true)
564 /// .with_tensor_capture(&mut capture);
565 /// ```
566 #[must_use]
567 pub fn with_tensor_capture(self, capture: &mut super::tensor_capture::TensorCapture) -> Self {
568 self.with_cb_eval(Some(super::tensor_capture::tensor_capture_callback))
569 .with_cb_eval_user_data(
570 std::ptr::from_mut::<super::tensor_capture::TensorCapture>(capture)
571 .cast::<std::ffi::c_void>(),
572 )
573 }
574
575 /// Set the storage type for the **K** (key) KV cache tensors.
576 ///
577 /// The default is `GgmlType::F16`. Quantized types like `GgmlType::Q5_0`
578 /// or `GgmlType::Q4_0` reduce VRAM usage significantly; combining them with
579 /// `TurboQuant` attention rotation (the default) keeps quality high.
580 ///
581 /// # Examples
582 ///
583 /// ```rust
584 /// use llama_cpp_4::context::params::LlamaContextParams;
585 /// use llama_cpp_4::quantize::GgmlType;
586 /// let params = LlamaContextParams::default()
587 /// .with_cache_type_k(GgmlType::Q5_0);
588 /// ```
589 #[must_use]
590 pub fn with_cache_type_k(mut self, ty: crate::quantize::GgmlType) -> Self {
591 self.context_params.type_k = ty as llama_cpp_sys_4::ggml_type;
592 self
593 }
594
595 /// Get the K-cache storage type.
596 #[must_use]
597 pub fn cache_type_k(&self) -> llama_cpp_sys_4::ggml_type {
598 self.context_params.type_k
599 }
600
601 /// Set the storage type for the **V** (value) KV cache tensors.
602 ///
603 /// See [`with_cache_type_k`](Self::with_cache_type_k) for details.
604 ///
605 /// # Examples
606 ///
607 /// ```rust
608 /// use llama_cpp_4::context::params::LlamaContextParams;
609 /// use llama_cpp_4::quantize::GgmlType;
610 /// let params = LlamaContextParams::default()
611 /// .with_cache_type_v(GgmlType::Q5_0);
612 /// ```
613 #[must_use]
614 pub fn with_cache_type_v(mut self, ty: crate::quantize::GgmlType) -> Self {
615 self.context_params.type_v = ty as llama_cpp_sys_4::ggml_type;
616 self
617 }
618
619 /// Get the V-cache storage type.
620 #[must_use]
621 pub fn cache_type_v(&self) -> llama_cpp_sys_4::ggml_type {
622 self.context_params.type_v
623 }
624
625 /// Control the `TurboQuant` attention-rotation feature (llama.cpp PR #21038).
626 ///
627 /// By default, llama.cpp applies a Hadamard rotation to Q/K/V tensors
628 /// before writing them into the KV cache. This significantly improves
629 /// quantized KV-cache quality at near-zero overhead, and is enabled
630 /// automatically for models whose head dimension is a power of two.
631 ///
632 /// Set `disabled = true` to opt out (equivalent to `LLAMA_ATTN_ROT_DISABLE=1`).
633 /// The env-var is applied just before the context is created and restored
634 /// afterwards, so this is safe to call from a single thread.
635 ///
636 /// # Examples
637 ///
638 /// ```rust
639 /// use llama_cpp_4::context::params::LlamaContextParams;
640 /// // Disable rotation for this context only:
641 /// let params = LlamaContextParams::default().with_attn_rot_disabled(true);
642 /// assert!(params.attn_rot_disabled());
643 /// ```
644 #[must_use]
645 pub fn with_attn_rot_disabled(mut self, disabled: bool) -> Self {
646 self.attn_rot_disabled = disabled;
647 self
648 }
649
650 /// Returns `true` if `TurboQuant` attention rotation is disabled for this context.
651 ///
652 /// ```rust
653 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
654 /// assert!(!params.attn_rot_disabled());
655 /// ```
656 #[must_use]
657 pub fn attn_rot_disabled(&self) -> bool {
658 self.attn_rot_disabled
659 }
660
661 /// Set the type of pooling.
662 ///
663 /// # Examples
664 ///
665 /// ```rust
666 /// use llama_cpp_4::context::params::{LlamaContextParams, LlamaPoolingType};
667 /// let params = LlamaContextParams::default()
668 /// .with_pooling_type(LlamaPoolingType::Last);
669 /// assert_eq!(params.pooling_type(), LlamaPoolingType::Last);
670 /// ```
671 #[must_use]
672 pub fn with_pooling_type(mut self, pooling_type: LlamaPoolingType) -> Self {
673 self.context_params.pooling_type = i32::from(pooling_type);
674 self
675 }
676
677 /// Get the type of pooling.
678 ///
679 /// # Examples
680 ///
681 /// ```rust
682 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
683 /// assert_eq!(params.pooling_type(), llama_cpp_4::context::params::LlamaPoolingType::Unspecified);
684 /// ```
685 #[must_use]
686 pub fn pooling_type(&self) -> LlamaPoolingType {
687 LlamaPoolingType::from(self.context_params.pooling_type)
688 }
689}
690
691/// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)
692/// ```
693/// # use std::num::NonZeroU32;
694/// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
695/// let params = LlamaContextParams::default();
696/// assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
697/// assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);
698/// ```
699impl Default for LlamaContextParams {
700 fn default() -> Self {
701 let context_params = unsafe { llama_cpp_sys_4::llama_context_default_params() };
702 Self {
703 context_params,
704 attn_rot_disabled: false,
705 }
706 }
707}