Skip to main content

llama_cpp_4/context/
params.rs

1//! A safe wrapper around `llama_context_params`.
2use std::fmt::Debug;
3use std::num::NonZeroU32;
4
5/// A rusty wrapper around `llama_context_type`.
6//
7// Cast the sys constants to `u32` so the discriminants compile on both clang
8// (where bindgen emits `c_uint`) and MSVC (where it emits `c_int`).
9#[repr(u32)]
10#[derive(Copy, Clone, Debug, PartialEq, Eq)]
11pub enum LlamaContextType {
12    /// Default context (standard inference).
13    Default = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_DEFAULT as u32,
14    /// Multi-token-prediction draft context, used as the draft side of speculative decoding.
15    Mtp = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP as u32,
16}
17
18impl From<llama_cpp_sys_4::llama_context_type> for LlamaContextType {
19    fn from(value: llama_cpp_sys_4::llama_context_type) -> Self {
20        if value == llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP {
21            Self::Mtp
22        } else {
23            Self::Default
24        }
25    }
26}
27
28impl From<LlamaContextType> for llama_cpp_sys_4::llama_context_type {
29    fn from(value: LlamaContextType) -> Self {
30        value as u32 as Self
31    }
32}
33
/// A rusty wrapper around `rope_scaling_type`.
///
/// The discriminants are the raw integer values exchanged with llama.cpp.
//
// NOTE(review): upstream llama.cpp appears to define further rope-scaling values
// (e.g. a "longrope" mode); any such value currently reads back as `Unspecified`
// through `From<i32>` — confirm that this is intended.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RopeScalingType {
    /// The scaling type is unspecified
    Unspecified = -1,
    /// No scaling
    None = 0,
    /// Linear scaling
    Linear = 1,
    /// Yarn scaling
    Yarn = 2,
}

/// Create a `RopeScalingType` from a `c_int` - returns [`RopeScalingType::Unspecified`] if
/// the value is not recognized.
impl From<i32> for RopeScalingType {
    fn from(value: i32) -> Self {
        match value {
            0 => Self::None,
            1 => Self::Linear,
            2 => Self::Yarn,
            // Covers `-1` as well as every value this wrapper does not model.
            _ => Self::Unspecified,
        }
    }
}

/// Create a `c_int` from a `RopeScalingType`.
impl From<RopeScalingType> for i32 {
    fn from(value: RopeScalingType) -> Self {
        // The `repr(i8)` discriminants are the raw values, so widen them instead of
        // maintaining a second, hand-written copy of the table.
        i32::from(value as i8)
    }
}
72
/// A rusty wrapper around `LLAMA_POOLING_TYPE`.
///
/// The discriminants are the raw integer values exchanged with llama.cpp.
//
// NOTE(review): upstream llama.cpp appears to define further pooling values
// (e.g. a "rank" mode); any such value currently reads back as `Unspecified`
// through `From<i32>` — confirm that this is intended.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LlamaPoolingType {
    /// The pooling type is unspecified
    Unspecified = -1,
    /// No pooling
    None = 0,
    /// Mean pooling
    Mean = 1,
    /// CLS pooling
    Cls = 2,
    /// Last pooling
    Last = 3,
}

/// Create a `LlamaPoolingType` from a `c_int` - returns [`LlamaPoolingType::Unspecified`] if
/// the value is not recognized.
impl From<i32> for LlamaPoolingType {
    fn from(value: i32) -> Self {
        match value {
            0 => Self::None,
            1 => Self::Mean,
            2 => Self::Cls,
            3 => Self::Last,
            // Covers `-1` as well as every value this wrapper does not model.
            _ => Self::Unspecified,
        }
    }
}

/// Create a `c_int` from a `LlamaPoolingType`.
impl From<LlamaPoolingType> for i32 {
    fn from(value: LlamaPoolingType) -> Self {
        // The `repr(i8)` discriminants are the raw values, so widen them instead of
        // maintaining a second, hand-written copy of the table.
        i32::from(value as i8)
    }
}
115
116/// A safe wrapper around `llama_context_params`.
117///
118/// Generally this should be created with [`Default::default()`] and then modified with `with_*` methods.
119///
120/// # Examples
121///
122/// ```rust
123/// # use std::num::NonZeroU32;
124/// use llama_cpp_4::context::params::LlamaContextParams;
125///
126/// let ctx_params = LlamaContextParams::default()
127///     .with_n_ctx(NonZeroU32::new(2048));
128///
129/// assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));
130/// ```
131#[derive(Debug, Clone)]
132#[allow(
133    missing_docs,
134    clippy::struct_excessive_bools,
135    clippy::module_name_repetitions
136)]
137pub struct LlamaContextParams {
138    pub(crate) context_params: llama_cpp_sys_4::llama_context_params,
139    /// When `true`, the `TurboQuant` attention rotation (PR #21038) will be
140    /// disabled for any context created from these params.
141    pub(crate) attn_rot_disabled: bool,
142}
143
144/// SAFETY: we do not currently allow setting or reading the pointers that cause this to not be automatically send or sync.
145unsafe impl Send for LlamaContextParams {}
146unsafe impl Sync for LlamaContextParams {}
147
148impl LlamaContextParams {
149    /// Set the side of the context
150    ///
151    /// # Examples
152    ///
153    /// ```rust
154    /// # use std::num::NonZeroU32;
155    /// use llama_cpp_4::context::params::LlamaContextParams;
156    /// let params = LlamaContextParams::default();
157    /// let params = params.with_n_ctx(NonZeroU32::new(2048));
158    /// assert_eq!(params.n_ctx(), NonZeroU32::new(2048));
159    /// ```
160    #[must_use]
161    pub fn with_n_ctx(mut self, n_ctx: Option<NonZeroU32>) -> Self {
162        self.context_params.n_ctx = n_ctx.map_or(0, std::num::NonZeroU32::get);
163        self
164    }
165
166    /// Get the size of the context.
167    ///
168    /// [`None`] if the context size is specified by the model and not the context.
169    ///
170    /// # Examples
171    ///
172    /// ```rust
173    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
174    /// assert_eq!(params.n_ctx(), std::num::NonZeroU32::new(512));
175    #[must_use]
176    pub fn n_ctx(&self) -> Option<NonZeroU32> {
177        NonZeroU32::new(self.context_params.n_ctx)
178    }
179
180    /// Set the `n_batch`
181    ///
182    /// # Examples
183    ///
184    /// ```rust
185    /// # use std::num::NonZeroU32;
186    /// use llama_cpp_4::context::params::LlamaContextParams;
187    /// let params = LlamaContextParams::default()
188    ///     .with_n_batch(2048);
189    /// assert_eq!(params.n_batch(), 2048);
190    /// ```
191    #[must_use]
192    pub fn with_n_batch(mut self, n_batch: u32) -> Self {
193        self.context_params.n_batch = n_batch;
194        self
195    }
196
197    /// Get the `n_batch`
198    ///
199    /// # Examples
200    ///
201    /// ```rust
202    /// use llama_cpp_4::context::params::LlamaContextParams;
203    /// let params = LlamaContextParams::default();
204    /// assert_eq!(params.n_batch(), 2048);
205    /// ```
206    #[must_use]
207    pub fn n_batch(&self) -> u32 {
208        self.context_params.n_batch
209    }
210
211    /// Set the `n_ubatch`
212    ///
213    /// # Examples
214    ///
215    /// ```rust
216    /// # use std::num::NonZeroU32;
217    /// use llama_cpp_4::context::params::LlamaContextParams;
218    /// let params = LlamaContextParams::default()
219    ///     .with_n_ubatch(512);
220    /// assert_eq!(params.n_ubatch(), 512);
221    /// ```
222    #[must_use]
223    pub fn with_n_ubatch(mut self, n_ubatch: u32) -> Self {
224        self.context_params.n_ubatch = n_ubatch;
225        self
226    }
227
228    /// Get the `n_ubatch`
229    ///
230    /// # Examples
231    ///
232    /// ```rust
233    /// use llama_cpp_4::context::params::LlamaContextParams;
234    /// let params = LlamaContextParams::default();
235    /// assert_eq!(params.n_ubatch(), 512);
236    /// ```
237    #[must_use]
238    pub fn n_ubatch(&self) -> u32 {
239        self.context_params.n_ubatch
240    }
241
242    /// Set the context type (e.g. [`LlamaContextType::Mtp`] to load this context as a
243    /// multi-token-prediction draft head used by upstream's `draft-mtp` speculative decoder).
244    #[must_use]
245    pub fn with_ctx_type(mut self, ctx_type: LlamaContextType) -> Self {
246        self.context_params.ctx_type = ctx_type.into();
247        self
248    }
249
250    /// Get the configured context type.
251    #[must_use]
252    pub fn ctx_type(&self) -> LlamaContextType {
253        self.context_params.ctx_type.into()
254    }
255
256    /// Set the number of recurrent-state snapshots per sequence used for MTP rollback.
257    #[must_use]
258    pub fn with_n_rs_seq(mut self, n_rs_seq: u32) -> Self {
259        self.context_params.n_rs_seq = n_rs_seq;
260        self
261    }
262
263    /// Get the number of recurrent-state snapshots per sequence used for MTP rollback.
264    #[must_use]
265    pub fn n_rs_seq(&self) -> u32 {
266        self.context_params.n_rs_seq
267    }
268
269    /// Set the `flash_attention` parameter
270    ///
271    /// # Examples
272    ///
273    /// ```rust
274    /// use llama_cpp_4::context::params::LlamaContextParams;
275    /// let params = LlamaContextParams::default()
276    ///     .with_flash_attention(true);
277    /// assert_eq!(params.flash_attention(), true);
278    /// ```
279    #[must_use]
280    pub fn with_flash_attention(mut self, enabled: bool) -> Self {
281        self.context_params.flash_attn_type = if enabled {
282            llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
283        } else {
284            llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_DISABLED
285        };
286        self
287    }
288
289    /// Get the `flash_attention` parameter
290    ///
291    /// # Examples
292    ///
293    /// ```rust
294    /// use llama_cpp_4::context::params::LlamaContextParams;
295    /// let params = LlamaContextParams::default();
296    /// assert_eq!(params.flash_attention(), false);
297    /// ```
298    #[must_use]
299    pub fn flash_attention(&self) -> bool {
300        self.context_params.flash_attn_type == llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
301    }
302
303    /// Set the `offload_kqv` parameter to control offloading KV cache & KQV ops to GPU
304    ///
305    /// # Examples
306    ///
307    /// ```rust
308    /// use llama_cpp_4::context::params::LlamaContextParams;
309    /// let params = LlamaContextParams::default()
310    ///     .with_offload_kqv(false);
311    /// assert_eq!(params.offload_kqv(), false);
312    /// ```
313    #[must_use]
314    pub fn with_offload_kqv(mut self, enabled: bool) -> Self {
315        self.context_params.offload_kqv = enabled;
316        self
317    }
318
319    /// Get the `offload_kqv` parameter
320    ///
321    /// # Examples
322    ///
323    /// ```rust
324    /// use llama_cpp_4::context::params::LlamaContextParams;
325    /// let params = LlamaContextParams::default();
326    /// assert_eq!(params.offload_kqv(), true);
327    /// ```
328    #[must_use]
329    pub fn offload_kqv(&self) -> bool {
330        self.context_params.offload_kqv
331    }
332
333    /// Set the type of rope scaling.
334    ///
335    /// # Examples
336    ///
337    /// ```rust
338    /// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
339    /// let params = LlamaContextParams::default()
340    ///     .with_rope_scaling_type(RopeScalingType::Linear);
341    /// assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);
342    /// ```
343    #[must_use]
344    pub fn with_rope_scaling_type(mut self, rope_scaling_type: RopeScalingType) -> Self {
345        self.context_params.rope_scaling_type = i32::from(rope_scaling_type);
346        self
347    }
348
349    /// Get the type of rope scaling.
350    ///
351    /// # Examples
352    ///
353    /// ```rust
354    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
355    /// assert_eq!(params.rope_scaling_type(), llama_cpp_4::context::params::RopeScalingType::Unspecified);
356    /// ```
357    #[must_use]
358    pub fn rope_scaling_type(&self) -> RopeScalingType {
359        RopeScalingType::from(self.context_params.rope_scaling_type)
360    }
361
362    /// Set the rope frequency base.
363    ///
364    /// # Examples
365    ///
366    /// ```rust
367    /// use llama_cpp_4::context::params::LlamaContextParams;
368    /// let params = LlamaContextParams::default()
369    ///    .with_rope_freq_base(0.5);
370    /// assert_eq!(params.rope_freq_base(), 0.5);
371    /// ```
372    #[must_use]
373    pub fn with_rope_freq_base(mut self, rope_freq_base: f32) -> Self {
374        self.context_params.rope_freq_base = rope_freq_base;
375        self
376    }
377
378    /// Get the rope frequency base.
379    ///
380    /// # Examples
381    ///
382    /// ```rust
383    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
384    /// assert_eq!(params.rope_freq_base(), 0.0);
385    /// ```
386    #[must_use]
387    pub fn rope_freq_base(&self) -> f32 {
388        self.context_params.rope_freq_base
389    }
390
391    /// Set the rope frequency scale.
392    ///
393    /// # Examples
394    ///
395    /// ```rust
396    /// use llama_cpp_4::context::params::LlamaContextParams;
397    /// let params = LlamaContextParams::default()
398    ///   .with_rope_freq_scale(0.5);
399    /// assert_eq!(params.rope_freq_scale(), 0.5);
400    /// ```
401    #[must_use]
402    pub fn with_rope_freq_scale(mut self, rope_freq_scale: f32) -> Self {
403        self.context_params.rope_freq_scale = rope_freq_scale;
404        self
405    }
406
407    /// Get the rope frequency scale.
408    ///
409    /// # Examples
410    ///
411    /// ```rust
412    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
413    /// assert_eq!(params.rope_freq_scale(), 0.0);
414    /// ```
415    #[must_use]
416    pub fn rope_freq_scale(&self) -> f32 {
417        self.context_params.rope_freq_scale
418    }
419
420    /// Get the number of threads.
421    ///
422    /// # Examples
423    ///
424    /// ```rust
425    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
426    /// assert_eq!(params.n_threads(), 4);
427    /// ```
428    #[must_use]
429    pub fn n_threads(&self) -> i32 {
430        self.context_params.n_threads
431    }
432
433    /// Get the number of threads allocated for batches.
434    ///
435    /// # Examples
436    ///
437    /// ```rust
438    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
439    /// assert_eq!(params.n_threads_batch(), 4);
440    /// ```
441    #[must_use]
442    pub fn n_threads_batch(&self) -> i32 {
443        self.context_params.n_threads_batch
444    }
445
446    /// Set the number of threads.
447    ///
448    /// # Examples
449    ///
450    /// ```rust
451    /// use llama_cpp_4::context::params::LlamaContextParams;
452    /// let params = LlamaContextParams::default()
453    ///    .with_n_threads(8);
454    /// assert_eq!(params.n_threads(), 8);
455    /// ```
456    #[must_use]
457    pub fn with_n_threads(mut self, n_threads: i32) -> Self {
458        self.context_params.n_threads = n_threads;
459        self
460    }
461
462    /// Set the number of threads allocated for batches.
463    ///
464    /// # Examples
465    ///
466    /// ```rust
467    /// use llama_cpp_4::context::params::LlamaContextParams;
468    /// let params = LlamaContextParams::default()
469    ///    .with_n_threads_batch(8);
470    /// assert_eq!(params.n_threads_batch(), 8);
471    /// ```
472    #[must_use]
473    pub fn with_n_threads_batch(mut self, n_threads: i32) -> Self {
474        self.context_params.n_threads_batch = n_threads;
475        self
476    }
477
478    /// Check whether embeddings are enabled
479    ///
480    /// # Examples
481    ///
482    /// ```rust
483    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
484    /// assert!(!params.embeddings());
485    /// ```
486    #[must_use]
487    pub fn embeddings(&self) -> bool {
488        self.context_params.embeddings
489    }
490
491    /// Enable the use of embeddings
492    ///
493    /// # Examples
494    ///
495    /// ```rust
496    /// use llama_cpp_4::context::params::LlamaContextParams;
497    /// let params = LlamaContextParams::default()
498    ///    .with_embeddings(true);
499    /// assert!(params.embeddings());
500    /// ```
501    #[must_use]
502    pub fn with_embeddings(mut self, embedding: bool) -> Self {
503        self.context_params.embeddings = embedding;
504        self
505    }
506
507    /// Set the evaluation callback.
508    ///
509    /// # Examples
510    ///
511    /// ```no_run
512    /// extern "C" fn cb_eval_fn(
513    ///     t: *mut llama_cpp_sys_4::ggml_tensor,
514    ///     ask: bool,
515    ///     user_data: *mut std::ffi::c_void,
516    /// ) -> bool {
517    ///     false
518    /// }
519    ///
520    /// use llama_cpp_4::context::params::LlamaContextParams;
521    /// let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));
522    /// ```
523    #[must_use]
524    pub fn with_cb_eval(
525        mut self,
526        cb_eval: llama_cpp_sys_4::ggml_backend_sched_eval_callback,
527    ) -> Self {
528        self.context_params.cb_eval = cb_eval;
529        self
530    }
531
532    /// Set the evaluation callback user data.
533    ///
534    /// # Examples
535    ///
536    /// ```no_run
537    /// use llama_cpp_4::context::params::LlamaContextParams;
538    /// let params = LlamaContextParams::default();
539    /// let user_data = std::ptr::null_mut();
540    /// let params = params.with_cb_eval_user_data(user_data);
541    /// ```
542    #[must_use]
543    pub fn with_cb_eval_user_data(mut self, cb_eval_user_data: *mut std::ffi::c_void) -> Self {
544        self.context_params.cb_eval_user_data = cb_eval_user_data;
545        self
546    }
547
548    /// Attach a [`TensorCapture`](super::tensor_capture::TensorCapture) to
549    /// intercept intermediate tensor outputs during `decode()`.
550    ///
551    /// This sets up the `cb_eval` callback to capture tensors matching the
552    /// capture's filter (e.g. specific layer outputs). After `decode()` the
553    /// captured data can be read from the `TensorCapture`.
554    ///
555    /// # Example
556    ///
557    /// ```rust,ignore
558    /// use llama_cpp_4::context::params::LlamaContextParams;
559    /// use llama_cpp_4::context::tensor_capture::TensorCapture;
560    ///
561    /// let mut capture = TensorCapture::for_layers(&[13, 20, 27]);
562    /// let ctx_params = LlamaContextParams::default()
563    ///     .with_embeddings(true)
564    ///     .with_tensor_capture(&mut capture);
565    /// ```
566    #[must_use]
567    pub fn with_tensor_capture(self, capture: &mut super::tensor_capture::TensorCapture) -> Self {
568        self.with_cb_eval(Some(super::tensor_capture::tensor_capture_callback))
569            .with_cb_eval_user_data(
570                std::ptr::from_mut::<super::tensor_capture::TensorCapture>(capture)
571                    .cast::<std::ffi::c_void>(),
572            )
573    }
574
575    /// Set the storage type for the **K** (key) KV cache tensors.
576    ///
577    /// The default is `GgmlType::F16`.  Quantized types like `GgmlType::Q5_0`
578    /// or `GgmlType::Q4_0` reduce VRAM usage significantly; combining them with
579    /// `TurboQuant` attention rotation (the default) keeps quality high.
580    ///
581    /// # Examples
582    ///
583    /// ```rust
584    /// use llama_cpp_4::context::params::LlamaContextParams;
585    /// use llama_cpp_4::quantize::GgmlType;
586    /// let params = LlamaContextParams::default()
587    ///     .with_cache_type_k(GgmlType::Q5_0);
588    /// ```
589    #[must_use]
590    pub fn with_cache_type_k(mut self, ty: crate::quantize::GgmlType) -> Self {
591        self.context_params.type_k = ty as llama_cpp_sys_4::ggml_type;
592        self
593    }
594
595    /// Get the K-cache storage type.
596    #[must_use]
597    pub fn cache_type_k(&self) -> llama_cpp_sys_4::ggml_type {
598        self.context_params.type_k
599    }
600
601    /// Set the storage type for the **V** (value) KV cache tensors.
602    ///
603    /// See [`with_cache_type_k`](Self::with_cache_type_k) for details.
604    ///
605    /// # Examples
606    ///
607    /// ```rust
608    /// use llama_cpp_4::context::params::LlamaContextParams;
609    /// use llama_cpp_4::quantize::GgmlType;
610    /// let params = LlamaContextParams::default()
611    ///     .with_cache_type_v(GgmlType::Q5_0);
612    /// ```
613    #[must_use]
614    pub fn with_cache_type_v(mut self, ty: crate::quantize::GgmlType) -> Self {
615        self.context_params.type_v = ty as llama_cpp_sys_4::ggml_type;
616        self
617    }
618
619    /// Get the V-cache storage type.
620    #[must_use]
621    pub fn cache_type_v(&self) -> llama_cpp_sys_4::ggml_type {
622        self.context_params.type_v
623    }
624
625    /// Control the `TurboQuant` attention-rotation feature (llama.cpp PR #21038).
626    ///
627    /// By default, llama.cpp applies a Hadamard rotation to Q/K/V tensors
628    /// before writing them into the KV cache.  This significantly improves
629    /// quantized KV-cache quality at near-zero overhead, and is enabled
630    /// automatically for models whose head dimension is a power of two.
631    ///
632    /// Set `disabled = true` to opt out (equivalent to `LLAMA_ATTN_ROT_DISABLE=1`).
633    /// The env-var is applied just before the context is created and restored
634    /// afterwards, so this is safe to call from a single thread.
635    ///
636    /// # Examples
637    ///
638    /// ```rust
639    /// use llama_cpp_4::context::params::LlamaContextParams;
640    /// // Disable rotation for this context only:
641    /// let params = LlamaContextParams::default().with_attn_rot_disabled(true);
642    /// assert!(params.attn_rot_disabled());
643    /// ```
644    #[must_use]
645    pub fn with_attn_rot_disabled(mut self, disabled: bool) -> Self {
646        self.attn_rot_disabled = disabled;
647        self
648    }
649
650    /// Returns `true` if `TurboQuant` attention rotation is disabled for this context.
651    ///
652    /// ```rust
653    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
654    /// assert!(!params.attn_rot_disabled());
655    /// ```
656    #[must_use]
657    pub fn attn_rot_disabled(&self) -> bool {
658        self.attn_rot_disabled
659    }
660
661    /// Set the type of pooling.
662    ///
663    /// # Examples
664    ///
665    /// ```rust
666    /// use llama_cpp_4::context::params::{LlamaContextParams, LlamaPoolingType};
667    /// let params = LlamaContextParams::default()
668    ///     .with_pooling_type(LlamaPoolingType::Last);
669    /// assert_eq!(params.pooling_type(), LlamaPoolingType::Last);
670    /// ```
671    #[must_use]
672    pub fn with_pooling_type(mut self, pooling_type: LlamaPoolingType) -> Self {
673        self.context_params.pooling_type = i32::from(pooling_type);
674        self
675    }
676
677    /// Get the type of pooling.
678    ///
679    /// # Examples
680    ///
681    /// ```rust
682    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
683    /// assert_eq!(params.pooling_type(), llama_cpp_4::context::params::LlamaPoolingType::Unspecified);
684    /// ```
685    #[must_use]
686    pub fn pooling_type(&self) -> LlamaPoolingType {
687        LlamaPoolingType::from(self.context_params.pooling_type)
688    }
689}
690
691/// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)
692/// ```
693/// # use std::num::NonZeroU32;
694/// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
695/// let params = LlamaContextParams::default();
696/// assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
697/// assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);
698/// ```
699impl Default for LlamaContextParams {
700    fn default() -> Self {
701        let context_params = unsafe { llama_cpp_sys_4::llama_context_default_params() };
702        Self {
703            context_params,
704            attn_rot_disabled: false,
705        }
706    }
707}