Skip to main content

llama_cpp_4/context/
params.rs

1//! A safe wrapper around `llama_context_params`.
2use std::fmt::Debug;
3use std::num::NonZeroU32;
4
/// A rusty wrapper around `llama_context_type`.
///
/// Each variant's discriminant is the corresponding `llama_cpp_sys_4` constant, so the
/// enum converts to the raw type with a plain cast.
//
// Cast the sys constants to `u32` so the discriminants compile on both clang
// (where bindgen emits `c_uint`) and MSVC (where it emits `c_int`).
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LlamaContextType {
    /// Default context (standard inference).
    Default = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_DEFAULT as u32,
    /// Multi-token-prediction draft context, used as the draft side of
    /// speculative decoding. Pair with [`crate::mtp::MtpSession`].
    Mtp = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP as u32,
}
18
19impl From<llama_cpp_sys_4::llama_context_type> for LlamaContextType {
20    fn from(value: llama_cpp_sys_4::llama_context_type) -> Self {
21        if value == llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP {
22            Self::Mtp
23        } else {
24            Self::Default
25        }
26    }
27}
28
29impl From<LlamaContextType> for llama_cpp_sys_4::llama_context_type {
30    fn from(value: LlamaContextType) -> Self {
31        value as u32 as Self
32    }
33}
34
/// A rusty wrapper around `rope_scaling_type`.
///
/// The explicit discriminants are the integer values used by the `i32` conversions
/// below, so the enum-to-integer direction is a plain discriminant cast.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RopeScalingType {
    /// The scaling type is unspecified
    Unspecified = -1,
    /// No scaling
    None = 0,
    /// Linear scaling
    Linear = 1,
    /// Yarn scaling
    Yarn = 2,
}

/// Create a `RopeScalingType` from a `c_int` - returns [`RopeScalingType::Unspecified`] if
/// the value is not recognized.
impl From<i32> for RopeScalingType {
    fn from(value: i32) -> Self {
        match value {
            0 => Self::None,
            1 => Self::Linear,
            2 => Self::Yarn,
            // `-1` and every unknown value collapse into `Unspecified`.
            _ => Self::Unspecified,
        }
    }
}

/// Create a `c_int` from a `RopeScalingType`.
impl From<RopeScalingType> for i32 {
    fn from(value: RopeScalingType) -> Self {
        // Casting a fieldless enum yields its declared discriminant (sign-extended from
        // `i8`), so the numeric mapping lives only in the enum definition.
        value as i32
    }
}
73
/// A rusty wrapper around `LLAMA_POOLING_TYPE`.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum LlamaPoolingType {
    /// The pooling type is unspecified
    Unspecified = -1,
    /// No pooling
    None = 0,
    /// Mean pooling
    Mean = 1,
    /// CLS pooling
    Cls = 2,
    /// Last pooling
    Last = 3,
}

/// Create a `LlamaPoolingType` from a `c_int` - returns `LlamaPoolingType::Unspecified` if
/// the value is not recognized.
impl From<i32> for LlamaPoolingType {
    fn from(value: i32) -> Self {
        // Every variant that has a dedicated integer value; anything that matches none
        // of them (including `-1`) falls back to `Unspecified`.
        let recognized = [Self::None, Self::Mean, Self::Cls, Self::Last];
        recognized
            .iter()
            .copied()
            .find(|candidate| *candidate as i32 == value)
            .unwrap_or(Self::Unspecified)
    }
}

/// Create a `c_int` from a `LlamaPoolingType`.
impl From<LlamaPoolingType> for i32 {
    fn from(value: LlamaPoolingType) -> Self {
        // Read the `#[repr(i8)]` discriminant, then widen it losslessly.
        let discriminant = value as i8;
        i32::from(discriminant)
    }
}
116
/// A safe wrapper around `llama_context_params`.
///
/// Generally this should be created with [`Default::default()`] and then modified with `with_*` methods.
/// Each `with_*` method consumes the value and returns the updated one, so calls can be chained.
///
/// # Examples
///
/// ```rust
/// # use std::num::NonZeroU32;
/// use llama_cpp_4::context::params::LlamaContextParams;
///
/// let ctx_params = LlamaContextParams::default()
///     .with_n_ctx(NonZeroU32::new(2048));
///
/// assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));
/// ```
#[derive(Debug, Clone)]
// NOTE(review): `clippy::struct_excessive_bools` looks stale — this struct has a single
// `bool` field. Confirm before removing the allow.
#[allow(
    missing_docs,
    clippy::struct_excessive_bools,
    clippy::module_name_repetitions
)]
pub struct LlamaContextParams {
    /// The raw llama.cpp parameter struct that the `with_*` builders write to and the
    /// getters read from.
    pub(crate) context_params: llama_cpp_sys_4::llama_context_params,
    /// When `true`, the `TurboQuant` attention rotation (PR #21038) will be
    /// disabled for any context created from these params.
    pub(crate) attn_rot_disabled: bool,
}
139    pub(crate) context_params: llama_cpp_sys_4::llama_context_params,
140    /// When `true`, the `TurboQuant` attention rotation (PR #21038) will be
141    /// disabled for any context created from these params.
142    pub(crate) attn_rot_disabled: bool,
143}
144
// SAFETY: the wrapped `llama_context_params` holds raw pointers, which is what stops the
// compiler from deriving `Send`/`Sync` automatically. This wrapper never dereferences
// those pointers and exposes no getter for them.
//
// NOTE(review): the original justification ("we do not currently allow setting ... the
// pointers") no longer matches the API: `with_cb_eval_user_data` and
// `with_tensor_capture` both store caller-supplied raw pointers. Whether sharing the
// params across threads is sound therefore depends on how the pointee is used by the
// callback — confirm, and consider documenting that requirement on those builders.
unsafe impl Send for LlamaContextParams {}
// SAFETY: same reasoning (and the same open review question) as the `Send` impl above.
unsafe impl Sync for LlamaContextParams {}
148
149impl LlamaContextParams {
150    /// Set the side of the context
151    ///
152    /// # Examples
153    ///
154    /// ```rust
155    /// # use std::num::NonZeroU32;
156    /// use llama_cpp_4::context::params::LlamaContextParams;
157    /// let params = LlamaContextParams::default();
158    /// let params = params.with_n_ctx(NonZeroU32::new(2048));
159    /// assert_eq!(params.n_ctx(), NonZeroU32::new(2048));
160    /// ```
161    #[must_use]
162    pub fn with_n_ctx(mut self, n_ctx: Option<NonZeroU32>) -> Self {
163        self.context_params.n_ctx = n_ctx.map_or(0, std::num::NonZeroU32::get);
164        self
165    }
166
167    /// Get the size of the context.
168    ///
169    /// [`None`] if the context size is specified by the model and not the context.
170    ///
171    /// # Examples
172    ///
173    /// ```rust
174    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
175    /// assert_eq!(params.n_ctx(), std::num::NonZeroU32::new(512));
176    #[must_use]
177    pub fn n_ctx(&self) -> Option<NonZeroU32> {
178        NonZeroU32::new(self.context_params.n_ctx)
179    }
180
181    /// Set the `n_batch`
182    ///
183    /// # Examples
184    ///
185    /// ```rust
186    /// # use std::num::NonZeroU32;
187    /// use llama_cpp_4::context::params::LlamaContextParams;
188    /// let params = LlamaContextParams::default()
189    ///     .with_n_batch(2048);
190    /// assert_eq!(params.n_batch(), 2048);
191    /// ```
192    #[must_use]
193    pub fn with_n_batch(mut self, n_batch: u32) -> Self {
194        self.context_params.n_batch = n_batch;
195        self
196    }
197
198    /// Get the `n_batch`
199    ///
200    /// # Examples
201    ///
202    /// ```rust
203    /// use llama_cpp_4::context::params::LlamaContextParams;
204    /// let params = LlamaContextParams::default();
205    /// assert_eq!(params.n_batch(), 2048);
206    /// ```
207    #[must_use]
208    pub fn n_batch(&self) -> u32 {
209        self.context_params.n_batch
210    }
211
212    /// Set the `n_ubatch`
213    ///
214    /// # Examples
215    ///
216    /// ```rust
217    /// # use std::num::NonZeroU32;
218    /// use llama_cpp_4::context::params::LlamaContextParams;
219    /// let params = LlamaContextParams::default()
220    ///     .with_n_ubatch(512);
221    /// assert_eq!(params.n_ubatch(), 512);
222    /// ```
223    #[must_use]
224    pub fn with_n_ubatch(mut self, n_ubatch: u32) -> Self {
225        self.context_params.n_ubatch = n_ubatch;
226        self
227    }
228
229    /// Get the `n_ubatch`
230    ///
231    /// # Examples
232    ///
233    /// ```rust
234    /// use llama_cpp_4::context::params::LlamaContextParams;
235    /// let params = LlamaContextParams::default();
236    /// assert_eq!(params.n_ubatch(), 512);
237    /// ```
238    #[must_use]
239    pub fn n_ubatch(&self) -> u32 {
240        self.context_params.n_ubatch
241    }
242
243    /// Set the context type (e.g. [`LlamaContextType::Mtp`] for the draft context in
244    /// [`crate::mtp::MtpSession`]).
245    #[must_use]
246    pub fn with_ctx_type(mut self, ctx_type: LlamaContextType) -> Self {
247        self.context_params.ctx_type = ctx_type.into();
248        self
249    }
250
251    /// Get the configured context type.
252    #[must_use]
253    pub fn ctx_type(&self) -> LlamaContextType {
254        self.context_params.ctx_type.into()
255    }
256
257    /// Set the number of recurrent-state snapshots per sequence (MTP rollback).
258    ///
259    /// Must be `>=` [`MtpSessionConfig::n_draft_max`](crate::mtp::MtpSessionConfig::n_draft_max)
260    /// on the draft context. See [`crate::mtp`].
261    #[must_use]
262    pub fn with_n_rs_seq(mut self, n_rs_seq: u32) -> Self {
263        self.context_params.n_rs_seq = n_rs_seq;
264        self
265    }
266
267    /// Get the number of recurrent-state snapshots per sequence used for MTP rollback.
268    #[must_use]
269    pub fn n_rs_seq(&self) -> u32 {
270        self.context_params.n_rs_seq
271    }
272
273    /// Set the `flash_attention` parameter
274    ///
275    /// # Examples
276    ///
277    /// ```rust
278    /// use llama_cpp_4::context::params::LlamaContextParams;
279    /// let params = LlamaContextParams::default()
280    ///     .with_flash_attention(true);
281    /// assert_eq!(params.flash_attention(), true);
282    /// ```
283    #[must_use]
284    pub fn with_flash_attention(mut self, enabled: bool) -> Self {
285        self.context_params.flash_attn_type = if enabled {
286            llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
287        } else {
288            llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_DISABLED
289        };
290        self
291    }
292
293    /// Get the `flash_attention` parameter
294    ///
295    /// # Examples
296    ///
297    /// ```rust
298    /// use llama_cpp_4::context::params::LlamaContextParams;
299    /// let params = LlamaContextParams::default();
300    /// assert_eq!(params.flash_attention(), false);
301    /// ```
302    #[must_use]
303    pub fn flash_attention(&self) -> bool {
304        self.context_params.flash_attn_type == llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
305    }
306
307    /// Set the `offload_kqv` parameter to control offloading KV cache & KQV ops to GPU
308    ///
309    /// # Examples
310    ///
311    /// ```rust
312    /// use llama_cpp_4::context::params::LlamaContextParams;
313    /// let params = LlamaContextParams::default()
314    ///     .with_offload_kqv(false);
315    /// assert_eq!(params.offload_kqv(), false);
316    /// ```
317    #[must_use]
318    pub fn with_offload_kqv(mut self, enabled: bool) -> Self {
319        self.context_params.offload_kqv = enabled;
320        self
321    }
322
323    /// Get the `offload_kqv` parameter
324    ///
325    /// # Examples
326    ///
327    /// ```rust
328    /// use llama_cpp_4::context::params::LlamaContextParams;
329    /// let params = LlamaContextParams::default();
330    /// assert_eq!(params.offload_kqv(), true);
331    /// ```
332    #[must_use]
333    pub fn offload_kqv(&self) -> bool {
334        self.context_params.offload_kqv
335    }
336
337    /// Set the type of rope scaling.
338    ///
339    /// # Examples
340    ///
341    /// ```rust
342    /// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
343    /// let params = LlamaContextParams::default()
344    ///     .with_rope_scaling_type(RopeScalingType::Linear);
345    /// assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);
346    /// ```
347    #[must_use]
348    pub fn with_rope_scaling_type(mut self, rope_scaling_type: RopeScalingType) -> Self {
349        self.context_params.rope_scaling_type = i32::from(rope_scaling_type);
350        self
351    }
352
353    /// Get the type of rope scaling.
354    ///
355    /// # Examples
356    ///
357    /// ```rust
358    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
359    /// assert_eq!(params.rope_scaling_type(), llama_cpp_4::context::params::RopeScalingType::Unspecified);
360    /// ```
361    #[must_use]
362    pub fn rope_scaling_type(&self) -> RopeScalingType {
363        RopeScalingType::from(self.context_params.rope_scaling_type)
364    }
365
366    /// Set the rope frequency base.
367    ///
368    /// # Examples
369    ///
370    /// ```rust
371    /// use llama_cpp_4::context::params::LlamaContextParams;
372    /// let params = LlamaContextParams::default()
373    ///    .with_rope_freq_base(0.5);
374    /// assert_eq!(params.rope_freq_base(), 0.5);
375    /// ```
376    #[must_use]
377    pub fn with_rope_freq_base(mut self, rope_freq_base: f32) -> Self {
378        self.context_params.rope_freq_base = rope_freq_base;
379        self
380    }
381
382    /// Get the rope frequency base.
383    ///
384    /// # Examples
385    ///
386    /// ```rust
387    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
388    /// assert_eq!(params.rope_freq_base(), 0.0);
389    /// ```
390    #[must_use]
391    pub fn rope_freq_base(&self) -> f32 {
392        self.context_params.rope_freq_base
393    }
394
395    /// Set the rope frequency scale.
396    ///
397    /// # Examples
398    ///
399    /// ```rust
400    /// use llama_cpp_4::context::params::LlamaContextParams;
401    /// let params = LlamaContextParams::default()
402    ///   .with_rope_freq_scale(0.5);
403    /// assert_eq!(params.rope_freq_scale(), 0.5);
404    /// ```
405    #[must_use]
406    pub fn with_rope_freq_scale(mut self, rope_freq_scale: f32) -> Self {
407        self.context_params.rope_freq_scale = rope_freq_scale;
408        self
409    }
410
411    /// Get the rope frequency scale.
412    ///
413    /// # Examples
414    ///
415    /// ```rust
416    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
417    /// assert_eq!(params.rope_freq_scale(), 0.0);
418    /// ```
419    #[must_use]
420    pub fn rope_freq_scale(&self) -> f32 {
421        self.context_params.rope_freq_scale
422    }
423
424    /// Get the number of threads.
425    ///
426    /// # Examples
427    ///
428    /// ```rust
429    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
430    /// assert_eq!(params.n_threads(), 4);
431    /// ```
432    #[must_use]
433    pub fn n_threads(&self) -> i32 {
434        self.context_params.n_threads
435    }
436
437    /// Get the number of threads allocated for batches.
438    ///
439    /// # Examples
440    ///
441    /// ```rust
442    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
443    /// assert_eq!(params.n_threads_batch(), 4);
444    /// ```
445    #[must_use]
446    pub fn n_threads_batch(&self) -> i32 {
447        self.context_params.n_threads_batch
448    }
449
450    /// Set the number of threads.
451    ///
452    /// # Examples
453    ///
454    /// ```rust
455    /// use llama_cpp_4::context::params::LlamaContextParams;
456    /// let params = LlamaContextParams::default()
457    ///    .with_n_threads(8);
458    /// assert_eq!(params.n_threads(), 8);
459    /// ```
460    #[must_use]
461    pub fn with_n_threads(mut self, n_threads: i32) -> Self {
462        self.context_params.n_threads = n_threads;
463        self
464    }
465
466    /// Set the number of threads allocated for batches.
467    ///
468    /// # Examples
469    ///
470    /// ```rust
471    /// use llama_cpp_4::context::params::LlamaContextParams;
472    /// let params = LlamaContextParams::default()
473    ///    .with_n_threads_batch(8);
474    /// assert_eq!(params.n_threads_batch(), 8);
475    /// ```
476    #[must_use]
477    pub fn with_n_threads_batch(mut self, n_threads: i32) -> Self {
478        self.context_params.n_threads_batch = n_threads;
479        self
480    }
481
482    /// Check whether embeddings are enabled
483    ///
484    /// # Examples
485    ///
486    /// ```rust
487    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
488    /// assert!(!params.embeddings());
489    /// ```
490    #[must_use]
491    pub fn embeddings(&self) -> bool {
492        self.context_params.embeddings
493    }
494
495    /// Enable the use of embeddings
496    ///
497    /// # Examples
498    ///
499    /// ```rust
500    /// use llama_cpp_4::context::params::LlamaContextParams;
501    /// let params = LlamaContextParams::default()
502    ///    .with_embeddings(true);
503    /// assert!(params.embeddings());
504    /// ```
505    #[must_use]
506    pub fn with_embeddings(mut self, embedding: bool) -> Self {
507        self.context_params.embeddings = embedding;
508        self
509    }
510
511    /// Set the evaluation callback.
512    ///
513    /// # Examples
514    ///
515    /// ```no_run
516    /// extern "C" fn cb_eval_fn(
517    ///     t: *mut llama_cpp_sys_4::ggml_tensor,
518    ///     ask: bool,
519    ///     user_data: *mut std::ffi::c_void,
520    /// ) -> bool {
521    ///     false
522    /// }
523    ///
524    /// use llama_cpp_4::context::params::LlamaContextParams;
525    /// let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));
526    /// ```
527    #[must_use]
528    pub fn with_cb_eval(
529        mut self,
530        cb_eval: llama_cpp_sys_4::ggml_backend_sched_eval_callback,
531    ) -> Self {
532        self.context_params.cb_eval = cb_eval;
533        self
534    }
535
536    /// Set the evaluation callback user data.
537    ///
538    /// # Examples
539    ///
540    /// ```no_run
541    /// use llama_cpp_4::context::params::LlamaContextParams;
542    /// let params = LlamaContextParams::default();
543    /// let user_data = std::ptr::null_mut();
544    /// let params = params.with_cb_eval_user_data(user_data);
545    /// ```
546    #[must_use]
547    pub fn with_cb_eval_user_data(mut self, cb_eval_user_data: *mut std::ffi::c_void) -> Self {
548        self.context_params.cb_eval_user_data = cb_eval_user_data;
549        self
550    }
551
552    /// Attach a [`TensorCapture`](super::tensor_capture::TensorCapture) to
553    /// intercept intermediate tensor outputs during `decode()`.
554    ///
555    /// This sets up the `cb_eval` callback to capture tensors matching the
556    /// capture's filter (e.g. specific layer outputs). After `decode()` the
557    /// captured data can be read from the `TensorCapture`.
558    ///
559    /// # Example
560    ///
561    /// ```rust,ignore
562    /// use llama_cpp_4::context::params::LlamaContextParams;
563    /// use llama_cpp_4::context::tensor_capture::TensorCapture;
564    ///
565    /// let mut capture = TensorCapture::for_layers(&[13, 20, 27]);
566    /// let ctx_params = LlamaContextParams::default()
567    ///     .with_embeddings(true)
568    ///     .with_tensor_capture(&mut capture);
569    /// ```
570    #[must_use]
571    pub fn with_tensor_capture(self, capture: &mut super::tensor_capture::TensorCapture) -> Self {
572        self.with_cb_eval(Some(super::tensor_capture::tensor_capture_callback))
573            .with_cb_eval_user_data(
574                std::ptr::from_mut::<super::tensor_capture::TensorCapture>(capture)
575                    .cast::<std::ffi::c_void>(),
576            )
577    }
578
579    /// Set the storage type for the **K** (key) KV cache tensors.
580    ///
581    /// The default is `GgmlType::F16`.  Quantized types like `GgmlType::Q5_0`
582    /// or `GgmlType::Q4_0` reduce VRAM usage significantly; combining them with
583    /// `TurboQuant` attention rotation (the default) keeps quality high.
584    ///
585    /// # Examples
586    ///
587    /// ```rust
588    /// use llama_cpp_4::context::params::LlamaContextParams;
589    /// use llama_cpp_4::quantize::GgmlType;
590    /// let params = LlamaContextParams::default()
591    ///     .with_cache_type_k(GgmlType::Q5_0);
592    /// ```
593    #[must_use]
594    pub fn with_cache_type_k(mut self, ty: crate::quantize::GgmlType) -> Self {
595        self.context_params.type_k = ty as llama_cpp_sys_4::ggml_type;
596        self
597    }
598
599    /// Get the K-cache storage type.
600    #[must_use]
601    pub fn cache_type_k(&self) -> llama_cpp_sys_4::ggml_type {
602        self.context_params.type_k
603    }
604
605    /// Set the storage type for the **V** (value) KV cache tensors.
606    ///
607    /// See [`with_cache_type_k`](Self::with_cache_type_k) for details.
608    ///
609    /// # Examples
610    ///
611    /// ```rust
612    /// use llama_cpp_4::context::params::LlamaContextParams;
613    /// use llama_cpp_4::quantize::GgmlType;
614    /// let params = LlamaContextParams::default()
615    ///     .with_cache_type_v(GgmlType::Q5_0);
616    /// ```
617    #[must_use]
618    pub fn with_cache_type_v(mut self, ty: crate::quantize::GgmlType) -> Self {
619        self.context_params.type_v = ty as llama_cpp_sys_4::ggml_type;
620        self
621    }
622
623    /// Get the V-cache storage type.
624    #[must_use]
625    pub fn cache_type_v(&self) -> llama_cpp_sys_4::ggml_type {
626        self.context_params.type_v
627    }
628
629    /// Control the `TurboQuant` attention-rotation feature (llama.cpp PR #21038).
630    ///
631    /// By default, llama.cpp applies a Hadamard rotation to Q/K/V tensors
632    /// before writing them into the KV cache.  This significantly improves
633    /// quantized KV-cache quality at near-zero overhead, and is enabled
634    /// automatically for models whose head dimension is a power of two.
635    ///
636    /// Set `disabled = true` to opt out (equivalent to `LLAMA_ATTN_ROT_DISABLE=1`).
637    /// The env-var is applied just before the context is created and restored
638    /// afterwards, so this is safe to call from a single thread.
639    ///
640    /// # Examples
641    ///
642    /// ```rust
643    /// use llama_cpp_4::context::params::LlamaContextParams;
644    /// // Disable rotation for this context only:
645    /// let params = LlamaContextParams::default().with_attn_rot_disabled(true);
646    /// assert!(params.attn_rot_disabled());
647    /// ```
648    #[must_use]
649    pub fn with_attn_rot_disabled(mut self, disabled: bool) -> Self {
650        self.attn_rot_disabled = disabled;
651        self
652    }
653
654    /// Returns `true` if `TurboQuant` attention rotation is disabled for this context.
655    ///
656    /// ```rust
657    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
658    /// assert!(!params.attn_rot_disabled());
659    /// ```
660    #[must_use]
661    pub fn attn_rot_disabled(&self) -> bool {
662        self.attn_rot_disabled
663    }
664
665    /// Set the type of pooling.
666    ///
667    /// # Examples
668    ///
669    /// ```rust
670    /// use llama_cpp_4::context::params::{LlamaContextParams, LlamaPoolingType};
671    /// let params = LlamaContextParams::default()
672    ///     .with_pooling_type(LlamaPoolingType::Last);
673    /// assert_eq!(params.pooling_type(), LlamaPoolingType::Last);
674    /// ```
675    #[must_use]
676    pub fn with_pooling_type(mut self, pooling_type: LlamaPoolingType) -> Self {
677        self.context_params.pooling_type = i32::from(pooling_type);
678        self
679    }
680
681    /// Get the type of pooling.
682    ///
683    /// # Examples
684    ///
685    /// ```rust
686    /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
687    /// assert_eq!(params.pooling_type(), llama_cpp_4::context::params::LlamaPoolingType::Unspecified);
688    /// ```
689    #[must_use]
690    pub fn pooling_type(&self) -> LlamaPoolingType {
691        LlamaPoolingType::from(self.context_params.pooling_type)
692    }
693}
694
695/// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)
696/// ```
697/// # use std::num::NonZeroU32;
698/// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
699/// let params = LlamaContextParams::default();
700/// assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
701/// assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);
702/// ```
703impl Default for LlamaContextParams {
704    fn default() -> Self {
705        let context_params = unsafe { llama_cpp_sys_4::llama_context_default_params() };
706        Self {
707            context_params,
708            attn_rot_disabled: false,
709        }
710    }
711}