llama_cpp_4/context/params.rs
1//! A safe wrapper around `llama_context_params`.
2use std::fmt::Debug;
3use std::num::NonZeroU32;
4
5/// A rusty wrapper around `llama_context_type`.
6//
7// Cast the sys constants to `u32` so the discriminants compile on both clang
8// (where bindgen emits `c_uint`) and MSVC (where it emits `c_int`).
9#[repr(u32)]
10#[derive(Copy, Clone, Debug, PartialEq, Eq)]
11pub enum LlamaContextType {
12 /// Default context (standard inference).
13 Default = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_DEFAULT as u32,
14 /// Multi-token-prediction draft context, used as the draft side of
15 /// speculative decoding. Pair with [`crate::mtp::MtpSession`].
16 Mtp = llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP as u32,
17}
18
19impl From<llama_cpp_sys_4::llama_context_type> for LlamaContextType {
20 fn from(value: llama_cpp_sys_4::llama_context_type) -> Self {
21 if value == llama_cpp_sys_4::LLAMA_CONTEXT_TYPE_MTP {
22 Self::Mtp
23 } else {
24 Self::Default
25 }
26 }
27}
28
29impl From<LlamaContextType> for llama_cpp_sys_4::llama_context_type {
30 fn from(value: LlamaContextType) -> Self {
31 value as u32 as Self
32 }
33}
34
/// A rusty wrapper around `rope_scaling_type`.
///
/// The discriminants are the raw integer values exchanged with the C API.
#[repr(i8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RopeScalingType {
    /// The scaling type is unspecified
    Unspecified = -1,
    /// No scaling
    None = 0,
    /// Linear scaling
    Linear = 1,
    /// Yarn scaling
    Yarn = 2,
}

/// Create a `RopeScalingType` from a `c_int` - returns `RopeScalingType::Unspecified` if
/// the value is not recognized.
impl From<i32> for RopeScalingType {
    fn from(value: i32) -> Self {
        match value {
            0 => Self::None,
            1 => Self::Linear,
            2 => Self::Yarn,
            // `-1` and every unknown value collapse to `Unspecified`.
            _ => Self::Unspecified,
        }
    }
}

/// Create a `c_int` from a `RopeScalingType`.
impl From<RopeScalingType> for i32 {
    fn from(value: RopeScalingType) -> Self {
        // The enum is `#[repr(i8)]` with explicit discriminants, so reading the
        // discriminant and widening it losslessly keeps a single source of
        // truth for the numeric values.
        i32::from(value as i8)
    }
}
73
/// Rust-side mirror of `LLAMA_POOLING_TYPE`.
///
/// The discriminants are the raw integer values exchanged with the C API.
#[repr(i8)]
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum LlamaPoolingType {
    /// The pooling type is unspecified
    Unspecified = -1,
    /// No pooling
    None = 0,
    /// Mean pooling
    Mean = 1,
    /// CLS pooling
    Cls = 2,
    /// Last pooling
    Last = 3,
}

/// Build a `LlamaPoolingType` from a `c_int`; any value that is not one of the
/// known pooling types yields `LlamaPoolingType::Unspecified`.
impl From<i32> for LlamaPoolingType {
    fn from(value: i32) -> Self {
        // Known pooling types, indexed by their raw value.
        const KNOWN: [LlamaPoolingType; 4] = [
            LlamaPoolingType::None,
            LlamaPoolingType::Mean,
            LlamaPoolingType::Cls,
            LlamaPoolingType::Last,
        ];

        // Negative values fail the `usize` conversion and values past the end
        // of the table miss the lookup; both fall back to `Unspecified`.
        usize::try_from(value)
            .ok()
            .and_then(|index| KNOWN.get(index).copied())
            .unwrap_or(Self::Unspecified)
    }
}

/// Turn a `LlamaPoolingType` back into its `c_int` value.
impl From<LlamaPoolingType> for i32 {
    fn from(value: LlamaPoolingType) -> Self {
        // Read the `#[repr(i8)]` discriminant and widen it losslessly.
        let raw = value as i8;
        i32::from(raw)
    }
}
116
117/// A safe wrapper around `llama_context_params`.
118///
119/// Generally this should be created with [`Default::default()`] and then modified with `with_*` methods.
120///
121/// # Examples
122///
123/// ```rust
124/// # use std::num::NonZeroU32;
125/// use llama_cpp_4::context::params::LlamaContextParams;
126///
127/// let ctx_params = LlamaContextParams::default()
128/// .with_n_ctx(NonZeroU32::new(2048));
129///
130/// assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));
131/// ```
132#[derive(Debug, Clone)]
133#[allow(
134 missing_docs,
135 clippy::struct_excessive_bools,
136 clippy::module_name_repetitions
137)]
138pub struct LlamaContextParams {
139 pub(crate) context_params: llama_cpp_sys_4::llama_context_params,
140 /// When `true`, the `TurboQuant` attention rotation (PR #21038) will be
141 /// disabled for any context created from these params.
142 pub(crate) attn_rot_disabled: bool,
143}
144
145/// SAFETY: we do not currently allow setting or reading the pointers that cause this to not be automatically send or sync.
146unsafe impl Send for LlamaContextParams {}
147unsafe impl Sync for LlamaContextParams {}
148
149impl LlamaContextParams {
150 /// Set the side of the context
151 ///
152 /// # Examples
153 ///
154 /// ```rust
155 /// # use std::num::NonZeroU32;
156 /// use llama_cpp_4::context::params::LlamaContextParams;
157 /// let params = LlamaContextParams::default();
158 /// let params = params.with_n_ctx(NonZeroU32::new(2048));
159 /// assert_eq!(params.n_ctx(), NonZeroU32::new(2048));
160 /// ```
161 #[must_use]
162 pub fn with_n_ctx(mut self, n_ctx: Option<NonZeroU32>) -> Self {
163 self.context_params.n_ctx = n_ctx.map_or(0, std::num::NonZeroU32::get);
164 self
165 }
166
167 /// Get the size of the context.
168 ///
169 /// [`None`] if the context size is specified by the model and not the context.
170 ///
171 /// # Examples
172 ///
173 /// ```rust
174 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
175 /// assert_eq!(params.n_ctx(), std::num::NonZeroU32::new(512));
176 #[must_use]
177 pub fn n_ctx(&self) -> Option<NonZeroU32> {
178 NonZeroU32::new(self.context_params.n_ctx)
179 }
180
181 /// Set the `n_batch`
182 ///
183 /// # Examples
184 ///
185 /// ```rust
186 /// # use std::num::NonZeroU32;
187 /// use llama_cpp_4::context::params::LlamaContextParams;
188 /// let params = LlamaContextParams::default()
189 /// .with_n_batch(2048);
190 /// assert_eq!(params.n_batch(), 2048);
191 /// ```
192 #[must_use]
193 pub fn with_n_batch(mut self, n_batch: u32) -> Self {
194 self.context_params.n_batch = n_batch;
195 self
196 }
197
198 /// Get the `n_batch`
199 ///
200 /// # Examples
201 ///
202 /// ```rust
203 /// use llama_cpp_4::context::params::LlamaContextParams;
204 /// let params = LlamaContextParams::default();
205 /// assert_eq!(params.n_batch(), 2048);
206 /// ```
207 #[must_use]
208 pub fn n_batch(&self) -> u32 {
209 self.context_params.n_batch
210 }
211
212 /// Set the `n_ubatch`
213 ///
214 /// # Examples
215 ///
216 /// ```rust
217 /// # use std::num::NonZeroU32;
218 /// use llama_cpp_4::context::params::LlamaContextParams;
219 /// let params = LlamaContextParams::default()
220 /// .with_n_ubatch(512);
221 /// assert_eq!(params.n_ubatch(), 512);
222 /// ```
223 #[must_use]
224 pub fn with_n_ubatch(mut self, n_ubatch: u32) -> Self {
225 self.context_params.n_ubatch = n_ubatch;
226 self
227 }
228
229 /// Get the `n_ubatch`
230 ///
231 /// # Examples
232 ///
233 /// ```rust
234 /// use llama_cpp_4::context::params::LlamaContextParams;
235 /// let params = LlamaContextParams::default();
236 /// assert_eq!(params.n_ubatch(), 512);
237 /// ```
238 #[must_use]
239 pub fn n_ubatch(&self) -> u32 {
240 self.context_params.n_ubatch
241 }
242
243 /// Set the context type (e.g. [`LlamaContextType::Mtp`] for the draft context in
244 /// [`crate::mtp::MtpSession`]).
245 #[must_use]
246 pub fn with_ctx_type(mut self, ctx_type: LlamaContextType) -> Self {
247 self.context_params.ctx_type = ctx_type.into();
248 self
249 }
250
251 /// Get the configured context type.
252 #[must_use]
253 pub fn ctx_type(&self) -> LlamaContextType {
254 self.context_params.ctx_type.into()
255 }
256
257 /// Set the number of recurrent-state snapshots per sequence (MTP rollback).
258 ///
259 /// Must be `>=` [`MtpSessionConfig::n_draft_max`](crate::mtp::MtpSessionConfig::n_draft_max)
260 /// on the draft context. See [`crate::mtp`].
261 #[must_use]
262 pub fn with_n_rs_seq(mut self, n_rs_seq: u32) -> Self {
263 self.context_params.n_rs_seq = n_rs_seq;
264 self
265 }
266
267 /// Get the number of recurrent-state snapshots per sequence used for MTP rollback.
268 #[must_use]
269 pub fn n_rs_seq(&self) -> u32 {
270 self.context_params.n_rs_seq
271 }
272
273 /// Set the `flash_attention` parameter
274 ///
275 /// # Examples
276 ///
277 /// ```rust
278 /// use llama_cpp_4::context::params::LlamaContextParams;
279 /// let params = LlamaContextParams::default()
280 /// .with_flash_attention(true);
281 /// assert_eq!(params.flash_attention(), true);
282 /// ```
283 #[must_use]
284 pub fn with_flash_attention(mut self, enabled: bool) -> Self {
285 self.context_params.flash_attn_type = if enabled {
286 llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
287 } else {
288 llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_DISABLED
289 };
290 self
291 }
292
293 /// Get the `flash_attention` parameter
294 ///
295 /// # Examples
296 ///
297 /// ```rust
298 /// use llama_cpp_4::context::params::LlamaContextParams;
299 /// let params = LlamaContextParams::default();
300 /// assert_eq!(params.flash_attention(), false);
301 /// ```
302 #[must_use]
303 pub fn flash_attention(&self) -> bool {
304 self.context_params.flash_attn_type == llama_cpp_sys_4::LLAMA_FLASH_ATTN_TYPE_ENABLED
305 }
306
307 /// Set the `offload_kqv` parameter to control offloading KV cache & KQV ops to GPU
308 ///
309 /// # Examples
310 ///
311 /// ```rust
312 /// use llama_cpp_4::context::params::LlamaContextParams;
313 /// let params = LlamaContextParams::default()
314 /// .with_offload_kqv(false);
315 /// assert_eq!(params.offload_kqv(), false);
316 /// ```
317 #[must_use]
318 pub fn with_offload_kqv(mut self, enabled: bool) -> Self {
319 self.context_params.offload_kqv = enabled;
320 self
321 }
322
323 /// Get the `offload_kqv` parameter
324 ///
325 /// # Examples
326 ///
327 /// ```rust
328 /// use llama_cpp_4::context::params::LlamaContextParams;
329 /// let params = LlamaContextParams::default();
330 /// assert_eq!(params.offload_kqv(), true);
331 /// ```
332 #[must_use]
333 pub fn offload_kqv(&self) -> bool {
334 self.context_params.offload_kqv
335 }
336
337 /// Set the type of rope scaling.
338 ///
339 /// # Examples
340 ///
341 /// ```rust
342 /// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
343 /// let params = LlamaContextParams::default()
344 /// .with_rope_scaling_type(RopeScalingType::Linear);
345 /// assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);
346 /// ```
347 #[must_use]
348 pub fn with_rope_scaling_type(mut self, rope_scaling_type: RopeScalingType) -> Self {
349 self.context_params.rope_scaling_type = i32::from(rope_scaling_type);
350 self
351 }
352
353 /// Get the type of rope scaling.
354 ///
355 /// # Examples
356 ///
357 /// ```rust
358 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
359 /// assert_eq!(params.rope_scaling_type(), llama_cpp_4::context::params::RopeScalingType::Unspecified);
360 /// ```
361 #[must_use]
362 pub fn rope_scaling_type(&self) -> RopeScalingType {
363 RopeScalingType::from(self.context_params.rope_scaling_type)
364 }
365
366 /// Set the rope frequency base.
367 ///
368 /// # Examples
369 ///
370 /// ```rust
371 /// use llama_cpp_4::context::params::LlamaContextParams;
372 /// let params = LlamaContextParams::default()
373 /// .with_rope_freq_base(0.5);
374 /// assert_eq!(params.rope_freq_base(), 0.5);
375 /// ```
376 #[must_use]
377 pub fn with_rope_freq_base(mut self, rope_freq_base: f32) -> Self {
378 self.context_params.rope_freq_base = rope_freq_base;
379 self
380 }
381
382 /// Get the rope frequency base.
383 ///
384 /// # Examples
385 ///
386 /// ```rust
387 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
388 /// assert_eq!(params.rope_freq_base(), 0.0);
389 /// ```
390 #[must_use]
391 pub fn rope_freq_base(&self) -> f32 {
392 self.context_params.rope_freq_base
393 }
394
395 /// Set the rope frequency scale.
396 ///
397 /// # Examples
398 ///
399 /// ```rust
400 /// use llama_cpp_4::context::params::LlamaContextParams;
401 /// let params = LlamaContextParams::default()
402 /// .with_rope_freq_scale(0.5);
403 /// assert_eq!(params.rope_freq_scale(), 0.5);
404 /// ```
405 #[must_use]
406 pub fn with_rope_freq_scale(mut self, rope_freq_scale: f32) -> Self {
407 self.context_params.rope_freq_scale = rope_freq_scale;
408 self
409 }
410
411 /// Get the rope frequency scale.
412 ///
413 /// # Examples
414 ///
415 /// ```rust
416 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
417 /// assert_eq!(params.rope_freq_scale(), 0.0);
418 /// ```
419 #[must_use]
420 pub fn rope_freq_scale(&self) -> f32 {
421 self.context_params.rope_freq_scale
422 }
423
424 /// Get the number of threads.
425 ///
426 /// # Examples
427 ///
428 /// ```rust
429 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
430 /// assert_eq!(params.n_threads(), 4);
431 /// ```
432 #[must_use]
433 pub fn n_threads(&self) -> i32 {
434 self.context_params.n_threads
435 }
436
437 /// Get the number of threads allocated for batches.
438 ///
439 /// # Examples
440 ///
441 /// ```rust
442 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
443 /// assert_eq!(params.n_threads_batch(), 4);
444 /// ```
445 #[must_use]
446 pub fn n_threads_batch(&self) -> i32 {
447 self.context_params.n_threads_batch
448 }
449
450 /// Set the number of threads.
451 ///
452 /// # Examples
453 ///
454 /// ```rust
455 /// use llama_cpp_4::context::params::LlamaContextParams;
456 /// let params = LlamaContextParams::default()
457 /// .with_n_threads(8);
458 /// assert_eq!(params.n_threads(), 8);
459 /// ```
460 #[must_use]
461 pub fn with_n_threads(mut self, n_threads: i32) -> Self {
462 self.context_params.n_threads = n_threads;
463 self
464 }
465
466 /// Set the number of threads allocated for batches.
467 ///
468 /// # Examples
469 ///
470 /// ```rust
471 /// use llama_cpp_4::context::params::LlamaContextParams;
472 /// let params = LlamaContextParams::default()
473 /// .with_n_threads_batch(8);
474 /// assert_eq!(params.n_threads_batch(), 8);
475 /// ```
476 #[must_use]
477 pub fn with_n_threads_batch(mut self, n_threads: i32) -> Self {
478 self.context_params.n_threads_batch = n_threads;
479 self
480 }
481
482 /// Check whether embeddings are enabled
483 ///
484 /// # Examples
485 ///
486 /// ```rust
487 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
488 /// assert!(!params.embeddings());
489 /// ```
490 #[must_use]
491 pub fn embeddings(&self) -> bool {
492 self.context_params.embeddings
493 }
494
495 /// Enable the use of embeddings
496 ///
497 /// # Examples
498 ///
499 /// ```rust
500 /// use llama_cpp_4::context::params::LlamaContextParams;
501 /// let params = LlamaContextParams::default()
502 /// .with_embeddings(true);
503 /// assert!(params.embeddings());
504 /// ```
505 #[must_use]
506 pub fn with_embeddings(mut self, embedding: bool) -> Self {
507 self.context_params.embeddings = embedding;
508 self
509 }
510
511 /// Set the evaluation callback.
512 ///
513 /// # Examples
514 ///
515 /// ```no_run
516 /// extern "C" fn cb_eval_fn(
517 /// t: *mut llama_cpp_sys_4::ggml_tensor,
518 /// ask: bool,
519 /// user_data: *mut std::ffi::c_void,
520 /// ) -> bool {
521 /// false
522 /// }
523 ///
524 /// use llama_cpp_4::context::params::LlamaContextParams;
525 /// let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));
526 /// ```
527 #[must_use]
528 pub fn with_cb_eval(
529 mut self,
530 cb_eval: llama_cpp_sys_4::ggml_backend_sched_eval_callback,
531 ) -> Self {
532 self.context_params.cb_eval = cb_eval;
533 self
534 }
535
536 /// Set the evaluation callback user data.
537 ///
538 /// # Examples
539 ///
540 /// ```no_run
541 /// use llama_cpp_4::context::params::LlamaContextParams;
542 /// let params = LlamaContextParams::default();
543 /// let user_data = std::ptr::null_mut();
544 /// let params = params.with_cb_eval_user_data(user_data);
545 /// ```
546 #[must_use]
547 pub fn with_cb_eval_user_data(mut self, cb_eval_user_data: *mut std::ffi::c_void) -> Self {
548 self.context_params.cb_eval_user_data = cb_eval_user_data;
549 self
550 }
551
552 /// Attach a [`TensorCapture`](super::tensor_capture::TensorCapture) to
553 /// intercept intermediate tensor outputs during `decode()`.
554 ///
555 /// This sets up the `cb_eval` callback to capture tensors matching the
556 /// capture's filter (e.g. specific layer outputs). After `decode()` the
557 /// captured data can be read from the `TensorCapture`.
558 ///
559 /// # Example
560 ///
561 /// ```rust,ignore
562 /// use llama_cpp_4::context::params::LlamaContextParams;
563 /// use llama_cpp_4::context::tensor_capture::TensorCapture;
564 ///
565 /// let mut capture = TensorCapture::for_layers(&[13, 20, 27]);
566 /// let ctx_params = LlamaContextParams::default()
567 /// .with_embeddings(true)
568 /// .with_tensor_capture(&mut capture);
569 /// ```
570 #[must_use]
571 pub fn with_tensor_capture(self, capture: &mut super::tensor_capture::TensorCapture) -> Self {
572 self.with_cb_eval(Some(super::tensor_capture::tensor_capture_callback))
573 .with_cb_eval_user_data(
574 std::ptr::from_mut::<super::tensor_capture::TensorCapture>(capture)
575 .cast::<std::ffi::c_void>(),
576 )
577 }
578
579 /// Set the storage type for the **K** (key) KV cache tensors.
580 ///
581 /// The default is `GgmlType::F16`. Quantized types like `GgmlType::Q5_0`
582 /// or `GgmlType::Q4_0` reduce VRAM usage significantly; combining them with
583 /// `TurboQuant` attention rotation (the default) keeps quality high.
584 ///
585 /// # Examples
586 ///
587 /// ```rust
588 /// use llama_cpp_4::context::params::LlamaContextParams;
589 /// use llama_cpp_4::quantize::GgmlType;
590 /// let params = LlamaContextParams::default()
591 /// .with_cache_type_k(GgmlType::Q5_0);
592 /// ```
593 #[must_use]
594 pub fn with_cache_type_k(mut self, ty: crate::quantize::GgmlType) -> Self {
595 self.context_params.type_k = ty as llama_cpp_sys_4::ggml_type;
596 self
597 }
598
599 /// Get the K-cache storage type.
600 #[must_use]
601 pub fn cache_type_k(&self) -> llama_cpp_sys_4::ggml_type {
602 self.context_params.type_k
603 }
604
605 /// Set the storage type for the **V** (value) KV cache tensors.
606 ///
607 /// See [`with_cache_type_k`](Self::with_cache_type_k) for details.
608 ///
609 /// # Examples
610 ///
611 /// ```rust
612 /// use llama_cpp_4::context::params::LlamaContextParams;
613 /// use llama_cpp_4::quantize::GgmlType;
614 /// let params = LlamaContextParams::default()
615 /// .with_cache_type_v(GgmlType::Q5_0);
616 /// ```
617 #[must_use]
618 pub fn with_cache_type_v(mut self, ty: crate::quantize::GgmlType) -> Self {
619 self.context_params.type_v = ty as llama_cpp_sys_4::ggml_type;
620 self
621 }
622
623 /// Get the V-cache storage type.
624 #[must_use]
625 pub fn cache_type_v(&self) -> llama_cpp_sys_4::ggml_type {
626 self.context_params.type_v
627 }
628
629 /// Control the `TurboQuant` attention-rotation feature (llama.cpp PR #21038).
630 ///
631 /// By default, llama.cpp applies a Hadamard rotation to Q/K/V tensors
632 /// before writing them into the KV cache. This significantly improves
633 /// quantized KV-cache quality at near-zero overhead, and is enabled
634 /// automatically for models whose head dimension is a power of two.
635 ///
636 /// Set `disabled = true` to opt out (equivalent to `LLAMA_ATTN_ROT_DISABLE=1`).
637 /// The env-var is applied just before the context is created and restored
638 /// afterwards, so this is safe to call from a single thread.
639 ///
640 /// # Examples
641 ///
642 /// ```rust
643 /// use llama_cpp_4::context::params::LlamaContextParams;
644 /// // Disable rotation for this context only:
645 /// let params = LlamaContextParams::default().with_attn_rot_disabled(true);
646 /// assert!(params.attn_rot_disabled());
647 /// ```
648 #[must_use]
649 pub fn with_attn_rot_disabled(mut self, disabled: bool) -> Self {
650 self.attn_rot_disabled = disabled;
651 self
652 }
653
654 /// Returns `true` if `TurboQuant` attention rotation is disabled for this context.
655 ///
656 /// ```rust
657 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
658 /// assert!(!params.attn_rot_disabled());
659 /// ```
660 #[must_use]
661 pub fn attn_rot_disabled(&self) -> bool {
662 self.attn_rot_disabled
663 }
664
665 /// Set the type of pooling.
666 ///
667 /// # Examples
668 ///
669 /// ```rust
670 /// use llama_cpp_4::context::params::{LlamaContextParams, LlamaPoolingType};
671 /// let params = LlamaContextParams::default()
672 /// .with_pooling_type(LlamaPoolingType::Last);
673 /// assert_eq!(params.pooling_type(), LlamaPoolingType::Last);
674 /// ```
675 #[must_use]
676 pub fn with_pooling_type(mut self, pooling_type: LlamaPoolingType) -> Self {
677 self.context_params.pooling_type = i32::from(pooling_type);
678 self
679 }
680
681 /// Get the type of pooling.
682 ///
683 /// # Examples
684 ///
685 /// ```rust
686 /// let params = llama_cpp_4::context::params::LlamaContextParams::default();
687 /// assert_eq!(params.pooling_type(), llama_cpp_4::context::params::LlamaPoolingType::Unspecified);
688 /// ```
689 #[must_use]
690 pub fn pooling_type(&self) -> LlamaPoolingType {
691 LlamaPoolingType::from(self.context_params.pooling_type)
692 }
693}
694
695/// Default parameters for `LlamaContext`. (as defined in llama.cpp by `llama_context_default_params`)
696/// ```
697/// # use std::num::NonZeroU32;
698/// use llama_cpp_4::context::params::{LlamaContextParams, RopeScalingType};
699/// let params = LlamaContextParams::default();
700/// assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
701/// assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);
702/// ```
703impl Default for LlamaContextParams {
704 fn default() -> Self {
705 let context_params = unsafe { llama_cpp_sys_4::llama_context_default_params() };
706 Self {
707 context_params,
708 attn_rot_disabled: false,
709 }
710 }
711}