libxc 0.1.1

libxc wrapper for Rust
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
//! CPU wrapper for libxc functionals.

use crate::prelude::*;

/// Unified input map for all functional families.
///
/// Keys are `"rho"`, `"sigma"`, `"lapl"`, `"tau"`.
/// Required keys depend on the functional family:
/// - LDA: `"rho"`
/// - GGA: `"rho"`, `"sigma"`
/// - MGGA: `"rho"`, `"sigma"`; `"lapl"` and `"tau"` if the functional needs
///   them
pub type LibXCCpuInput<'a> = HashMap<String, &'a [f64]>;

/// Extract a required input slice from the map.
/// Look up a required input slice by key.
///
/// Returns the slice registered under `key`, or a `ComputeError` naming the
/// missing key when the caller did not provide it.
fn require_input<'a>(input: &LibXCCpuInput<'a>, key: &str) -> Result<&'a [f64], LibXCError> {
    match input.get(key) {
        Some(&slice) => Ok(slice),
        None => Err(LibXCError::ComputeError(format!("{key}: required input not provided"))),
    }
}

/// Validate a required input's size and return a const pointer.
/// Fetch a required input from the map, check that its length is exactly
/// `npoints * expected_dim`, and hand back a raw const pointer for FFI use.
fn require_input_ptr(
    input: &LibXCCpuInput,
    key: &'static str,
    npoints: usize,
    expected_dim: i32,
) -> Result<*const f64, LibXCError> {
    let slice = require_input(input, key)?;
    let expected = npoints * (expected_dim as usize);
    if slice.len() == expected {
        Ok(slice.as_ptr())
    } else {
        Err(LibXCError::ComputeError(format!(
            "{key}: expected size {expected}, got {}",
            slice.len()
        )))
    }
}

/// Validate a conditionally-required input and return a const pointer.
/// Returns null if `required` is false; errors if required but absent.
fn conditional_input_ptr(
    input: &LibXCCpuInput,
    key: &'static str,
    npoints: usize,
    expected_dim: i32,
    required: bool,
) -> Result<*const f64, LibXCError> {
    match (input.get(key).copied(), required) {
        (Some(slice), true) => {
            let expected = npoints * (expected_dim as usize);
            if slice.len() != expected {
                return Err(LibXCError::ComputeError(format!(
                    "{key}: expected size {expected}, got {}",
                    slice.len()
                )));
            }
            Ok(slice.as_ptr())
        },
        (None, true) => {
            Err(LibXCError::ComputeError(format!("{key}: required input not provided")))
        },
        (_, false) => Ok(std::ptr::null()),
    }
}

// ---------------------------------------------------------------------------
// Preallocated output types
// ---------------------------------------------------------------------------

/// Unified output map for all functional families (preallocated buffers).
///
/// Keys are derivative component names (e.g. `"zk"`, `"vrho"`, `"v2rho2"`,
/// ...). Key present = user provides that buffer; absent = null pointer passed
/// to libxc.
pub type LibXCCpuOutputMut<'a> = HashMap<String, &'a mut [f64]>;

/// Validate an output slice from the map and return a mutable pointer.
/// Returns null if the key is absent; validates size if present.
/// Validate an output slice from the map and return a mutable pointer.
/// Returns null if the key is absent; validates size (`npoints *
/// expected_dim`) if present.
///
/// NOTE(review): the mutable pointer is derived from a shared `&` borrow of
/// the output map (`.cast_mut()`, previously `as *mut f64`). This keeps the
/// public `&LibXCCpuOutputMut` signatures unchanged, but writing through the
/// pointer relies on the caller holding the only live path to these buffers
/// while libxc runs — consider migrating to `&mut LibXCCpuOutputMut` +
/// `as_mut_ptr` to make this provably sound.
fn validate_output_ptr(
    output: &LibXCCpuOutputMut,
    key: &str,
    npoints: usize,
    expected_dim: i32,
) -> Result<*mut f64, LibXCError> {
    match output.get(key) {
        Some(s) => {
            let expected = npoints * (expected_dim as usize);
            if s.len() != expected {
                return Err(LibXCError::ComputeError(format!(
                    "{key}: expected size {expected}, got {}",
                    s.len()
                )));
            }
            // `cast_mut` is the idiomatic (clippy: ptr_cast_constness) form of
            // `as *mut f64`; the bit pattern and behavior are identical.
            Ok(s.as_ptr().cast_mut())
        },
        None => Ok(std::ptr::null_mut::<f64>()),
    }
}

/// Validate all output entries for a given label set, returning a pointer map.
/// Validate all output entries for a given label set, returning a pointer map.
///
/// Each label's per-point dimension is looked up from `dim`; absent labels
/// map to null pointers (see [`validate_output_ptr`]).
fn validate_output_ptrs(
    output: &LibXCCpuOutputMut,
    labels: &[&'static str],
    npoints: usize,
    dim: &ffi::xc_dimensions,
) -> Result<HashMap<&'static str, *mut f64>, LibXCError> {
    labels
        .iter()
        .map(|&label| {
            let d = crate::layout_handling::get_dim(dim, label);
            validate_output_ptr(output, label, npoints, d).map(|ptr| (label, ptr))
        })
        .collect()
}

impl LibXCFunctional {
    // -- Shared validation helpers -----------------------------------------

    /// Validate the `"rho"` input and derive the grid-point count.
    ///
    /// `rho` is flattened `[npoints, nspin]` (spin is the contiguous axis),
    /// so its length must be divisible by the number of spin channels.
    /// Returns `(rho, npoints)`. Shared by all families; previously this
    /// check was duplicated in six places.
    fn rho_npoints<'a>(
        &self,
        input: &LibXCCpuInput<'a>,
    ) -> Result<(&'a [f64], usize), LibXCError> {
        let rho = require_input(input, "rho")?;
        let nspin = self.spin() as usize;
        if rho.len() % nspin != 0 {
            return Err(LibXCError::ComputeError(
                "rho input has invalid shape: size not divisible by nspin".into(),
            ));
        }
        Ok((rho, rho.len() / nspin))
    }

    /// Ensure a caller-provided flat output buffer can hold
    /// `layout.total_size` elements.
    fn check_output_len(output: &[f64], layout: &LibXCOutputLayout) -> Result<(), LibXCError> {
        if output.len() < layout.total_size {
            return Err(LibXCError::ComputeError(format!(
                "output buffer has too small size: expected {}, got {}",
                layout.total_size,
                output.len()
            )));
        }
        Ok(())
    }

    // -- LDA private helpers -----------------------------------------------

    /// Validate input and compute output layout for LDA.
    /// Returns `(npoints, rho_ptr, layout)`.
    fn lda_prepare(
        &self,
        input: &LibXCCpuInput,
        deriv_flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<(usize, *const f64, LibXCOutputLayout), LibXCError> {
        let flags = deriv_flags.into();
        self.validate_flags(flags)?;
        let (rho, npoints) = self.rho_npoints(input)?;
        let layout = self.lda_output_layout(npoints, flags);
        Ok((npoints, rho.as_ptr(), layout))
    }

    // -- LDA compute --------------------------------------------------------

    /// Compute LDA functional with preallocated output buffer slice. Validates
    /// buffer sizes and passes null for absent components.
    pub fn compute_lda_with_unsliced_output(
        &self,
        input: &LibXCCpuInput,
        output: &mut [f64],
        deriv_flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<LibXCOutputLayout, LibXCError> {
        let (npoints, rho_ptr, layout) = self.lda_prepare(input, deriv_flags)?;
        Self::check_output_len(output, &layout)?;
        unsafe {
            xc_lda_call(self.ptr, npoints, rho_ptr, output.as_mut_ptr(), &layout);
        }
        Ok(layout)
    }

    /// Compute LDA functional with automatic allocation.
    /// Returns `(buffer, layout)` where buffer is a contiguous f64 array.
    pub fn compute_lda(
        &self,
        input: &LibXCCpuInput,
        flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<(Vec<f64>, LibXCOutputLayout), LibXCError> {
        #[cfg(feature = "cuda")]
        if self.is_on_device() {
            return Err(LibXCError::ComputeError(
                "functional was initialized for GPU; use cuda_compute_lda instead".into(),
            ));
        }
        let (npoints, rho_ptr, layout) = self.lda_prepare(input, flags)?;
        let mut buffer = vec![0.0f64; layout.total_size];
        unsafe {
            xc_lda_call(self.ptr, npoints, rho_ptr, buffer.as_mut_ptr(), &layout);
        }
        Ok((buffer, layout))
    }

    /// Compute LDA functional with user-preallocated output buffers.
    /// Absent output keys are passed to libxc as null pointers.
    pub fn compute_lda_with_output(
        &self,
        input: &LibXCCpuInput,
        output: &LibXCCpuOutputMut,
    ) -> Result<(), LibXCError> {
        let (rho, npoints) = self.rho_npoints(input)?;
        let dim = self.dim();
        let ptrs = validate_output_ptrs(output, &LDA_OUTPUT_LABELS, npoints, dim)?;

        unsafe {
            xc_lda_call_with_output(self.ptr, npoints, rho.as_ptr(), &ptrs);
        }
        Ok(())
    }

    // -- GGA private helpers -----------------------------------------------

    /// Validate input and compute output layout for GGA.
    /// Returns `(npoints, rho_ptr, sigma_ptr, layout)`.
    fn gga_prepare(
        &self,
        input: &LibXCCpuInput,
        deriv_flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<(usize, *const f64, *const f64, LibXCOutputLayout), LibXCError> {
        let flags = deriv_flags.into();
        self.validate_flags(flags)?;
        let (rho, npoints) = self.rho_npoints(input)?;
        let dim = self.dim();
        let sigma_ptr = require_input_ptr(input, "sigma", npoints, dim.sigma)?;
        let layout = self.gga_output_layout(npoints, flags);
        Ok((npoints, rho.as_ptr(), sigma_ptr, layout))
    }

    // -- GGA compute --------------------------------------------------------

    /// Compute GGA functional with preallocated output buffer slice. Validates
    /// buffer sizes and passes null for absent components.
    pub fn compute_gga_with_unsliced_output(
        &self,
        input: &LibXCCpuInput,
        output: &mut [f64],
        deriv_flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<LibXCOutputLayout, LibXCError> {
        let (npoints, rho_ptr, sigma_ptr, layout) = self.gga_prepare(input, deriv_flags)?;
        Self::check_output_len(output, &layout)?;
        unsafe {
            xc_gga_call(self.ptr, npoints, rho_ptr, sigma_ptr, output.as_mut_ptr(), &layout);
        }
        Ok(layout)
    }

    /// Compute GGA functional with automatic allocation.
    pub fn compute_gga(
        &self,
        input: &LibXCCpuInput,
        flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<(Vec<f64>, LibXCOutputLayout), LibXCError> {
        #[cfg(feature = "cuda")]
        if self.is_on_device() {
            return Err(LibXCError::ComputeError(
                "functional was initialized for GPU; use cuda_compute_gga instead".into(),
            ));
        }
        let (npoints, rho_ptr, sigma_ptr, layout) = self.gga_prepare(input, flags)?;
        let mut buffer = vec![0.0f64; layout.total_size];
        unsafe {
            xc_gga_call(self.ptr, npoints, rho_ptr, sigma_ptr, buffer.as_mut_ptr(), &layout);
        }
        Ok((buffer, layout))
    }

    /// Compute GGA functional with user-preallocated output buffers.
    /// Absent output keys are passed to libxc as null pointers.
    pub fn compute_gga_with_output(
        &self,
        input: &LibXCCpuInput,
        output: &LibXCCpuOutputMut,
    ) -> Result<(), LibXCError> {
        let (rho, npoints) = self.rho_npoints(input)?;
        let dim = self.dim();
        let sigma_ptr = require_input_ptr(input, "sigma", npoints, dim.sigma)?;
        let ptrs = validate_output_ptrs(output, &GGA_OUTPUT_LABELS, npoints, dim)?;

        unsafe {
            xc_gga_call_with_output(self.ptr, npoints, rho.as_ptr(), sigma_ptr, &ptrs);
        }
        Ok(())
    }

    // -- MGGA private helpers ----------------------------------------------

    /// Validate input and compute output layout for MGGA.
    /// Returns `(npoints, rho_ptr, sigma_ptr, lapl_ptr, tau_ptr, layout)`.
    /// `lapl`/`tau` pointers are null when the functional does not need them.
    fn mgga_prepare(
        &self,
        input: &LibXCCpuInput,
        deriv_flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<
        (usize, *const f64, *const f64, *const f64, *const f64, LibXCOutputLayout),
        LibXCError,
    > {
        let flags = deriv_flags.into();
        self.validate_flags(flags)?;
        let (rho, npoints) = self.rho_npoints(input)?;
        let dim = self.dim();
        let needs_lapl = self.needs_laplacian();
        let needs_tau = self.needs_tau();
        let sigma_ptr = require_input_ptr(input, "sigma", npoints, dim.sigma)?;
        let lapl_ptr = conditional_input_ptr(input, "lapl", npoints, dim.lapl, needs_lapl)?;
        let tau_ptr = conditional_input_ptr(input, "tau", npoints, dim.tau, needs_tau)?;
        let layout = self.mgga_output_layout(npoints, flags);
        Ok((npoints, rho.as_ptr(), sigma_ptr, lapl_ptr, tau_ptr, layout))
    }

    // -- MGGA compute -------------------------------------------------------

    /// Compute MGGA functional with preallocated output buffer slice. Validates
    /// buffer sizes and passes null for absent components.
    pub fn compute_mgga_with_unsliced_output(
        &self,
        input: &LibXCCpuInput,
        output: &mut [f64],
        deriv_flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<LibXCOutputLayout, LibXCError> {
        let (npoints, rho_ptr, sigma_ptr, lapl_ptr, tau_ptr, layout) =
            self.mgga_prepare(input, deriv_flags)?;
        Self::check_output_len(output, &layout)?;
        unsafe {
            xc_mgga_call(
                self.ptr,
                npoints,
                rho_ptr,
                sigma_ptr,
                lapl_ptr,
                tau_ptr,
                output.as_mut_ptr(),
                &layout,
            );
        }
        Ok(layout)
    }

    /// Compute MGGA functional with automatic allocation.
    pub fn compute_mgga(
        &self,
        input: &LibXCCpuInput,
        flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<(Vec<f64>, LibXCOutputLayout), LibXCError> {
        #[cfg(feature = "cuda")]
        if self.is_on_device() {
            return Err(LibXCError::ComputeError(
                "functional was initialized for GPU; use cuda_compute_mgga instead".into(),
            ));
        }
        let (npoints, rho_ptr, sigma_ptr, lapl_ptr, tau_ptr, layout) =
            self.mgga_prepare(input, flags)?;
        let mut buffer = vec![0.0f64; layout.total_size];
        unsafe {
            xc_mgga_call(
                self.ptr,
                npoints,
                rho_ptr,
                sigma_ptr,
                lapl_ptr,
                tau_ptr,
                buffer.as_mut_ptr(),
                &layout,
            );
        }
        Ok((buffer, layout))
    }

    /// Compute MGGA functional with user-preallocated output buffers.
    /// Absent output keys are passed to libxc as null pointers.
    pub fn compute_mgga_with_output(
        &self,
        input: &LibXCCpuInput,
        output: &LibXCCpuOutputMut,
    ) -> Result<(), LibXCError> {
        let (rho, npoints) = self.rho_npoints(input)?;
        let dim = self.dim();
        let needs_lapl = self.needs_laplacian();
        let needs_tau = self.needs_tau();
        let sigma_ptr = require_input_ptr(input, "sigma", npoints, dim.sigma)?;
        let lapl_ptr = conditional_input_ptr(input, "lapl", npoints, dim.lapl, needs_lapl)?;
        let tau_ptr = conditional_input_ptr(input, "tau", npoints, dim.tau, needs_tau)?;
        let ptrs = validate_output_ptrs(output, &MGGA_OUTPUT_LABELS, npoints, dim)?;

        unsafe {
            xc_mgga_call_with_output(
                self.ptr, npoints, rho.as_ptr(), sigma_ptr, lapl_ptr, tau_ptr, &ptrs,
            );
        }
        Ok(())
    }

    // -- Unified dispatch ---------------------------------------------------

    /// Compute the functional with automatic allocation, dispatching by family.
    ///
    /// Inspects `self.family()` and delegates to
    /// [`compute_lda`](Self::compute_lda),
    /// [`compute_gga`](Self::compute_gga), or
    /// [`compute_mgga`](Self::compute_mgga) accordingly. Hybrid families
    /// (HybLDA, HybGGA, HybMGGA) are dispatched to their base family's
    /// compute.
    ///
    /// Returns `(buffer, layout)` where `buffer` is a contiguous `Vec<f64>` and
    /// `layout` describes how to index named components (e.g. `"zk"`, `"vrho"`)
    /// within it. Use [`LibXCOutputLayout::get`] to extract a `Range<usize>`
    /// for each component.
    ///
    /// The `flags` parameter controls which derivative levels to compute:
    /// - `0`: energy only (EXC)
    /// - `1`: energy + first derivative (EXC + VXC)
    /// - `2`: up to second derivative (EXC + VXC + FXC)
    /// - `3`: up to third derivative (EXC + VXC + FXC + KXC)
    /// - `4`: up to fourth derivative (EXC + VXC + FXC + KXC + LXC)
    ///
    /// You can also pass a [`LibXCDerivativeFlags`] struct for fine-grained
    /// control over individual derivative levels.
    ///
    /// # Input keys
    ///
    /// | Family | Required keys |
    /// |--------|--------------|
    /// | LDA    | `"rho"` |
    /// | GGA    | `"rho"`, `"sigma"` |
    /// | MGGA   | `"rho"`, `"sigma"`, `"tau"`<br>(and `"lapl"` if the functional needs it) |
    ///
    /// # Output keys
    ///
    /// Output keys can be complicated for GGA and MGGA functionals. Please
    /// refer to the source code in [layout_handling](crate::layout_handling)
    /// for details on the naming convention and how to interpret them.
    ///
    /// | Level | LDA components | GGA adds | MGGA adds |
    /// |-------|---------------|----------|-----------|
    /// | EXC   | `zk` | `zk` | `zk` |
    /// | VXC   | `vrho` | `vrho`, `vsigma` | `vrho`, `vsigma`, `vlapl`, `vtau` |
    /// | FXC   | `v2rho2` | `v2rho2`, `v2rhosigma`, `v2sigma2` | (10 components) |
    /// | KXC   | `v3rho3` | (4 components) | (20 components) |
    /// | LXC   | `v4rho4` | (5 components) | (35 components) |
    ///
    /// # Convention for polarization input/output arrays
    ///
    /// For polarized functionals, in a certain component, the input and output
    /// arrays use row-major order `[n_points, n_spin]` and are then flattened
    /// (the most contiguous dimension is spin).
    ///
    /// For example, the density input follows the order `rho[i * n_spin + s]`
    /// where `i` is the grid point index and `s` is the spin index.
    ///
    /// Note `rho`, `tau` and `lapl` has two spin components (alpha or ↑, and
    /// beta or ↓), but `sigma` only has three (↑↑, ↑↓, and ↓↓). The output
    /// components is complicated and refer to source code of module
    /// [layout_handling](crate::layout_handling) for details.
    ///
    /// # Errors
    ///
    /// Returns [`LibXCError::ComputeError`] if:
    /// - A required input key is missing
    /// - An input array has the wrong size
    /// - A requested derivative level is not supported by the functional
    ///
    /// # Examples
    ///
    /// LDA exchange on 5 grid points (unpolarized, derivative level 1):
    ///
    /// ```
    /// use libxc::prelude::*;
    /// use std::collections::HashMap;
    ///
    /// let func = LibXCFunctional::from_identifier("lda_x", LibXCSpin::Unpolarized);
    /// let rho = vec![0.1, 0.2, 0.3, 0.4, 0.5];
    /// let input = HashMap::from([("rho".to_string(), rho.as_slice())]);
    ///
    /// let (buf, layout) = func.compute_xc(&input, 1).unwrap();
    ///
    /// // Extract the energy-per-particle (zk) array:
    /// let zk = &buf[layout.get("zk").unwrap()]; // length = 5
    /// assert!((zk[0] - (-0.3428)).abs() < 1e-3);
    ///
    /// // Extract the first derivative (vrho) array:
    /// let vrho = &buf[layout.get("vrho").unwrap()]; // length = 5
    /// assert!((vrho[0] - (-0.4571)).abs() < 1e-3);
    /// ```
    ///
    /// GGA exchange (PBE) requires `"sigma"` in addition to `"rho"`:
    ///
    /// ```
    /// use libxc::prelude::*;
    /// use std::collections::HashMap;
    ///
    /// let func = LibXCFunctional::from_identifier("gga_x_pbe", LibXCSpin::Unpolarized);
    /// let rho = vec![0.1, 0.2, 0.3];
    /// let sigma = vec![0.01, 0.02, 0.03];
    /// let input = HashMap::from([
    ///     ("rho".to_string(), rho.as_slice()),
    ///     ("sigma".to_string(), sigma.as_slice()),
    /// ]);
    ///
    /// let (buf, layout) = func.compute_xc(&input, 1).unwrap();
    /// let zk = &buf[layout.get("zk").unwrap()];
    /// let vrho = &buf[layout.get("vrho").unwrap()];
    /// let vsigma = &buf[layout.get("vsigma").unwrap()];
    /// ```
    ///
    /// MGGA correlation (TPSS) requires `"tau"` (and `"lapl"` if the functional
    /// needs the laplacian):
    ///
    /// ```
    /// use libxc::prelude::*;
    /// use std::collections::HashMap;
    ///
    /// let func = LibXCFunctional::from_identifier("mgga_c_tpss", LibXCSpin::Unpolarized);
    /// let rho = vec![0.1, 0.2, 0.3];
    /// let sigma = vec![0.01, 0.02, 0.03];
    /// let tau = vec![0.05, 0.1, 0.15];
    /// let input = HashMap::from([
    ///     ("rho".to_string(), rho.as_slice()),
    ///     ("sigma".to_string(), sigma.as_slice()),
    ///     ("tau".to_string(), tau.as_slice()),
    /// ]);
    ///
    /// let (buf, layout) = func.compute_xc(&input, 1).unwrap();
    /// let vtau = &buf[layout.get("vtau").unwrap()];
    /// ```
    ///
    /// Higher derivative levels include more output components:
    ///
    /// ```
    /// use libxc::prelude::*;
    /// use std::collections::HashMap;
    ///
    /// let func = LibXCFunctional::from_identifier("lda_x", LibXCSpin::Unpolarized);
    /// let rho = vec![0.1, 0.2, 0.3];
    /// let input = HashMap::from([("rho".to_string(), rho.as_slice())]);
    ///
    /// // Level 2 = EXC + VXC + FXC
    /// let (buf, layout) = func.compute_xc(&input, 2).unwrap();
    /// let v2rho2 = &buf[layout.get("v2rho2").unwrap()];
    /// ```
    pub fn compute_xc(
        &self,
        input: &LibXCCpuInput,
        flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<(Vec<f64>, LibXCOutputLayout), LibXCError> {
        #[cfg(feature = "cuda")]
        if self.is_on_device() {
            return Err(LibXCError::ComputeError(
                r#"
Functional was initialized for GPU; use cuda_compute_xc instead.
Also make sure appropriate cargo feature (api-v7_0, api-v7_1)
is enabled according to your libxc version."#
                    .trim()
                    .into(),
            ));
        }
        use crate::prelude::libxc_enum_items::*;
        match self.family() {
            LDA | HybLDA => self.compute_lda(input, flags),
            GGA | HybGGA => self.compute_gga(input, flags),
            MGGA | HybMGGA => self.compute_mgga(input, flags),
            OEP | LCA => {
                Err(LibXCError::ComputeError("compute_xc: OEP/LCA family is not supported".into()))
            },
        }
    }

    /// Compute the functional into a user-provided contiguous buffer,
    /// dispatching by family.
    ///
    /// This is the preallocated variant of [`compute_xc`](Self::compute_xc):
    /// instead of allocating a new `Vec<f64>`, the caller provides a `&mut
    /// [f64]` buffer. The buffer must be at least `layout.total_size`
    /// elements long, where `layout` can be obtained from
    /// [`output_layout`](Self::output_layout).
    ///
    /// Returns the [`LibXCOutputLayout`] that describes how to index named
    /// components within the buffer. Unused trailing elements (if the buffer is
    /// larger than `total_size`) are left unchanged.
    ///
    /// This variant is useful when you want to reuse the same buffer across
    /// multiple evaluations (e.g. in a SCF loop) to avoid repeated allocations.
    ///
    /// # Errors
    ///
    /// Returns [`LibXCError::ComputeError`] if the buffer is too small or if
    /// any input validation fails (same conditions as
    /// [`compute_xc`](Self::compute_xc)).
    ///
    /// # Examples
    ///
    /// ```
    /// use libxc::prelude::*;
    /// use std::collections::HashMap;
    ///
    /// let func = LibXCFunctional::from_identifier("lda_x", LibXCSpin::Unpolarized);
    /// let rho = vec![0.1, 0.2, 0.3, 0.4, 0.5];
    /// let input = HashMap::from([("rho".to_string(), rho.as_slice())]);
    ///
    /// // Get the layout to know the required buffer size
    /// let layout = func.output_layout(5, 1);
    /// let mut buf = vec![0.0f64; layout.total_size];
    ///
    /// let layout = func.compute_xc_with_unsliced_output(&input, &mut buf, 1).unwrap();
    ///
    /// let zk = &buf[layout.get("zk").unwrap()];
    /// assert!((zk[0] - (-0.3428)).abs() < 1e-3);
    /// ```
    pub fn compute_xc_with_unsliced_output(
        &self,
        input: &LibXCCpuInput,
        output: &mut [f64],
        deriv_flags: impl Into<LibXCDerivativeFlags>,
    ) -> Result<LibXCOutputLayout, LibXCError> {
        #[cfg(feature = "cuda")]
        if self.is_on_device() {
            return Err(LibXCError::ComputeError(
                "functional was initialized for GPU; use cuda_compute_xc_with_unsliced_output instead".into(),
            ));
        }
        use crate::prelude::libxc_enum_items::*;
        match self.family() {
            LDA | HybLDA => self.compute_lda_with_unsliced_output(input, output, deriv_flags),
            GGA | HybGGA => self.compute_gga_with_unsliced_output(input, output, deriv_flags),
            MGGA | HybMGGA => self.compute_mgga_with_unsliced_output(input, output, deriv_flags),
            OEP | LCA => {
                Err(LibXCError::ComputeError("compute_xc: OEP/LCA family is not supported".into()))
            },
        }
    }

    /// Compute the functional into named user-provided output buffers,
    /// dispatching by family.
    ///
    /// This is the most explicit compute variant: the caller provides a
    /// [`LibXCCpuOutputMut`] (a `HashMap<String, &mut [f64]>`) whose keys are
    /// the output component names (e.g. `"zk"`, `"vrho"`, `"vsigma"`). Only the
    /// components present in the map are computed; absent keys are passed as
    /// null pointers to libxc.
    ///
    /// Unlike [`compute_xc`](Self::compute_xc) and
    /// [`compute_xc_with_unsliced_output`](Self::compute_xc_with_unsliced_output),
    /// this method does **not** take a `deriv_flags` parameter — the derivative
    /// level is implicitly determined by which output keys you provide.
    ///
    /// Each output buffer must have the correct size: `npoints * n_comp`, where
    /// `n_comp` depends on the component and spin polarization. For example,
    /// for an unpolarized GGA functional on 5 grid points, `"vrho"` needs 5
    /// elements and `"vsigma"` needs 5 elements; for a polarized GGA,
    /// `"vrho"` needs 10 and `"vsigma"` needs 15.
    ///
    /// # Errors
    ///
    /// Returns [`LibXCError::ComputeError`] if any output buffer has the wrong
    /// size or if any input validation fails.
    ///
    /// # Examples
    ///
    /// LDA exchange, requesting only `"zk"` and `"vrho"`:
    ///
    /// ```
    /// use libxc::prelude::*;
    /// use std::collections::HashMap;
    ///
    /// let func = LibXCFunctional::from_identifier("lda_x", LibXCSpin::Unpolarized);
    /// let rho = vec![0.1, 0.2, 0.3, 0.4, 0.5];
    /// let input = HashMap::from([("rho".to_string(), rho.as_slice())]);
    ///
    /// let mut zk = vec![0.0f64; 5];
    /// let mut vrho = vec![0.0f64; 5];
    /// let mut output = HashMap::new();
    /// output.insert("zk".to_string(), zk.as_mut_slice());
    /// output.insert("vrho".to_string(), vrho.as_mut_slice());
    ///
    /// func.compute_xc_with_output(&input, &mut output).unwrap();
    ///
    /// assert!((zk[0] - (-0.3428)).abs() < 1e-3);
    /// assert!((vrho[0] - (-0.4571)).abs() < 1e-3);
    /// ```
    ///
    /// GGA exchange (PBE), requesting `"zk"`, `"vrho"`, and `"vsigma"`:
    ///
    /// ```
    /// use libxc::prelude::*;
    /// use std::collections::HashMap;
    ///
    /// let func = LibXCFunctional::from_identifier("gga_x_pbe", LibXCSpin::Unpolarized);
    /// let rho = vec![0.1, 0.2, 0.3, 0.4, 0.5];
    /// let sigma = vec![0.01, 0.02, 0.03, 0.04, 0.05];
    /// let input = HashMap::from([
    ///     ("rho".to_string(), rho.as_slice()),
    ///     ("sigma".to_string(), sigma.as_slice()),
    /// ]);
    ///
    /// let mut zk = vec![0.0f64; 5];
    /// let mut vrho = vec![0.0f64; 5];
    /// let mut vsigma = vec![0.0f64; 5];
    /// let mut output = HashMap::new();
    /// output.insert("zk".to_string(), zk.as_mut_slice());
    /// output.insert("vrho".to_string(), vrho.as_mut_slice());
    /// output.insert("vsigma".to_string(), vsigma.as_mut_slice());
    ///
    /// func.compute_xc_with_output(&input, &mut output).unwrap();
    ///
    /// assert!((zk[0] - (-0.3516)).abs() < 1e-3);
    /// assert!((vsigma[0] - (-0.0855)).abs() < 1e-3);
    /// ```
    pub fn compute_xc_with_output(
        &self,
        input: &LibXCCpuInput,
        output: &LibXCCpuOutputMut,
    ) -> Result<(), LibXCError> {
        #[cfg(feature = "cuda")]
        if self.is_on_device() {
            return Err(LibXCError::ComputeError(
                "functional was initialized for GPU; use cuda_compute_xc_with_output instead"
                    .into(),
            ));
        }
        use crate::prelude::libxc_enum_items::*;
        match self.family() {
            LDA | HybLDA => self.compute_lda_with_output(input, output),
            GGA | HybGGA => self.compute_gga_with_output(input, output),
            MGGA | HybMGGA => self.compute_mgga_with_output(input, output),
            OEP | LCA => {
                Err(LibXCError::ComputeError("compute_xc: OEP/LCA family is not supported".into()))
            },
        }
    }
}