#![warn(clippy::all, clippy::pedantic)]
#![deny(rust_2018_idioms, missing_debug_implementations)]
#![allow(missing_docs)]
#![allow(
// Doc prose includes `CubeCL`, `GGUF`, `Q4_0`, etc. — surrounding every
// such word in backticks would hurt readability for technical prose.
clippy::doc_markdown,
// # Errors / # Panics sections will be added in the workspace-wide
// rustdoc pass; not gated on this lint baseline.
clippy::missing_errors_doc,
clippy::missing_panics_doc,
// Numeric ML code casts pervasively between `usize` and `u32` for buffer
// sizes, dimensions, and CubeCL launch arithmetic; explicit `as` is more
// readable than `try_into().unwrap()` cluttering hot paths.
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_sign_loss,
clippy::cast_precision_loss,
clippy::cast_lossless,
// `#[must_use]` on every getter is churn for marginal value; existing
// callers already use the returned values.
clippy::must_use_candidate,
// Math kernels naturally use single-character names (m, k, n for matmul
// dims; a, b for binary operands); requiring longer names hurts
// readability.
clippy::many_single_char_names,
// Pre-existing pedantic warnings in this crate's frozen files
// (storage.rs, runtime.rs) and the dataplane-heavy quant.rs / kernels.rs
// bodies are tracked for the cubecl-B SAFETY substantiation pass and a
// workspace-wide rustdoc / format-args sweep. Allowing them keeps
// `-D warnings` viable now without scope-creeping into frozen files.
clippy::ptr_as_ptr,
clippy::uninlined_format_args,
)]
pub mod grammar;
pub mod kernels;
pub mod ops;
pub mod quant;
pub mod runtime;
pub mod storage;
/// Builds the 1-D launch configuration for an elementwise kernel over `n` items.
///
/// The returned `CubeDim` is one-dimensional with 256 units per cube. The
/// returned `CubeCount` is `Static(c, 1, 1)`, where `c` is `n / 256` rounded
/// up and never less than one, so `n == 0` still yields a single cube.
pub(crate) fn elementwise_launch_dims(
    n: u32,
) -> (cubecl::prelude::CubeCount, cubecl::prelude::CubeDim) {
    use cubecl::prelude::{CubeCount, CubeDim};

    const UNITS_PER_CUBE: u32 = 256;

    // Ceiling division, then clamp to at least one cube.
    let cube_total = std::cmp::max(1, n.div_ceil(UNITS_PER_CUBE));

    let count = CubeCount::Static(cube_total, 1, 1);
    let dim = CubeDim::new_1d(UNITS_PER_CUBE);
    (count, dim)
}
/// Debug-only check that `handle` is large enough for `n` elements of `T`.
///
/// The required byte count is `n * size_of::<T>()`, computed with a
/// saturating multiply so an overflowing request compares as `usize::MAX`
/// rather than wrapping. When debug assertions are enabled and
/// `handle.size()` is smaller than that, the function panics with a message
/// listing the handle size, the required size, `n`, and the element stride.
/// When debug assertions are disabled it does nothing.
pub(crate) fn debug_assert_handle_capacity<T>(handle: &cubecl::server::Handle, n: usize) {
    // Same gating as `debug_assert!`: skip all work when debug assertions are off.
    if !cfg!(debug_assertions) {
        return;
    }

    let stride = std::mem::size_of::<T>();
    let required = n.saturating_mul(stride);
    let available = handle.size();

    assert!(
        available as usize >= required,
        "cubecl handle capacity {} bytes < required {} bytes ({} elements x {} byte stride)",
        available,
        required,
        n,
        stride,
    );
}
pub use runtime::{CubeClient, CubeDevice, CubeRuntime};
pub use storage::{CubeclStorageHandle, cubecl_handle_of, upload_f32, wrap_kernel_output};
pub use quant::{
GgufBlockKind, dequantize_q4_0_to_gpu, dequantize_q4_1_to_gpu, dequantize_q5_0_to_gpu,
dequantize_q5_1_to_gpu, dequantize_q8_0_to_gpu, dequantize_q8_1_to_gpu, split_q4_0_blocks,
split_q4_1_blocks, split_q5_0_blocks, split_q5_1_blocks, split_q8_0_blocks, split_q8_1_blocks,
};
pub use grammar::{DfaMaskInputs, compute_token_mask_dfa_to_gpu, kernel_compute_token_mask_dfa};