mistralrs-core 0.8.1

Fast, flexible LLM inference.
Documentation
1
2
3
4
5
6
7
8
pub(super) mod cpu;
mod flash;
mod naive;
mod sinks;

pub(crate) use flash::flash_attn;
pub(crate) use naive::{maybe_synchronize, naive_sdpa};
pub(crate) use sinks::sinks_attn;