boostr 0.1.0

ML framework built on numr - attention, quantization, model architectures
Documentation
pub mod decode_graph;
pub mod kv_cache;
pub mod memory;
pub mod prefix_cache;
pub mod scheduler;
pub mod speculative;
pub mod ssm_state;

#[cfg(feature = "cuda")]
pub use decode_graph::{DecodeGraph, DeviceScalars, PagedDecodeGraph};
pub use kv_cache::{
    KvCache, LayeredKvCache, LayeredKvCacheConfig, LayeredPagedKvCache, PagedKvCache,
};
#[cfg(feature = "cuda")]
pub use memory::GpuBlockAllocator;
pub use memory::{BlockAllocator, BlockAllocatorStats, BlockId, BlockTable, CpuBlockAllocator};
pub use prefix_cache::{
    CacheResult, GpuRadixStats, GpuRadixTree, PrefixCache, PrefixCacheConfig, PrefixCacheStats,
    PrefixLookup,
};
pub use scheduler::{
    ScheduledBatch, SchedulerConfig, SchedulerStats, SchedulingPriority, SequenceId,
    SequenceRequest, SequenceScheduler, SequenceState,
};
pub use speculative::{
    SpeculativeConfig, SpeculativeExecutor, SpeculativeModel, SpeculativeStats, VerificationResult,
};
pub use ssm_state::{LayeredSsmState, SsmState};