use SmallVec;
/// Token identifier - matches the C type from llama.cpp
pub type TokenId = i32;
/// Stack-allocated token buffer for small operations (up to 32 tokens on stack)
///
/// This is a Rust-specific optimization that cannot be replicated in Go:
/// - Go slices always have heap backing
/// - No stack-allocated dynamic arrays in Go
/// - Go's escape analysis is a runtime heuristic, not compile-time guarantee
///
/// For operations with <= 32 tokens, this avoids heap allocation entirely.
/// Larger operations transparently spill to heap with no API change.
///
/// **Use cases:**
/// - Single token decoding (common in generation loops)
/// - Small batch operations
/// - Prompt processing chunks
///
/// **Performance impact:** 5-10% faster for small prompts, reduced allocator pressure
pub type TokenBuffer = ;
/// Larger token buffer for generation output (up to 256 tokens on stack)
///
/// Suitable for typical generation responses before spilling to heap.
pub type GenerationBuffer = ;
/// Represents a token with its metadata