use llama_cpp_2::token::LlamaToken;
/// Mutable state for a single in-progress inference run: the token
/// history, a byte buffer for partially-decoded output, and a position
/// counter. Supports snapshot/restore/reset via the methods below.
///
/// NOTE(review): field semantics are inferred from names and from the
/// snapshot/restore/reset methods — confirm against the decode loop.
pub(crate) struct InferenceSession {
// Tokens accumulated in this session so far (presumably prompt +
// generated tokens) — verify against the caller that appends here.
pub history_tokens: Vec<LlamaToken>,
// Pending raw output bytes — presumably holds incomplete UTF-8
// sequences from detokenization until a full character is available;
// TODO confirm. Pre-sized to 32 bytes in `new`.
pub utf8_buffer: Vec<u8>,
// Current decode position (likely the llama.cpp KV-cache position,
// hence i32 to match the C API) — TODO confirm.
pub pos: i32,
}
impl InferenceSession {
    /// Creates an empty session: no history tokens and position 0.
    ///
    /// The UTF-8 buffer is pre-allocated (32 bytes) — presumably so
    /// partial multi-byte sequences can accumulate without reallocating
    /// in the common case; confirm against the decode loop.
    pub fn new() -> Self {
        Self {
            history_tokens: Vec::new(),
            utf8_buffer: Vec::with_capacity(32),
            pos: 0,
        }
    }

    /// Captures the complete session state as an owned tuple that can
    /// later be passed to [`restore`](Self::restore) to roll back.
    ///
    /// Both vectors are deep-cloned, so the snapshot is independent of
    /// any further mutation of the session.
    #[must_use = "a snapshot is only useful if kept for a later restore"]
    pub fn snapshot(&self) -> (Vec<LlamaToken>, Vec<u8>, i32) {
        (
            self.history_tokens.clone(),
            self.utf8_buffer.clone(),
            self.pos,
        )
    }

    /// Overwrites the session state with a snapshot previously produced
    /// by [`snapshot`](Self::snapshot), consuming the snapshot (no
    /// clone needed).
    pub fn restore(&mut self, snapshot: (Vec<LlamaToken>, Vec<u8>, i32)) {
        // Destructure instead of `.0`/`.1`/`.2` so each assignment is
        // self-describing.
        let (history_tokens, utf8_buffer, pos) = snapshot;
        self.history_tokens = history_tokens;
        self.utf8_buffer = utf8_buffer;
        self.pos = pos;
    }

    /// Clears all state back to a fresh session. Uses `clear()` rather
    /// than reassignment so both vectors keep their allocations for
    /// reuse by the next run.
    pub fn reset(&mut self) {
        self.history_tokens.clear();
        self.utf8_buffer.clear();
        self.pos = 0;
    }
}

/// `Default` delegates to [`InferenceSession::new`]; a public `new()`
/// with no arguments should be mirrored by `Default` (Clippy
/// `new_without_default`) so the type composes with `Default`-based
/// APIs such as `mem::take`.
impl Default for InferenceSession {
    fn default() -> Self {
        Self::new()
    }
}