pub struct GgufFile<R> { /* private fields */ }
Implementations§
impl<R: Read + Seek> GgufFile<R>
pub fn from_reader(reader: R) -> Result<Self, PllmError>
Examples found in repository?
examples/gemma.rs (line 14)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
/// Example entry point: opens the GGUF file at `testdata/gemma2b`, builds the
/// config, tokenizer and weights from it, runs a single prompt through the
/// model, streams every generated token to stdout and finally prints a
/// tokens-per-second figure.
///
/// Every fallible step is `unwrap`ped, so any failure aborts the example with a panic.
fn main() {
    let f = File::open("testdata/gemma2b").unwrap();
    // Alternative reader kept for reference: memory-map the file and read it through a cursor.
    // let mmap = unsafe { Mmap::map(&f).unwrap() };
    // let reader = io::Cursor::new(&mmap[..]);
    let reader = BufReader::new(f);
    let mut gf = GgufFile::from_reader(reader).unwrap();

    let config = Config::from_gguf(&gf).unwrap();
    // `println!` only borrows its arguments, so no clone is needed here.
    println!("{:?}", config);
    let tokenizer = Tokenizer::from_gguf(&gf).unwrap();

    // `config` is consumed by `LLM::new` below, so the earlier users get clones.
    let mut weights = Weights::new(config.clone());
    weights.load_from_gguf(&mut gf, config.clone()).unwrap();

    // NOTE(review): the second argument (0.8) is presumably the sampling temperature — confirm.
    let iterator = LLM::new(config, tokenizer, weights)
        .inference("why the sky is blue?".to_string(), 0.8)
        .unwrap();

    let mut token_count = 0;
    let start = Instant::now();
    for t in iterator {
        print!("{}", t.unwrap());
        // Flush after every token so the output appears as it is generated.
        io::stdout().flush().unwrap();
        token_count += 1;
    }

    // Throughput is computed from whole elapsed milliseconds, scaled to seconds.
    // NOTE(review): one token is subtracted from the count, presumably to
    // discount the first token — confirm the intent.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_millis() as f64 * 1000.0
    );
}
pub fn metadata(&self) -> &Metadata
pub fn get_tensor(&mut self, name: &str) -> Result<Tensor, PllmError>
Trait Implementations§
Auto Trait Implementations§
impl<R> Freeze for GgufFile<R> where
R: Freeze,
impl<R> RefUnwindSafe for GgufFile<R> where
R: RefUnwindSafe,
impl<R> Send for GgufFile<R> where
R: Send,
impl<R> Sync for GgufFile<R> where
R: Sync,
impl<R> Unpin for GgufFile<R> where
R: Unpin,
impl<R> UnwindSafe for GgufFile<R> where
R: UnwindSafe,
Blanket Implementations§
impl<T> BorrowMut<T> for T where
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more