pub struct Weights { /* private fields */ }
Implementations§
source§impl Weights
impl Weights
source§pub fn new(c: Config) -> Self
pub fn new(c: Config) -> Self
Examples found in repository?
examples/llama2c.rs (line 15)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
/// Example: run llama2.c-format inference end to end.
///
/// Reads the model config and weights from a single binary checkpoint,
/// loads the tokenizer, streams generated tokens to stdout, and reports
/// the decode rate.
fn main() {
    // The checkpoint starts with the config header; the weight data
    // follows it in the same stream, so the reader is shared.
    let f = File::open("testdata/stories15M.bin").unwrap();
    let mut reader = BufReader::new(f);
    let config = Config::from_reader(&mut reader).unwrap();
    println!("{:?}", config);
    let mut weights = Weights::new(config.clone());
    weights.load_data(&mut reader).unwrap();

    // Tokenizer vocab size comes from the model config.
    let tokenizer_file = File::open("testdata/tokenizer.bin").unwrap();
    let tokenizer_reader = BufReader::new(tokenizer_file);
    let tokenizer = Tokenizer::from_reader(config.vocab_size as usize, tokenizer_reader).unwrap();

    // Stream tokens for the prompt at temperature 0.8.
    let iterator = LLM::new(config, tokenizer, weights)
        .inference("a dog".to_string(), 0.8)
        .unwrap();
    let mut token_count = 0;
    let start = Instant::now();
    // Fixed: the original used `iterator.enumerate()` and discarded the index.
    for t in iterator {
        print!("{}", t.unwrap());
        // Flush so tokens appear as they are generated, not at exit.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // The first token includes prompt-processing latency, so it is excluded
    // from the rate. Fixed: `as_millis()` truncated to whole milliseconds
    // (and divides by zero for sub-millisecond runs); `as_secs_f64()` keeps
    // full precision.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}
More examples
examples/gemma.rs (line 21)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
/// Example: run inference from a GGUF model file (Gemma 2B).
///
/// Parses the GGUF container to obtain config, tokenizer, and weights,
/// then streams generated tokens to stdout and reports the decode rate.
fn main() {
    let f = File::open("testdata/gemma2b").unwrap();
    let reader = BufReader::new(f);
    let mut gf = GgufFile::from_reader(reader).unwrap();
    let config = Config::from_gguf(&gf).unwrap();
    // Fixed: `config.clone()` was redundant — format arguments are
    // taken by reference.
    println!("{:?}", config);
    let tokenizer = Tokenizer::from_gguf(&gf).unwrap();
    let mut weights = Weights::new(config.clone());
    weights.load_from_gguf(&mut gf, config.clone()).unwrap();

    // Stream tokens for the prompt at temperature 0.8.
    let iterator = LLM::new(config, tokenizer, weights)
        .inference("why the sky is blue?".to_string(), 0.8)
        .unwrap();
    let mut token_count = 0;
    let start = Instant::now();
    // Fixed: the original used `iterator.enumerate()` and discarded the index.
    for t in iterator {
        print!("{}", t.unwrap());
        // Flush so tokens appear as they are generated, not at exit.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // The first token includes prompt-processing latency, so it is excluded
    // from the rate. Fixed: `as_millis()` truncated to whole milliseconds
    // (and divides by zero for sub-millisecond runs); `as_secs_f64()` keeps
    // full precision.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}
source§pub fn load_data(&mut self, reader: impl Read) -> Result<(), PllmError>
pub fn load_data(&mut self, reader: impl Read) -> Result<(), PllmError>
Examples found in repository?
examples/llama2c.rs (line 16)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
/// Example: run llama2.c-format inference end to end.
///
/// Reads the model config and weights from a single binary checkpoint,
/// loads the tokenizer, streams generated tokens to stdout, and reports
/// the decode rate.
fn main() {
    // The checkpoint starts with the config header; the weight data
    // follows it in the same stream, so the reader is shared.
    let f = File::open("testdata/stories15M.bin").unwrap();
    let mut reader = BufReader::new(f);
    let config = Config::from_reader(&mut reader).unwrap();
    println!("{:?}", config);
    let mut weights = Weights::new(config.clone());
    weights.load_data(&mut reader).unwrap();

    // Tokenizer vocab size comes from the model config.
    let tokenizer_file = File::open("testdata/tokenizer.bin").unwrap();
    let tokenizer_reader = BufReader::new(tokenizer_file);
    let tokenizer = Tokenizer::from_reader(config.vocab_size as usize, tokenizer_reader).unwrap();

    // Stream tokens for the prompt at temperature 0.8.
    let iterator = LLM::new(config, tokenizer, weights)
        .inference("a dog".to_string(), 0.8)
        .unwrap();
    let mut token_count = 0;
    let start = Instant::now();
    // Fixed: the original used `iterator.enumerate()` and discarded the index.
    for t in iterator {
        print!("{}", t.unwrap());
        // Flush so tokens appear as they are generated, not at exit.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // The first token includes prompt-processing latency, so it is excluded
    // from the rate. Fixed: `as_millis()` truncated to whole milliseconds
    // (and divides by zero for sub-millisecond runs); `as_secs_f64()` keeps
    // full precision.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}
source§pub fn load_from_gguf<R: Read + Seek>(
&mut self,
gf: &mut GgufFile<R>,
c: Config
) -> Result<(), PllmError>
pub fn load_from_gguf<R: Read + Seek>( &mut self, gf: &mut GgufFile<R>, c: Config ) -> Result<(), PllmError>
Examples found in repository?
examples/gemma.rs (line 22)
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
/// Example: run inference from a GGUF model file (Gemma 2B).
///
/// Parses the GGUF container to obtain config, tokenizer, and weights,
/// then streams generated tokens to stdout and reports the decode rate.
fn main() {
    let f = File::open("testdata/gemma2b").unwrap();
    let reader = BufReader::new(f);
    let mut gf = GgufFile::from_reader(reader).unwrap();
    let config = Config::from_gguf(&gf).unwrap();
    // Fixed: `config.clone()` was redundant — format arguments are
    // taken by reference.
    println!("{:?}", config);
    let tokenizer = Tokenizer::from_gguf(&gf).unwrap();
    let mut weights = Weights::new(config.clone());
    weights.load_from_gguf(&mut gf, config.clone()).unwrap();

    // Stream tokens for the prompt at temperature 0.8.
    let iterator = LLM::new(config, tokenizer, weights)
        .inference("why the sky is blue?".to_string(), 0.8)
        .unwrap();
    let mut token_count = 0;
    let start = Instant::now();
    // Fixed: the original used `iterator.enumerate()` and discarded the index.
    for t in iterator {
        print!("{}", t.unwrap());
        // Flush so tokens appear as they are generated, not at exit.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // The first token includes prompt-processing latency, so it is excluded
    // from the rate. Fixed: `as_millis()` truncated to whole milliseconds
    // (and divides by zero for sub-millisecond runs); `as_secs_f64()` keeps
    // full precision.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}
pub fn make_quantize_tensor(&self, size: usize) -> Tensor
Auto Trait Implementations§
impl Freeze for Weights
impl RefUnwindSafe for Weights
impl Send for Weights
impl Sync for Weights
impl Unpin for Weights
impl UnwindSafe for Weights
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
    T: ?Sized,
impl<T> BorrowMut<T> for T where
    T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more