Struct pllm::LLM

source ·

pub struct LLM { /* private fields */ }

Implementations§

source §

impl LLM

source

pub fn new(config: Config, tokenizer: Tokenizer, weights: Weights) -> Self

Examples found in repository ?

examples/llama2c.rs (line 23)

/// Loads a model from `testdata/stories15M.bin` and a tokenizer from
/// `testdata/tokenizer.bin`, runs inference on the prompt `"a dog"` with a
/// temperature of `0.8`, streams every generated token to stdout, and finally
/// prints the measured throughput in tokens per second.
///
/// # Panics
///
/// Every fallible step (opening or reading the files, starting inference,
/// producing a token, flushing stdout) is unwrapped, so any failure aborts
/// the example with a panic.
fn main() {
    // The config is read from the reader first; the weights are then loaded
    // from the same reader, continuing where the config left off.
    let f = File::open("testdata/stories15M.bin").unwrap();
    let mut reader = BufReader::new(f);
    let config = Config::from_reader(&mut reader).unwrap();
    println!("{:?}", config);

    let mut weights = Weights::new(config.clone());
    weights.load_data(&mut reader).unwrap();

    let tokenizer_file = File::open("testdata/tokenizer.bin").unwrap();
    let tokenizer_reader = BufReader::new(tokenizer_file);

    let tokenizer = Tokenizer::from_reader(config.vocab_size as usize, tokenizer_reader).unwrap();

    let iterator = LLM::new(config, tokenizer, weights)
        .inference("a dog".to_string(), 0.8)
        .unwrap();
    let mut token_count = 0;
    let start = Instant::now();
    // The index is not needed, so iterate the tokens directly instead of
    // going through `enumerate()`.
    for t in iterator {
        print!("{}", t.unwrap());
        // `print!` does not emit a newline, so flush to show each token immediately.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // `as_secs_f64` keeps sub-millisecond precision; truncating to whole
    // milliseconds would divide by zero for runs shorter than 1 ms.
    // NOTE(review): one token is subtracted from the count, presumably to
    // discount the first token — confirm the intent.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}

More examples

Hide additional examples

examples/gemma.rs (line 24)

/// Loads a GGUF model from `testdata/gemma2b`, builds the config, tokenizer
/// and weights from it, runs inference on the prompt
/// `"why the sky is blue?"` with a temperature of `0.8`, streams every
/// generated token to stdout, and finally prints the measured throughput in
/// tokens per second.
///
/// # Panics
///
/// Every fallible step (opening or parsing the file, loading the weights,
/// starting inference, producing a token, flushing stdout) is unwrapped, so
/// any failure aborts the example with a panic.
fn main() {
    let f = File::open("testdata/gemma2b").unwrap();
    // Alternative kept for reference: memory-map the file and read it
    // through a cursor instead of a buffered reader.
    // let mmap = unsafe { Mmap::map(&f).unwrap() };
    // let reader = io::Cursor::new(&mmap[..]);
    let reader = BufReader::new(f);
    let mut gf = GgufFile::from_reader(reader).unwrap();

    let config = Config::from_gguf(&gf).unwrap();
    // Debug-printing only needs a borrow; no clone required.
    println!("{:?}", config);

    let tokenizer = Tokenizer::from_gguf(&gf).unwrap();

    let mut weights = Weights::new(config.clone());
    weights.load_from_gguf(&mut gf, config.clone()).unwrap();

    let iterator = LLM::new(config, tokenizer, weights)
        .inference("why the sky is blue?".to_string(), 0.8)
        .unwrap();

    let mut token_count = 0;
    let start = Instant::now();
    // The index is not needed, so iterate the tokens directly instead of
    // going through `enumerate()`.
    for t in iterator {
        print!("{}", t.unwrap());
        // `print!` does not emit a newline, so flush to show each token immediately.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // `as_secs_f64` keeps sub-millisecond precision; truncating to whole
    // milliseconds would divide by zero for runs shorter than 1 ms.
    // NOTE(review): one token is subtracted from the count, presumably to
    // discount the first token — confirm the intent.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}

source

pub fn inference( self, prompt: String, temperature: f32 ) -> Result<InferenceIterator, PllmError>

Examples found in repository ?

examples/llama2c.rs (line 24)

/// Loads a model from `testdata/stories15M.bin` and a tokenizer from
/// `testdata/tokenizer.bin`, runs inference on the prompt `"a dog"` with a
/// temperature of `0.8`, streams every generated token to stdout, and finally
/// prints the measured throughput in tokens per second.
///
/// # Panics
///
/// Every fallible step (opening or reading the files, starting inference,
/// producing a token, flushing stdout) is unwrapped, so any failure aborts
/// the example with a panic.
fn main() {
    // The config is read from the reader first; the weights are then loaded
    // from the same reader, continuing where the config left off.
    let f = File::open("testdata/stories15M.bin").unwrap();
    let mut reader = BufReader::new(f);
    let config = Config::from_reader(&mut reader).unwrap();
    println!("{:?}", config);

    let mut weights = Weights::new(config.clone());
    weights.load_data(&mut reader).unwrap();

    let tokenizer_file = File::open("testdata/tokenizer.bin").unwrap();
    let tokenizer_reader = BufReader::new(tokenizer_file);

    let tokenizer = Tokenizer::from_reader(config.vocab_size as usize, tokenizer_reader).unwrap();

    let iterator = LLM::new(config, tokenizer, weights)
        .inference("a dog".to_string(), 0.8)
        .unwrap();
    let mut token_count = 0;
    let start = Instant::now();
    // The index is not needed, so iterate the tokens directly instead of
    // going through `enumerate()`.
    for t in iterator {
        print!("{}", t.unwrap());
        // `print!` does not emit a newline, so flush to show each token immediately.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // `as_secs_f64` keeps sub-millisecond precision; truncating to whole
    // milliseconds would divide by zero for runs shorter than 1 ms.
    // NOTE(review): one token is subtracted from the count, presumably to
    // discount the first token — confirm the intent.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}

More examples

Hide additional examples

examples/gemma.rs (line 25)

/// Loads a GGUF model from `testdata/gemma2b`, builds the config, tokenizer
/// and weights from it, runs inference on the prompt
/// `"why the sky is blue?"` with a temperature of `0.8`, streams every
/// generated token to stdout, and finally prints the measured throughput in
/// tokens per second.
///
/// # Panics
///
/// Every fallible step (opening or parsing the file, loading the weights,
/// starting inference, producing a token, flushing stdout) is unwrapped, so
/// any failure aborts the example with a panic.
fn main() {
    let f = File::open("testdata/gemma2b").unwrap();
    // Alternative kept for reference: memory-map the file and read it
    // through a cursor instead of a buffered reader.
    // let mmap = unsafe { Mmap::map(&f).unwrap() };
    // let reader = io::Cursor::new(&mmap[..]);
    let reader = BufReader::new(f);
    let mut gf = GgufFile::from_reader(reader).unwrap();

    let config = Config::from_gguf(&gf).unwrap();
    // Debug-printing only needs a borrow; no clone required.
    println!("{:?}", config);

    let tokenizer = Tokenizer::from_gguf(&gf).unwrap();

    let mut weights = Weights::new(config.clone());
    weights.load_from_gguf(&mut gf, config.clone()).unwrap();

    let iterator = LLM::new(config, tokenizer, weights)
        .inference("why the sky is blue?".to_string(), 0.8)
        .unwrap();

    let mut token_count = 0;
    let start = Instant::now();
    // The index is not needed, so iterate the tokens directly instead of
    // going through `enumerate()`.
    for t in iterator {
        print!("{}", t.unwrap());
        // `print!` does not emit a newline, so flush to show each token immediately.
        io::stdout().flush().unwrap();
        token_count += 1;
    }
    // `as_secs_f64` keeps sub-millisecond precision; truncating to whole
    // milliseconds would divide by zero for runs shorter than 1 ms.
    // NOTE(review): one token is subtracted from the count, presumably to
    // discount the first token — confirm the intent.
    println!(
        "\ntoken/s: {}\n",
        (token_count as f64 - 1.0) / start.elapsed().as_secs_f64()
    );
}