kapsl-llm 0.1.0

Large language model inference with GGUF and ONNX backend support for Kapsl
Documentation
#[cfg(test)]
mod tests {
    use super::super::{LLMScheduler, SchedulerConfig};
    use crate::block_manager::BlockManager;
    use crate::sequence::{FinishReason, SamplingParams, SequenceGroup, SequenceStatus};
    use tokio::sync::mpsc;

    fn make_group(prompt_len: usize) -> SequenceGroup {
        let (tx, _rx) = mpsc::channel(1);
        SequenceGroup::new(
            "req".to_string(),
            None,
            "prompt".to_string(),
            vec![0u32; prompt_len],
            SamplingParams {
                max_tokens: 8,
                temperature: 0.7,
                top_p: 0.9,
                top_k: 40,
                stop_token_ids: Vec::new(),
                repetition_penalty: 1.0,
                seed: None,
            },
            None,
            tx,
        )
    }

    #[test]
    fn schedule_moves_waiting_to_running_and_allocates_blocks() {
        let config = SchedulerConfig {
            max_num_batched_tokens: 64,
            max_num_seqs: 4,
            max_paddings: 0,
        };
        let block_manager = BlockManager::new(4, 16, 0);
        let mut scheduler = LLMScheduler::new(config, block_manager);

        scheduler.add_sequence_group(make_group(4));
        let outputs = scheduler.schedule();
        assert_eq!(outputs.scheduled_seq_groups.len(), 1);

        let group = outputs.scheduled_seq_groups[0].lock().unwrap();
        let seq_arc = group.sequences.values().next().unwrap();
        let seq = seq_arc.lock().unwrap();
        assert_eq!(seq.status, SequenceStatus::Running);
        assert!(scheduler
            .block_manager
            .get_block_table(seq.sequence_id)
            .is_some());
    }

    #[test]
    fn free_finished_sequences_releases_blocks() {
        let config = SchedulerConfig {
            max_num_batched_tokens: 64,
            max_num_seqs: 4,
            max_paddings: 0,
        };
        let block_manager = BlockManager::new(1, 16, 0);
        let mut scheduler = LLMScheduler::new(config, block_manager);

        scheduler.add_sequence_group(make_group(1));
        let _ = scheduler.schedule();

        let group_arc = scheduler
            .running_queue
            .front()
            .expect("running group")
            .clone();
        let seq_arc = group_arc
            .lock()
            .unwrap()
            .sequences
            .values()
            .next()
            .unwrap()
            .clone();
        let (old_status, new_status) = {
            let mut seq = seq_arc.lock().unwrap();
            let old_status = seq.status;
            seq.status = SequenceStatus::Finished(FinishReason::Stop);
            (old_status, seq.status)
        };
        {
            let mut group = group_arc.lock().unwrap();
            group.update_seq_status(old_status, new_status);
        }

        let finished = scheduler.free_finished_sequences();
        assert_eq!(finished.len(), 1);
        assert!(scheduler.block_manager.can_allocate(1));
    }
}