aha 0.2.5

aha is a model inference library. It currently supports Qwen (2.5VL/3/3VL/3.5/ASR/3Embedding/3Reranker), MiniCPM4, VoxCPM/1.5, DeepSeek-OCR/2, Hunyuan-OCR, PaddleOCR-VL/1.5, RMBG2.0, GLM (ASR-Nano-2512/OCR), Fun-ASR-Nano-2512, and LFM (2/2.5/2VL/2.5VL).
Documentation
use crate::models::{
    common::reranker::{RerankerSimilarity, TextRerank},
    qwen3_embedding::Qwen3Embedding,
};
use anyhow::Result;
use candle_core::{DType, Device};

/// Text reranker built on top of a Qwen3 embedding model.
///
/// Embeds the query and candidate documents with [`Qwen3Embedding`] and
/// ranks documents by a [`RerankerSimilarity`] measure (cosine, as set in
/// [`Qwen3Reranker::init`]).
pub struct Qwen3Reranker {
    // Underlying embedding model used to vectorize query and documents.
    embedding: Qwen3Embedding,
    // Similarity measure applied between query and document embeddings.
    similar: RerankerSimilarity,
}

impl Qwen3Reranker {
    /// Builds a reranker by loading a Qwen3 embedding model from `path`.
    ///
    /// `device` and `dtype` are forwarded to [`Qwen3Embedding::init`]; `None`
    /// lets the embedding model pick its defaults. The similarity measure is
    /// fixed to cosine.
    ///
    /// # Errors
    /// Returns any error produced while loading the embedding model.
    pub fn init(path: &str, device: Option<&Device>, dtype: Option<DType>) -> Result<Self> {
        Ok(Self {
            embedding: Qwen3Embedding::init(path, device, dtype)?,
            similar: RerankerSimilarity::Cosine,
        })
    }
}

impl TextRerank for Qwen3Reranker {
    /// Scores every document in `documents` against `query`.
    ///
    /// The query is embedded and given a leading batch dimension, the
    /// documents are embedded as a matrix, and the configured similarity
    /// (cosine) is computed between them. The resulting row of scores is
    /// squeezed back to a flat `Vec<f32>`, one score per document, in input
    /// order.
    ///
    /// # Errors
    /// Propagates any embedding or tensor-operation failure.
    fn rerank(&mut self, query: &str, documents: &[String]) -> Result<Vec<f32>> {
        // Shape the query embedding as a 1-row matrix for the similarity op.
        let query_vec = self.embedding.embed_one(query)?;
        let query_matrix = query_vec.unsqueeze(0)?;
        let doc_matrix = self.embedding.embed_multi(documents)?;
        let scores = self.similar.similar(&query_matrix, &doc_matrix, false)?;
        // Drop the batch dimension and materialize the per-document scores.
        Ok(scores.squeeze(0)?.to_vec1::<f32>()?)
    }
}