car-inference 0.15.0

Local model inference for CAR — Candle backend with Qwen3 models
Documentation
//! Reranking request/result types.
//!
//! A reranker takes a query plus a candidate set of documents and
//! returns them scored by relevance. Qwen3-Reranker is the canonical
//! model here — it's a small generative LM fine-tuned to emit
//! `P("yes" | query, document)` as the relevance signal, paired with
//! the Qwen3-Embedding family for a full retrieval stack.
//!
//! The actual scoring logic lives in backend-specific modules; this
//! module defines the public request/result shape used by
//! `InferenceEngine::rerank()`.

use serde::{Deserialize, Serialize};

/// A reranking request: one query plus the candidate documents to score
/// against it.
///
/// Only `query` and `documents` are required when deserializing; each
/// optional field becomes `None` when it is absent from the input.
///
/// The derived [`Default`] (empty query, no documents, every option unset)
/// lets callers fill in only what they need:
/// `RerankRequest { query, documents, ..Default::default() }`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct RerankRequest {
    /// The search query that every candidate is scored against.
    pub query: String,
    /// Candidate documents to score. The result is ordered by score, so
    /// each entry's [`RerankedDocument::index`] records the candidate's
    /// position in this list.
    pub documents: Vec<String>,
    /// Optional model override (e.g. "Qwen3-Reranker-0.6B").
    #[serde(default)]
    pub model: Option<String>,
    /// If set, only the top-N highest-scoring candidates are returned.
    /// Otherwise all candidates are returned, sorted by score descending.
    #[serde(default)]
    pub top_n: Option<usize>,
    /// Optional task instruction (Qwen3-Reranker prompt prefix).
    /// Defaults to "Given a web search query, retrieve relevant
    /// passages that answer the query" — the Qwen upstream default.
    #[serde(default)]
    pub instruction: Option<String>,
}

/// One scored candidate returned from a rerank call.
///
/// Implements [`PartialEq`] so ranked output can be compared directly
/// (for example with `assert_eq!`). `Eq` is intentionally not derived
/// because `score` is an `f32`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RerankedDocument {
    /// Original position of this document in [`RerankRequest::documents`].
    pub index: usize,
    /// Relevance score in `[0.0, 1.0]`. Higher = more relevant.
    pub score: f32,
    /// The document text, repeated here so callers can consume the
    /// result without looking the entry up in the original request.
    pub document: String,
}

/// Outcome of a rerank call: the scored candidates, most relevant first.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct RerankResult {
    /// Scored candidates in descending score order. When the request set
    /// `top_n`, the length is `min(documents.len(), top_n)`.
    pub ranked: Vec<RerankedDocument>,
    /// Name of the model that produced these scores, when known.
    /// Left out of the serialized output entirely when `None`, and
    /// read back as `None` when the key is missing from the input.
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_used: Option<String>,
}

#[cfg(test)]
mod tests {
    // Serde contract tests: these pin the JSON shape of the request and
    // result types rather than any scoring behaviour.
    use super::*;

    /// A request carrying only the required keys must parse, with every
    /// optional field left unset.
    #[test]
    fn minimal_request_deserializes_with_defaults() {
        let json = r#"{"query":"who is the president","documents":["doc a","doc b"]}"#;
        let req: RerankRequest = serde_json::from_str(json).unwrap();
        assert_eq!(req.query, "who is the president");
        assert_eq!(req.documents.len(), 2);
        assert!(req.model.is_none());
        assert!(req.top_n.is_none());
        assert!(req.instruction.is_none());
    }

    /// Every field of a fully populated request must survive a
    /// serialize → deserialize round trip.
    #[test]
    fn full_request_roundtrip() {
        let req = RerankRequest {
            query: "q".into(),
            documents: vec!["d1".into(), "d2".into()],
            model: Some("Qwen3-Reranker-0.6B".into()),
            top_n: Some(1),
            instruction: Some("retrieve passages relevant to the query".into()),
        };
        let json = serde_json::to_string(&req).unwrap();
        let parsed: RerankRequest = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.query, req.query);
        assert_eq!(parsed.documents, req.documents);
        // The model override is part of the request too; make sure it is
        // not silently dropped on the way through JSON.
        assert_eq!(parsed.model, req.model);
        assert_eq!(parsed.top_n, Some(1));
        assert_eq!(
            parsed.instruction.as_deref(),
            Some("retrieve passages relevant to the query")
        );
    }

    /// An unset `model_used` must be omitted from the output, not written
    /// as an explicit `null`.
    #[test]
    fn result_serializes_without_null_model_used() {
        let result = RerankResult {
            ranked: vec![RerankedDocument {
                index: 0,
                score: 0.97,
                document: "doc".into(),
            }],
            model_used: None,
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(
            !json.contains("model_used"),
            "None should be omitted, not null"
        );
        assert!(json.contains("\"index\":0"));
        assert!(json.contains("\"score\":0.97"));
    }

    /// The counterpart of the omission test: a known model name must
    /// actually appear in the serialized output.
    #[test]
    fn result_serializes_model_used_when_present() {
        let result = RerankResult {
            ranked: Vec::new(),
            model_used: Some("Qwen3-Reranker-0.6B".into()),
        };
        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"model_used\":\"Qwen3-Reranker-0.6B\""));
    }

    /// Because serialization may omit `model_used`, deserialization has to
    /// accept a payload without the key.
    #[test]
    fn result_deserializes_without_model_used() {
        let parsed: RerankResult = serde_json::from_str(r#"{"ranked":[]}"#).unwrap();
        assert!(parsed.ranked.is_empty());
        assert!(parsed.model_used.is_none());
    }
}