Skip to main content

sqlrite_ask/provider/
mod.rs

1//! LLM provider abstraction.
2//!
3//! `Provider` is the trait every backend implements. Today there's one
4//! production impl ([`anthropic::AnthropicProvider`]) and one test impl
5//! ([`MockProvider`]). OpenAI and Ollama follow-ups will plug in here
6//! without touching the rest of the crate (per Phase 7 plan Q4 —
7//! Anthropic-first, others later).
8//!
//! The trait is deliberately narrow: one method, sync, one request
//! shape in, one response out (the model's raw text plus token usage;
//! parsing that text is the caller's job). Schema-aware prompt
//! construction lives one layer up in `crate::prompt`, so providers
//! stay generic over what's being asked.
13
14use crate::AskError;
15use crate::prompt::{SystemBlock, UserMessage};
16
17pub mod anthropic;
18
19#[cfg(test)]
20mod mock;
21#[cfg(test)]
22pub(crate) use mock::MockProvider;
23
24/// One LLM call's worth of input. Mirrors the Anthropic Messages
25/// request shape because it's the most expressive of the three
26/// providers we'll support; OpenAI and Ollama adapters convert to
27/// their native shapes inside their own `complete` impls.
28pub struct Request<'a> {
29    pub model: &'a str,
30    pub max_tokens: u32,
31    pub system: &'a [SystemBlock],
32    pub messages: &'a [UserMessage],
33}
34
35/// What every provider returns. We keep this minimal — `text` is the
36/// raw string the model produced (the caller parses it), `usage`
37/// surfaces token counts so callers can verify cache hits.
38pub struct Response {
39    /// The raw text content of the assistant's reply. Caller is
40    /// responsible for JSON-parsing it (per the prompt template, this
41    /// will be `{"sql": "...", "explanation": "..."}` on success).
42    pub text: String,
43    pub usage: Usage,
44}
45
/// Token-usage breakdown. Names match Anthropic's API field names so
/// the mapping stays obvious; OpenAI's `prompt_tokens` /
/// `completion_tokens` will fan into `input_tokens` / `output_tokens`
/// when that adapter lands.
///
/// `Default` yields all-zero counters. `PartialEq` / `Eq` compare
/// every counter, so tests can `assert_eq!` a whole `Usage` against an
/// expected value instead of checking field by field.
///
/// **Verifying cache hits:** if `cache_read_input_tokens` is zero
/// across repeated `ask()` calls with the same schema, something in
/// the prefix is invalidating the cache (a silent invalidator — a
/// timestamp rendered into a system block, a varying tool list, etc.).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Usage {
    /// Input tokens reported for the call.
    pub input_tokens: u64,
    /// Output tokens reported for the call.
    pub output_tokens: u64,
    /// Input tokens reported as written to the prompt cache.
    pub cache_creation_input_tokens: u64,
    /// Input tokens reported as read from the prompt cache; non-zero
    /// indicates a cache hit.
    pub cache_read_input_tokens: u64,
}
62
63/// A single one-shot call. Sync because every supported provider has
64/// a sync HTTPS entry point and `ask()` itself is sync (matches the
65/// engine's surface — `Connection::execute` etc. are all sync).
66pub trait Provider {
67    fn complete(&self, req: Request<'_>) -> Result<Response, AskError>;
68}