Skip to main content

sqlrite_ask/provider/
mod.rs

1//! LLM provider abstraction.
2//!
3//! `Provider` is the trait every backend implements. Today there's one
4//! production impl ([`anthropic::AnthropicProvider`]) and one test impl
5//! ([`MockProvider`]). OpenAI and Ollama follow-ups will plug in here
6//! without touching the rest of the crate (per Phase 7 plan Q4 —
7//! Anthropic-first, others later).
8//!
9//! The trait is deliberately narrow: one method, sync, one prompt
10//! shape in, one parsed response out. Schema-aware prompt construction
11//! lives one layer up in `crate::prompt`, so providers stay generic
12//! over what's being asked.
13
14use crate::AskError;
15use crate::prompt::{SystemBlock, UserMessage};
16
17// Anthropic adapter is HTTP-based (sync ureq POST). The `http`
18// feature gates the whole module so wasm32 builds — which can't
19// link ureq + rustls — skip it cleanly. The WASM SDK uses Q9's
20// JS-callback shape (caller does the HTTP from JS) and never
21// instantiates this provider.
22#[cfg(feature = "http")]
23pub mod anthropic;
24
25#[cfg(test)]
26mod mock;
27#[cfg(test)]
28pub(crate) use mock::MockProvider;
29
30/// One LLM call's worth of input. Mirrors the Anthropic Messages
31/// request shape because it's the most expressive of the three
32/// providers we'll support; OpenAI and Ollama adapters convert to
33/// their native shapes inside their own `complete` impls.
34pub struct Request<'a> {
35    pub model: &'a str,
36    pub max_tokens: u32,
37    pub system: &'a [SystemBlock],
38    pub messages: &'a [UserMessage],
39}
40
41/// What every provider returns. We keep this minimal — `text` is the
42/// raw string the model produced (the caller parses it), `usage`
43/// surfaces token counts so callers can verify cache hits.
44pub struct Response {
45    /// The raw text content of the assistant's reply. Caller is
46    /// responsible for JSON-parsing it (per the prompt template, this
47    /// will be `{"sql": "...", "explanation": "..."}` on success).
48    pub text: String,
49    pub usage: Usage,
50}
51
/// Token-usage breakdown for one provider call.
///
/// Field names match Anthropic's API field names so the mapping stays
/// obvious; OpenAI's `prompt_tokens` / `completion_tokens` will fan into
/// `input_tokens` / `output_tokens` when that adapter lands.
///
/// **Verifying cache hits:** if `cache_read_input_tokens` is zero
/// across repeated `ask()` calls with the same schema, something in
/// the prefix is invalidating the cache (a silent invalidator —
/// `datetime.now()` in a system block, varying tool list, etc.).
///
/// `PartialEq` / `Eq` are derived so two readings can be compared directly
/// (e.g. with `assert_eq!`); `Default` yields all-zero counts.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Usage {
    /// Tokens in the request (OpenAI's `prompt_tokens` maps here).
    pub input_tokens: u64,
    /// Tokens in the reply (OpenAI's `completion_tokens` maps here).
    pub output_tokens: u64,
    /// Anthropic's `cache_creation_input_tokens`: input tokens written to
    /// the prompt cache by this call.
    pub cache_creation_input_tokens: u64,
    /// Anthropic's `cache_read_input_tokens`: input tokens served from the
    /// prompt cache. Non-zero on a cache hit.
    pub cache_read_input_tokens: u64,
}
68
69/// A single one-shot call. Sync because every supported provider has
70/// a sync HTTPS entry point and `ask()` itself is sync (matches the
71/// engine's surface — `Connection::execute` etc. are all sync).
72pub trait Provider {
73    fn complete(&self, req: Request<'_>) -> Result<Response, AskError>;
74}