Skip to main content

turul_llm_core/
request.rs

1//! Request types passed to [`LlmClient::complete`](crate::client::LlmClient::complete).
2
3use serde::{Deserialize, Serialize};
4use serde_json::Value;
5
6/// A single LLM completion request.
7///
8/// The caller is responsible for rendering the prompt — the trait does
9/// not own the template surface. `output_schema` is optional: leaving it
10/// `None` means "free-form text" and the adapter will treat the response
11/// as a raw string wrapped into [`crate::response::CompletionResponse::parsed_output`].
12#[derive(Debug, Clone)]
13#[non_exhaustive]
14pub struct CompletionRequest {
15    /// Fully-rendered prompt string the model should consume as user
16    /// input. The trait deliberately carries a single string here
17    /// rather than a system/user split: providers that distinguish
18    /// roles can split the rendered prompt at a marker. If a future
19    /// provider needs role-tagged or multi-part input, the trait must
20    /// grow a structured input type rather than overloading this
21    /// field — `#[non_exhaustive]` on this struct keeps that path open.
22    pub rendered_prompt: String,
23
24    /// Optional JSON Schema 2020-12 document describing the expected
25    /// output shape. When present, adapters pass it through to the
26    /// provider's structured-output API (Ollama `format`, OpenAI
27    /// `response_format.json_schema.schema`, etc.). When absent the
28    /// model is unconstrained and the adapter wraps any returned text
29    /// into [`crate::response::CompletionResponse::parsed_output`] as a JSON string.
30    pub output_schema: Option<Value>,
31
32    /// Generation hints. All fields are optional; adapters apply each
33    /// hint only if the provider's API supports it.
34    pub execution_hints: ExecutionHints,
35}
36
37impl CompletionRequest {
38    /// Build a request with just a rendered prompt; no schema, default
39    /// hints. Use this for free-form text completion.
40    pub fn new(rendered_prompt: impl Into<String>) -> Self {
41        Self {
42            rendered_prompt: rendered_prompt.into(),
43            output_schema: None,
44            execution_hints: ExecutionHints::default(),
45        }
46    }
47
48    /// Builder-style setter for [`output_schema`](Self::output_schema).
49    pub fn with_output_schema(mut self, schema: Value) -> Self {
50        self.output_schema = Some(schema);
51        self
52    }
53
54    /// Builder-style setter for [`execution_hints`](Self::execution_hints).
55    pub fn with_execution_hints(mut self, hints: ExecutionHints) -> Self {
56        self.execution_hints = hints;
57        self
58    }
59}
60
61/// Optional generation hints. Adapters apply each field only if the
62/// provider's API supports it; unsupported fields are silently ignored.
63#[derive(Debug, Clone, Default, Serialize, Deserialize)]
64#[non_exhaustive]
65pub struct ExecutionHints {
66    /// Upper bound on tokens generated for the completion.
67    pub max_tokens: Option<u32>,
68
69    /// Sampling temperature. Conventional range is `0.0..=2.0`.
70    pub temperature: Option<f32>,
71
72    /// Nucleus sampling cutoff. Conventional range is `0.0..=1.0`.
73    pub top_p: Option<f32>,
74}
75
76impl ExecutionHints {
77    /// Construct an empty hints block — equivalent to
78    /// [`ExecutionHints::default`] but available in `const` contexts
79    /// and as a builder entry point.
80    pub fn new() -> Self {
81        Self {
82            max_tokens: None,
83            temperature: None,
84            top_p: None,
85        }
86    }
87
88    /// Builder-style setter for [`max_tokens`](Self::max_tokens).
89    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
90        self.max_tokens = Some(max_tokens);
91        self
92    }
93
94    /// Builder-style setter for [`temperature`](Self::temperature).
95    pub fn with_temperature(mut self, temperature: f32) -> Self {
96        self.temperature = Some(temperature);
97        self
98    }
99
100    /// Builder-style setter for [`top_p`](Self::top_p).
101    pub fn with_top_p(mut self, top_p: f32) -> Self {
102        self.top_p = Some(top_p);
103        self
104    }
105
106    /// True iff every field is `None` — useful for adapters that want
107    /// to skip emitting the generation-options block entirely.
108    pub fn is_empty(&self) -> bool {
109        self.max_tokens.is_none() && self.temperature.is_none() && self.top_p.is_none()
110    }
111}