chat_applefm/
client.rs

1use std::path::PathBuf;
2use std::sync::Arc;
3
4use chat_core::types::provider_meta::ProviderMeta;
5
6use crate::api::types::request::{ConvoEntry, TurnPlan, hash_convo};
7use crate::ffi;
8
9/// Decoding strategy for the on-device model — the full set
10/// FoundationModels exposes via `GenerationOptions.SamplingMode`.
11#[derive(Debug, Clone, Copy, PartialEq)]
12pub enum Sampling {
13    /// Deterministic decoding.
14    Greedy,
15    /// Sample among the `k` most probable tokens.
16    TopK { k: u32, seed: Option<u64> },
17    /// Nucleus sampling: sample within the smallest set of tokens whose
18    /// cumulative probability reaches `p`.
19    TopP { p: f64, seed: Option<u64> },
20}
21
22/// Model wiring baked in by the builder. Conversation concerns —
23/// including system prompts — live in `Messages`, not here. Everything
24/// below `lora` is a default; per-call `ChatOptions` override it.
25#[derive(Debug, Default)]
26pub(crate) struct Config {
27    /// Path to a `.fmadapter` package — a LoRA trained with Apple's
28    /// adapter training toolkit, applied over the on-device base model.
29    pub(crate) lora: Option<PathBuf>,
30    pub(crate) temperature: Option<f64>,
31    pub(crate) max_tokens: Option<u32>,
32    pub(crate) sampling: Option<Sampling>,
33}
34
35/// Owned handle to a live bridge session; releases it on drop.
36#[derive(Debug)]
37struct SessionHandle(u64);
38
39impl Drop for SessionHandle {
40    fn drop(&mut self) {
41        ffi::session_free(self.0);
42    }
43}
44
45#[derive(Debug)]
46struct SessionState {
47    handle: SessionHandle,
48    instructions_hash: u64,
49    /// Hash of every conversation entry the session has seen, including
50    /// model replies.
51    prefix_hash: u64,
52    prefix_len: usize,
53}
54
55/// The client's slot for a live bridge session, with the fingerprint
56/// that recognizes append-only continuations. All lifecycle transitions
57/// go through these methods; nothing else touches the state.
58#[derive(Debug, Default)]
59pub(crate) struct Session(Option<SessionState>);
60
61impl Session {
62    /// Decide how to run this turn: incremental prefill against the held
63    /// session, or tear down and rebuild. Reuse requires the same
64    /// instructions and a conversation that extends what the session has
65    /// seen by exactly one message.
66    pub(crate) fn plan(&self, instructions_hash: u64, convo: &[ConvoEntry]) -> TurnPlan {
67        match &self.0 {
68            Some(s)
69                if s.instructions_hash == instructions_hash
70                    && convo.len() == s.prefix_len + 1
71                    && hash_convo(&convo[..s.prefix_len]) == s.prefix_hash =>
72            {
73                TurnPlan::Reuse
74            }
75            _ => TurnPlan::Rebuild,
76        }
77    }
78
79    /// The live bridge session id, if any.
80    pub(crate) fn id(&self) -> Option<u64> {
81        self.0.as_ref().map(|s| s.handle.0)
82    }
83
84    /// Drop the held session (frees the bridge side). Called on rebuild
85    /// and whenever a turn errors — the bridge session may then hold a
86    /// half-applied turn, so the next call starts fresh.
87    pub(crate) fn invalidate(&mut self) {
88        self.0 = None;
89    }
90
91    /// Replace with a freshly created bridge session that has seen
92    /// nothing yet.
93    pub(crate) fn install(&mut self, id: u64, instructions_hash: u64) {
94        self.0 = Some(SessionState {
95            handle: SessionHandle(id),
96            instructions_hash,
97            prefix_hash: hash_convo(&[]),
98            prefix_len: 0,
99        });
100    }
101
102    /// Record that the session has now seen `convo` plus the model's
103    /// reply, so the next append-only turn plans as `Reuse`.
104    pub(crate) fn advance(&mut self, mut convo: Vec<ConvoEntry>, reply_text: String) {
105        if let Some(s) = &mut self.0 {
106            convo.push(ConvoEntry {
107                role: "assistant",
108                text: reply_text,
109            });
110            s.prefix_len = convo.len();
111            s.prefix_hash = hash_convo(&convo);
112        }
113    }
114}
115
116/// Client for the Apple Intelligence on-device foundation model.
117///
118/// There is no model slug and no API key: the OS owns the (one) model.
119/// What varies per client is the configuration — a LoRA adapter and
120/// decoding defaults.
121///
122/// The client holds a live session across turns: append-only
123/// conversations prefill only the newest message, which is what makes
124/// multi-turn chat fast. Clones share that session, so use one client
125/// per concurrent conversation.
126#[derive(Clone, Debug)]
127pub struct AppleFMClient {
128    pub(crate) config: Arc<Config>,
129    /// `Arc` because `ProviderMeta` is not `Clone`.
130    pub(crate) meta: Arc<ProviderMeta>,
131    /// Locked for the whole of each turn — serializes calls so the
132    /// bridge session is never used concurrently.
133    pub(crate) session: Arc<tokio::sync::Mutex<Session>>,
134}
135
136impl AppleFMClient {
137    /// Identifier used as the response `model_slug`: the base model name,
138    /// plus the adapter file stem when a LoRA is loaded.
139    pub fn model_slug(&self) -> String {
140        match self.config.lora.as_deref().and_then(|p| p.file_stem()) {
141            Some(stem) => format!("apple-on-device+{}", stem.to_string_lossy()),
142            None => "apple-on-device".to_owned(),
143        }
144    }
145
146    pub fn provider_meta(&self) -> &ProviderMeta {
147        &self.meta
148    }
149
150    /// Hint the OS to stage model resources for an upcoming turn.
151    ///
152    /// The runtime stages the model down between requests, so a turn that
153    /// follows an idle pause (a user typing, say) pays seconds of warm-up.
154    /// Call this when you can predict a turn is coming — on input focus,
155    /// when the user starts typing — and the warm-up overlaps the wait.
156    ///
157    /// Fire-and-forget and cheap: returns immediately, never fails, and
158    /// is skipped entirely if a turn is already in flight (the model is
159    /// active then anyway). Works before the first turn too.
160    pub fn prewarm(&self) {
161        if let Ok(session) = self.session.try_lock() {
162            let id = session.id().unwrap_or(0);
163            drop(session);
164            ffi::prewarm(id);
165        }
166    }
167
168    /// Fill the metadata the bridge can't: wall-clock duration and
169    /// creation time measured around the call, the active LoRA adapter,
170    /// and whether the turn reused the session (`prefill:
171    /// "incremental"`) or rebuilt it (`prefill: "full"`). Token usage
172    /// stays zero — Apple's FoundationModels API does not expose token
173    /// counts.
174    pub(crate) fn enrich_metadata(
175        &self,
176        metadata: &mut chat_core::types::metadata::Metadata,
177        elapsed: std::time::Duration,
178        reused_session: bool,
179    ) {
180        metadata.duration_ms = Some(elapsed.as_millis() as u64);
181        metadata.created_at = std::time::SystemTime::now()
182            .duration_since(std::time::UNIX_EPOCH)
183            .ok()
184            .map(|d| d.as_secs());
185        metadata.provider_specific.insert(
186            "prefill".to_owned(),
187            serde_json::Value::String(
188                if reused_session {
189                    "incremental"
190                } else {
191                    "full"
192                }
193                .to_owned(),
194            ),
195        );
196        if let Some(lora) = &self.config.lora {
197            metadata.provider_specific.insert(
198                "lora".to_owned(),
199                serde_json::Value::String(lora.to_string_lossy().into_owned()),
200            );
201        }
202    }
203}
204
205#[cfg(test)]
206mod tests {
207    use super::*;
208    use crate::api::types::request::hash_instructions;
209
210    fn entry(role: &'static str, text: &str) -> ConvoEntry {
211        ConvoEntry {
212            role,
213            text: text.to_owned(),
214        }
215    }
216
217    #[test]
218    fn session_lifecycle_plans_correctly() {
219        let instructions_hash = hash_instructions(Some("sys"));
220        let mut session = Session::default();
221
222        // No session yet → rebuild.
223        let convo1 = vec![entry("user", "hi")];
224        assert_eq!(session.plan(instructions_hash, &convo1), TurnPlan::Rebuild);
225
226        // Simulate the first turn: install, then advance with the reply.
227        session.install(1, instructions_hash);
228        session.advance(convo1, "yo".to_owned());
229
230        // Append-only next turn → reuse.
231        let convo2 = vec![
232            entry("user", "hi"),
233            entry("assistant", "yo"),
234            entry("user", "how are you?"),
235        ];
236        assert_eq!(session.plan(instructions_hash, &convo2), TurnPlan::Reuse);
237
238        // Edited history → rebuild.
239        let edited = vec![
240            entry("user", "hi EDITED"),
241            entry("assistant", "yo"),
242            entry("user", "how are you?"),
243        ];
244        assert_eq!(session.plan(instructions_hash, &edited), TurnPlan::Rebuild);
245
246        // Changed instructions → rebuild.
247        assert_eq!(
248            session.plan(hash_instructions(Some("other")), &convo2),
249            TurnPlan::Rebuild
250        );
251
252        // Invalidated → rebuild.
253        session.invalidate();
254        assert_eq!(session.id(), None);
255        assert_eq!(session.plan(instructions_hash, &convo2), TurnPlan::Rebuild);
256    }
257}
chat_applefm/client.rs

chat_applefm/
client.rs