apple-intelligence-models 0.2.0

AIMX: safe Rust bindings for Apple's on-device Apple Intelligence language models
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
import Foundation

#if canImport(FoundationModels)
import FoundationModels
#endif

// ─── Availability reason codes (must stay in sync with lib.rs) ───────────────
/// Reported by `fm_availability_reason` when the system model's availability is `.available`.
private let FM_AVAILABLE = Int32(0)
/// Reported for `.deviceNotEligible`, and also when running before macOS 26 or when the
/// FoundationModels framework is not part of the build.
private let FM_DEVICE_NOT_ELIGIBLE = Int32(1)
/// Reported for `.appleIntelligenceNotEnabled`.
private let FM_NOT_ENABLED = Int32(2)
/// Reported for `.modelNotReady`.
private let FM_MODEL_NOT_READY = Int32(3)
/// Reported for any unavailability reason this file does not recognise.
private let FM_UNKNOWN = Int32(4)

// ─── Callback type aliases ────────────────────────────────────────────────────

/// C callback that delivers a single outcome: either a result string or an error string.
///
/// - `context`: opaque Rust pointer, handed back to the caller unchanged.
/// - `result`:  NUL-terminated result text; nil when reporting an error.
/// - `error`:   NUL-terminated error text; nil when reporting success.
///
/// The contract is that exactly one of `result` / `error` is non-nil.
typealias ResultCallback = @convention(c) (
    _ context: UnsafeMutableRawPointer?,
    _ result: UnsafePointer<CChar>?,
    _ error: UnsafePointer<CChar>?
) -> Void

/// C callback invoked once per element produced while streaming a response.
///
/// - `context`: opaque Rust pointer, handed back to the caller unchanged.
/// - `text`:    NUL-terminated text for this element; never nil.
typealias TokenCallback = @convention(c) (
    _ context: UnsafeMutableRawPointer?,
    _ text: UnsafePointer<CChar>
) -> Void

/// C callback invoked a single time when a streaming request has ended.
///
/// - `context`: opaque Rust pointer, handed back to the caller unchanged.
/// - `error`:   NUL-terminated error message, or nil when the stream completed successfully.
typealias DoneCallback = @convention(c) (
    _ context: UnsafeMutableRawPointer?,
    _ error: UnsafePointer<CChar>?
) -> Void

/// C callback through which Swift hands a tool invocation over to Rust.
///
/// - `toolContext`:   opaque pointer to the Rust-side tools context.
/// - `toolName`:      NUL-terminated name of the tool being invoked.
/// - `argumentsJson`: NUL-terminated JSON text holding the tool arguments.
/// - `resultContext`: opaque pointer that Rust must pass back as the first argument of `deliver`.
/// - `deliver`:       callback through which Rust reports the tool's result or error.
///
/// Rust is expected to call `deliver` exactly once, synchronously, before this callback returns.
typealias ToolDispatchCallback = @convention(c) (
    _ toolContext: UnsafeMutableRawPointer?,
    _ toolName: UnsafePointer<CChar>,
    _ argumentsJson: UnsafePointer<CChar>,
    _ resultContext: UnsafeMutableRawPointer?,
    _ deliver: ResultCallback
) -> Void

// ─── Private JSON schema description types ────────────────────────────────────
// These decode the schema JSON that Rust passes for structured generation.

/// One property entry of a schema description, as decoded from the JSON supplied by Rust.
private struct SchemaPropertyDesc {
    /// Property name (JSON key `name`, required).
    let name: String
    /// Human-readable description (JSON key `description`, may be absent).
    let description: String?
    /// Type tag as a string (JSON key `type`, required).
    let type: String
    /// Whether the property may be omitted (JSON key `optional`, required).
    let optional: Bool
}

// Decoding uses the compiler-synthesised keyed implementation.
extension SchemaPropertyDesc: Decodable {}

/// A named schema description (also used for tool definitions), decoded from JSON supplied by Rust.
private struct SchemaDesc {
    /// Schema or tool name (JSON key `name`, required).
    let name: String
    /// Human-readable description (JSON key `description`, may be absent).
    let description: String?
    /// Property descriptors in the order they appear in the JSON array `properties`.
    let properties: [SchemaPropertyDesc]
}

// Decoding uses the compiler-synthesised keyed implementation.
extension SchemaDesc: Decodable {}

// ─── Session holder ───────────────────────────────────────────────────────────

/// Reference-counted box around a `LanguageModelSession`.
///
/// The session-creation entry points retain an instance of this class and hand it to the caller
/// as an opaque pointer; `fm_session_destroy` releases it again. The type is only available on
/// macOS 26+, so every entry point checks availability before touching it.
@available(macOS 26.0, *)
private final class SessionHolder {
    /// The wrapped session.
    let session: LanguageModelSession

    /// Wraps `wrapped` without any further configuration.
    init(_ wrapped: LanguageModelSession) {
        session = wrapped
    }
}

// ─── Tool support ─────────────────────────────────────────────────────────────

/// Mutable slot that receives the outcome of one synchronous tool call dispatched to Rust.
/// Both fields start out nil; the result-writing callback fills in at most one of them.
private final class ToolCallResult {
    /// Tool output text, set when the tool succeeded.
    var result: String? = nil
    /// Error text, set when the tool failed.
    var error: String? = nil
}

/// C-compatible callback handed to Rust's tool dispatcher so it can report a tool's outcome.
///
/// `ctx` must be the unretained `ToolCallResult` pointer created in `DynamicTool.call`; a nil
/// `ctx` is ignored. A non-nil `result` is stored as the result; otherwise a non-nil `error` is
/// stored as the error. When both are nil the holder is left unchanged.
private let writeToolCallResult: ResultCallback = { ctx, resultText, errorText in
    guard let rawHolder = ctx else { return }
    // Unretained: the holder is owned by the caller for the duration of the dispatch.
    let slot = Unmanaged<ToolCallResult>.fromOpaque(rawHolder).takeUnretainedValue()
    switch (resultText, errorText) {
    case (let text?, _):
        // A result takes precedence over an error when both are supplied.
        slot.result = String(cString: text)
    case (nil, let text?):
        slot.error = String(cString: text)
    case (nil, nil):
        break
    }
}

/// `Tool` conformance whose behaviour lives on the Rust side: every call is serialised to JSON
/// and forwarded through a C dispatch callback together with an opaque Rust context pointer.
@available(macOS 26.0, *)
private final class DynamicTool: Tool {
    typealias Arguments = GeneratedContent
    typealias Output = String

    /// Tool name, also passed to the dispatcher to identify the tool.
    let name: String
    /// Tool description.
    let description: String
    /// Schema describing the tool's arguments.
    let parameters: GenerationSchema
    /// Opaque Rust context forwarded verbatim on every dispatch.
    private let rustContext: UnsafeMutableRawPointer?
    /// C function that executes the tool on the Rust side.
    private let dispatcher: ToolDispatchCallback

    /// Stores the tool metadata and the Rust dispatch hooks; performs no other work.
    init(
        name: String,
        description: String,
        schema: GenerationSchema,
        ctx: UnsafeMutableRawPointer?,
        dispatch: ToolDispatchCallback
    ) {
        self.name = name
        self.description = description
        self.parameters = schema
        self.rustContext = ctx
        self.dispatcher = dispatch
    }

    /// Runs the tool by dispatching to Rust and waiting (synchronously) for the outcome.
    ///
    /// - Parameter arguments: tool arguments; serialised to JSON before dispatch.
    /// - Returns: the result string written by Rust, or `""` if Rust reported neither a result
    ///   nor an error.
    /// - Throws: `ToolDispatchError` carrying the error text when Rust reported an error.
    func call(arguments: GeneratedContent) async throws -> String {
        let outcome = ToolCallResult()
        // Unretained pointer: `outcome` is read again below, so it outlives the dispatch call.
        let outcomePtr = Unmanaged.passUnretained(outcome).toOpaque()
        let encodedArguments = contentToJson(arguments)

        encodedArguments.withCString { argsPtr in
            name.withCString { namePtr in
                dispatcher(rustContext, namePtr, argsPtr, outcomePtr, writeToolCallResult)
            }
        }

        guard let failure = outcome.error else {
            return outcome.result ?? ""
        }
        throw ToolDispatchError(message: failure)
    }
}

/// Error thrown when the Rust side reports that a tool invocation failed.
///
/// Conforms to `LocalizedError` so that `localizedDescription` — which is what the session
/// entry points in this file forward to their callbacks — yields the Rust-supplied text
/// instead of a generic "operation couldn't be completed" message.
private struct ToolDispatchError: LocalizedError {
    /// Error text exactly as reported by the Rust tool dispatcher.
    let message: String

    /// Surfaces `message` as the localized description of this error.
    var errorDescription: String? { message }
}

// ─── Helper: GeneratedContent → JSON string ───────────────────────────────────

/// Serialises a `GeneratedContent` value into compact JSON text.
///
/// Mapping by `kind`:
/// - `.null` → `null`
/// - `.bool` → `true` / `false`
/// - `.number` → an integer literal when the value is exactly representable as `Int64`,
///   otherwise the `Double` description; NaN and ±infinity become `null` because JSON has
///   no representation for them
/// - `.string` → a quoted, escaped JSON string
/// - `.array` → `[...]` with elements serialised recursively
/// - `.structure` → `{...}` with members emitted in `orderedKeys` order; keys without a
///   matching entry in the property dictionary are skipped
/// - any future case → `null`
///
/// - Parameter content: the value to serialise.
/// - Returns: JSON text with no insignificant whitespace.
@available(macOS 26.0, *)
private func contentToJson(_ content: GeneratedContent) -> String {
    switch content.kind {
    case .null:
        return "null"
    case .bool(let b):
        return b ? "true" : "false"
    case .number(let n):
        // "nan" / "inf" are not valid JSON tokens; fall back to null.
        guard n.isFinite else { return "null" }
        // `Int64(exactly:)` yields nil for fractional values and for whole values outside the
        // Int64 range, so huge magnitudes no longer trap the way `Int64(n)` does.
        if let whole = Int64(exactly: n) {
            return String(whole)
        }
        return String(n)
    case .string(let s):
        return jsonQuote(s)
    case .array(let elements):
        let items = elements.map { contentToJson($0) }.joined(separator: ",")
        return "[\(items)]"
    case .structure(let props, let orderedKeys):
        let pairs = orderedKeys.compactMap { key -> String? in
            guard let value = props[key] else { return nil }
            return "\(jsonQuote(key)):\(contentToJson(value))"
        }.joined(separator: ",")
        return "{\(pairs)}"
    @unknown default:
        return "null"
    }
}

/// Encodes `s` as a JSON string literal, including the surrounding double quotes.
///
/// `"` and `\` are backslash-escaped; newline, carriage return and tab use their short
/// escapes; every other scalar below U+0020 is written as `\uXXXX` (uppercase hex). All
/// remaining scalars are copied through unchanged.
private func jsonQuote(_ s: String) -> String {
    let escapedBody = s.unicodeScalars.map { scalar -> String in
        switch scalar {
        case "\"": return "\\\""
        case "\\": return "\\\\"
        case "\n": return "\\n"
        case "\r": return "\\r"
        case "\t": return "\\t"
        default:
            // Remaining C0 control characters need the generic \u escape.
            if scalar.value <= 0x1F {
                return String(format: "\\u%04X", scalar.value)
            }
            return String(scalar)
        }
    }.joined()
    return "\"" + escapedBody + "\""
}

// ─── Schema building helper ───────────────────────────────────────────────────

/// Converts one decoded property descriptor into a `DynamicGenerationSchema.Property`.
///
/// Recognised `type` tags are `"string"`, `"integer"`, `"double"` and `"bool"`.
///
/// - Parameter prop: the decoded descriptor.
/// - Returns: the property, or `nil` when `prop.type` is not one of the recognised tags.
@available(macOS 26.0, *)
private func buildProperty(_ prop: SchemaPropertyDesc) -> DynamicGenerationSchema.Property? {
    // Maps a type tag onto the schema for the matching Swift type.
    func valueSchema(for typeTag: String) -> DynamicGenerationSchema? {
        if typeTag == "string" { return DynamicGenerationSchema(type: String.self) }
        if typeTag == "integer" { return DynamicGenerationSchema(type: Int.self) }
        if typeTag == "double" { return DynamicGenerationSchema(type: Double.self) }
        if typeTag == "bool" { return DynamicGenerationSchema(type: Bool.self) }
        return nil
    }

    return valueSchema(for: prop.type).map { schema in
        DynamicGenerationSchema.Property(
            name: prop.name,
            description: prop.description,
            schema: schema,
            isOptional: prop.optional
        )
    }
}

/// Builds a `GenerationSchema` from a decoded `SchemaDesc`.
///
/// Every property must be convertible: if any descriptor has an unsupported type the whole
/// schema is rejected. Previously such properties were silently dropped, which produced a
/// schema missing fields the caller asked for and never reached the callers' error paths.
///
/// - Parameter desc: the decoded schema description.
/// - Returns: the schema, or `nil` when a property cannot be built or `GenerationSchema`
///   rejects the resulting root schema.
@available(macOS 26.0, *)
private func buildGenerationSchema(_ desc: SchemaDesc) -> GenerationSchema? {
    var props: [DynamicGenerationSchema.Property] = []
    props.reserveCapacity(desc.properties.count)
    for propertyDesc in desc.properties {
        // Fail the whole schema rather than emit one that omits a requested property.
        guard let property = buildProperty(propertyDesc) else { return nil }
        props.append(property)
    }

    let dynSchema = DynamicGenerationSchema(
        name: desc.name,
        description: desc.description,
        properties: props
    )
    return try? GenerationSchema(root: dynSchema, dependencies: [])
}

// ─── Availability ─────────────────────────────────────────────────────────────

/// Reports whether the default system language model can be used right now.
///
/// - Returns: `FM_AVAILABLE` when the model is available; otherwise the `FM_*` code matching
///   the unavailability reason (`FM_UNKNOWN` for reasons this file does not recognise).
///   `FM_DEVICE_NOT_ELIGIBLE` is also returned when running before macOS 26 or when the
///   FoundationModels framework is not part of the build.
@_cdecl("fm_availability_reason")
func availabilityReason() -> Int32 {
    #if canImport(FoundationModels)
    if #available(macOS 26.0, *) {
        guard case .unavailable(let cause) = SystemLanguageModel.default.availability else {
            return FM_AVAILABLE
        }
        switch cause {
        case .deviceNotEligible:
            return FM_DEVICE_NOT_ELIGIBLE
        case .appleIntelligenceNotEnabled:
            return FM_NOT_ENABLED
        case .modelNotReady:
            return FM_MODEL_NOT_READY
        @unknown default:
            return FM_UNKNOWN
        }
    }
    // Framework is present but the OS is too old.
    return FM_DEVICE_NOT_ELIGIBLE
    #else
    return FM_DEVICE_NOT_ELIGIBLE
    #endif
}

// ─── Session lifecycle ────────────────────────────────────────────────────────

/// Creates a `LanguageModelSession` configured with the given system instructions.
///
/// - Parameter instructionsPtr: NUL-terminated instructions text.
/// - Returns: an opaque pointer to a retained `SessionHolder`, or NULL when running before
///   macOS 26 or when FoundationModels is not part of the build. A non-NULL pointer must
///   eventually be passed to `fm_session_destroy`.
@_cdecl("fm_session_create")
func sessionCreate(instructionsPtr: UnsafePointer<CChar>) -> UnsafeMutableRawPointer? {
    #if canImport(FoundationModels)
    if #available(macOS 26.0, *) {
        let instructions = String(cString: instructionsPtr)
        let holder = SessionHolder(LanguageModelSession(instructions: instructions))
        // +1 retain: ownership passes to the caller until fm_session_destroy.
        return Unmanaged.passRetained(holder).toOpaque()
    }
    return nil
    #else
    return nil
    #endif
}

/// Creates a `LanguageModelSession` that has a set of Rust-backed tools attached.
///
/// - Parameters:
///   - instructionsPtr: NUL-terminated system instructions.
///   - toolsJsonPtr: NUL-terminated JSON array of tool definitions, each shaped like
///     `{"name","description","properties":[{"name","type","description","optional"}]}`.
///   - toolCtx: opaque Rust context forwarded to every tool invocation.
///   - toolDispatch: C callback each `DynamicTool` uses to call back into Rust.
/// - Returns: an opaque pointer to a retained `SessionHolder`, or NULL when the tools JSON
///   cannot be decoded, when running before macOS 26, or when FoundationModels is not part of
///   the build. Tool definitions whose schema cannot be built are left out of the session.
@_cdecl("fm_session_create_with_tools")
func sessionCreateWithTools(
    instructionsPtr: UnsafePointer<CChar>,
    toolsJsonPtr: UnsafePointer<CChar>,
    toolCtx: UnsafeMutableRawPointer?,
    toolDispatch: ToolDispatchCallback
) -> UnsafeMutableRawPointer? {
    #if canImport(FoundationModels)
    guard #available(macOS 26.0, *) else { return nil }

    let toolsPayload = Data(String(cString: toolsJsonPtr).utf8)
    let decodedDescs: [SchemaDesc]
    do {
        decodedDescs = try JSONDecoder().decode([SchemaDesc].self, from: toolsPayload)
    } catch {
        return nil
    }

    var tools: [any Tool] = []
    for desc in decodedDescs {
        // Definitions with an unbuildable schema are skipped rather than failing the session.
        guard let schema = buildGenerationSchema(desc) else { continue }
        tools.append(
            DynamicTool(
                name: desc.name,
                description: desc.description ?? "",
                schema: schema,
                ctx: toolCtx,
                dispatch: toolDispatch
            )
        )
    }

    let holder = SessionHolder(
        LanguageModelSession(tools: tools, instructions: String(cString: instructionsPtr))
    )
    // +1 retain: ownership passes to the caller until fm_session_destroy.
    return Unmanaged.passRetained(holder).toOpaque()
    #else
    return nil
    #endif
}

/// Balances the retain taken by `fm_session_create` / `fm_session_create_with_tools`.
///
/// Must be called exactly once per handle. Does nothing when running before macOS 26 or when
/// FoundationModels is not part of the build (no handle can exist in those configurations).
///
/// - Parameter handlePtr: opaque pointer previously returned by one of the create functions.
@_cdecl("fm_session_destroy")
func sessionDestroy(handlePtr: UnsafeMutableRawPointer) {
    #if canImport(FoundationModels)
    if #available(macOS 26.0, *) {
        let retainedHolder = Unmanaged<SessionHolder>.fromOpaque(handlePtr)
        retainedHolder.release()
    }
    #endif
}

// ─── Single-shot response ─────────────────────────────────────────────────────

/// Sends `promptPtr` to the session and reports the outcome through `callback` exactly once.
///
/// On a supported OS the request runs in an unstructured `Task`, so `callback` is invoked
/// after this function has returned. In the unsupported-OS / missing-framework cases it is
/// invoked synchronously with an error string.
///
/// - Parameters:
///   - handlePtr: opaque session handle from one of the create functions.
///   - promptPtr: NUL-terminated prompt text.
///   - temperature: generation temperature; any negative value (e.g. -1.0) keeps the model
///     default.
///   - maxTokens: maximum response tokens; any negative value (e.g. -1) keeps the model
///     default.
///   - callbackCtx: opaque pointer handed back to `callback`.
///   - callback: receives the response text on success, or an error message on failure.
@_cdecl("fm_session_respond")
func sessionRespond(
    handlePtr: UnsafeMutableRawPointer,
    promptPtr: UnsafePointer<CChar>,
    temperature: Double,
    maxTokens: Int64,
    callbackCtx: UnsafeMutableRawPointer?,
    callback: ResultCallback
) {
    #if canImport(FoundationModels)
    if #available(macOS 26.0, *) {
        // Unretained: the Task closure below captures `holder` strongly while it runs.
        let holder = Unmanaged<SessionHolder>.fromOpaque(handlePtr).takeUnretainedValue()
        let prompt = String(cString: promptPtr)
        let options: GenerationOptions = {
            var configured = GenerationOptions()
            if temperature >= 0.0 { configured.temperature = temperature }
            if maxTokens >= 0 { configured.maximumResponseTokens = Int(maxTokens) }
            return configured
        }()

        Task {
            let outcome: Result<String, Error>
            do {
                outcome = .success(try await holder.session.respond(to: prompt, options: options).content)
            } catch {
                outcome = .failure(error)
            }

            switch outcome {
            case .success(let text):
                text.withCString { callback(callbackCtx, $0, nil) }
            case .failure(let failure):
                failure.localizedDescription.withCString { callback(callbackCtx, nil, $0) }
            }
        }
    } else {
        "Apple Intelligence requires macOS 26 or later".withCString { callback(callbackCtx, nil, $0) }
    }
    #else
    "FoundationModels framework not available in this build".withCString { callback(callbackCtx, nil, $0) }
    #endif
}

// ─── Structured generation ────────────────────────────────────────────────────

/// Structured variant of `fm_session_respond`: the response is generated against the schema
/// described by `schemaJsonPtr` and delivered to `callback` as JSON text.
///
/// `schemaJsonPtr` must be a UTF-8 JSON object of the form
/// `{"name":"T","description":"...","properties":[{"name":"x","type":"string","description":"...","optional":false}]}`.
/// Supported property types: `"string"`, `"integer"`, `"double"`, `"bool"`.
///
/// `callback` is invoked exactly once. Schema problems and the unsupported-OS /
/// missing-framework cases are reported synchronously; the model outcome is reported from an
/// unstructured `Task` after this function has returned.
///
/// - Parameters:
///   - handlePtr: opaque session handle from one of the create functions.
///   - promptPtr: NUL-terminated prompt text.
///   - schemaJsonPtr: NUL-terminated schema description (see above).
///   - temperature: generation temperature; any negative value keeps the model default.
///   - maxTokens: maximum response tokens; any negative value keeps the model default.
///   - callbackCtx: opaque pointer handed back to `callback`.
///   - callback: receives the JSON text on success, or an error message on failure.
@_cdecl("fm_session_respond_structured")
func sessionRespondStructured(
    handlePtr: UnsafeMutableRawPointer,
    promptPtr: UnsafePointer<CChar>,
    schemaJsonPtr: UnsafePointer<CChar>,
    temperature: Double,
    maxTokens: Int64,
    callbackCtx: UnsafeMutableRawPointer?,
    callback: ResultCallback
) {
    #if canImport(FoundationModels)
    if #available(macOS 26.0, *) {
        let schemaPayload = Data(String(cString: schemaJsonPtr).utf8)
        let decodedDesc = try? JSONDecoder().decode(SchemaDesc.self, from: schemaPayload)
        guard let desc = decodedDesc, let genSchema = buildGenerationSchema(desc) else {
            "Invalid or unsupported schema JSON".withCString { callback(callbackCtx, nil, $0) }
            return
        }

        // Unretained: the Task closure below captures `holder` strongly while it runs.
        let holder = Unmanaged<SessionHolder>.fromOpaque(handlePtr).takeUnretainedValue()
        let prompt = String(cString: promptPtr)
        let options: GenerationOptions = {
            var configured = GenerationOptions()
            if temperature >= 0.0 { configured.temperature = temperature }
            if maxTokens >= 0 { configured.maximumResponseTokens = Int(maxTokens) }
            return configured
        }()

        Task {
            let outcome: Result<String, Error>
            do {
                let reply = try await holder.session.respond(to: prompt, schema: genSchema, options: options)
                outcome = .success(contentToJson(reply.content))
            } catch {
                outcome = .failure(error)
            }

            switch outcome {
            case .success(let json):
                json.withCString { callback(callbackCtx, $0, nil) }
            case .failure(let failure):
                failure.localizedDescription.withCString { callback(callbackCtx, nil, $0) }
            }
        }
    } else {
        "Apple Intelligence requires macOS 26 or later".withCString { callback(callbackCtx, nil, $0) }
    }
    #else
    "FoundationModels framework not available in this build".withCString { callback(callbackCtx, nil, $0) }
    #endif
}

// ─── Streaming response ───────────────────────────────────────────────────────

/// Streams a response: `onToken` receives the `content` of every element the response stream
/// produces, then `onDone` is called exactly once (nil error on success, message on failure).
/// After `onDone` returns, `callbackCtx` must not be used.
///
/// NOTE(review): whether each element is a delta or the accumulated text so far is decided by
/// FoundationModels' stream element type, not by this function — confirm that the Rust side
/// interprets it the same way.
///
/// On a supported OS the work runs in an unstructured `Task`, so the callbacks fire after this
/// function has returned. In the unsupported-OS / missing-framework cases `onDone` is invoked
/// synchronously with an error string and `onToken` is never called.
///
/// - Parameters:
///   - handlePtr: opaque session handle from one of the create functions.
///   - promptPtr: NUL-terminated prompt text.
///   - temperature: generation temperature; any negative value (e.g. -1.0) keeps the model
///     default.
///   - maxTokens: maximum response tokens; any negative value (e.g. -1) keeps the model
///     default.
///   - callbackCtx: opaque pointer handed back to both callbacks.
///   - onToken: invoked once per streamed element.
///   - onDone: invoked once when streaming has ended.
@_cdecl("fm_session_stream")
func sessionStream(
    handlePtr: UnsafeMutableRawPointer,
    promptPtr: UnsafePointer<CChar>,
    temperature: Double,
    maxTokens: Int64,
    callbackCtx: UnsafeMutableRawPointer?,
    onToken: TokenCallback,
    onDone: DoneCallback
) {
    #if canImport(FoundationModels)
    if #available(macOS 26.0, *) {
        // Unretained: the Task closure below captures `holder` strongly while it runs.
        let holder = Unmanaged<SessionHolder>.fromOpaque(handlePtr).takeUnretainedValue()
        let prompt = String(cString: promptPtr)
        let options: GenerationOptions = {
            var configured = GenerationOptions()
            if temperature >= 0.0 { configured.temperature = temperature }
            if maxTokens >= 0 { configured.maximumResponseTokens = Int(maxTokens) }
            return configured
        }()

        Task {
            var streamFailure: Error? = nil
            do {
                for try await element in holder.session.streamResponse(to: prompt, options: options) {
                    element.content.withCString { onToken(callbackCtx, $0) }
                }
            } catch {
                streamFailure = error
            }

            // Exactly one completion notification, whichever way the loop ended.
            if let failure = streamFailure {
                failure.localizedDescription.withCString { onDone(callbackCtx, $0) }
            } else {
                onDone(callbackCtx, nil)
            }
        }
    } else {
        "Apple Intelligence requires macOS 26 or later".withCString { onDone(callbackCtx, $0) }
    }
    #else
    "FoundationModels framework not available in this build".withCString { onDone(callbackCtx, $0) }
    #endif
}