entelix_core/ir/source.rs
1//! `MediaSource` and `CitationSource` — provenance for multimodal and
2//! citation [`ContentPart`](crate::ir::ContentPart) variants.
3//!
4//! `MediaSource` is shared across every modality (`Image / Audio / Video /
5//! Document`) so codecs can route on a single shape regardless of media
6//! type. `CitationSource` is the lean common subset every vendor returns
7//! for grounding outputs — vendor-specific positioning stays out of IR
8//! per the 2-codec rule.
9
10use serde::{Deserialize, Serialize};
11
12/// How a media payload is delivered to the model.
13///
14/// Every codec routes on the variant alone, so adding a new modality
15/// ([`ContentPart::Audio`](crate::ir::ContentPart::Audio),
16/// [`ContentPart::Video`](crate::ir::ContentPart::Video) etc.) reuses the
17/// same source shape — no parallel `*Source` enums.
18///
19/// `media_type` is required on [`MediaSource::Base64`] (no other context
20/// to infer from) and optional on [`MediaSource::Url`] /
21/// [`MediaSource::FileId`] where URL extension or vendor metadata
22/// typically supplies it.
23#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
24#[serde(tag = "kind", rename_all = "snake_case")]
25#[non_exhaustive]
26pub enum MediaSource {
27 /// Hosted at an `https://` URL the vendor can fetch directly.
28 Url {
29 /// HTTPS URL.
30 url: String,
31 /// IANA media type (e.g. `image/png`, `audio/mpeg`). Optional —
32 /// the URL extension or HTTP `Content-Type` header normally
33 /// suffices for the vendor.
34 media_type: Option<String>,
35 },
36 /// Inlined as base64 bytes with a media type tag.
37 Base64 {
38 /// IANA media type — required.
39 media_type: String,
40 /// Base64-encoded payload bytes.
41 data: String,
42 },
43 /// Already uploaded via the vendor's Files API.
44 ///
45 /// Covers OpenAI Files (`file-…` IDs), Gemini File API (`files/…` IDs),
46 /// Anthropic file inputs.
47 FileId {
48 /// Vendor-assigned file identifier.
49 id: String,
50 /// IANA media type — optional (vendor metadata typically supplies).
51 media_type: Option<String>,
52 },
53}
54
55impl MediaSource {
56 /// Convenience constructor for a base64-inlined source.
57 #[must_use]
58 pub fn base64(media_type: impl Into<String>, data: impl Into<String>) -> Self {
59 Self::Base64 {
60 media_type: media_type.into(),
61 data: data.into(),
62 }
63 }
64
65 /// Convenience constructor for a URL source.
66 #[must_use]
67 pub fn url(url: impl Into<String>) -> Self {
68 Self::Url {
69 url: url.into(),
70 media_type: None,
71 }
72 }
73
74 /// Convenience constructor for a vendor file-id source.
75 #[must_use]
76 pub fn file_id(id: impl Into<String>) -> Self {
77 Self::FileId {
78 id: id.into(),
79 media_type: None,
80 }
81 }
82
83 /// Borrow the media type if known.
84 #[must_use]
85 pub fn media_type(&self) -> Option<&str> {
86 match self {
87 Self::Url { media_type, .. } | Self::FileId { media_type, .. } => media_type.as_deref(),
88 Self::Base64 { media_type, .. } => Some(media_type),
89 }
90 }
91}
92
93/// Provenance for a [`ContentPart::Citation`](crate::ir::ContentPart::Citation).
94///
95/// Two-variant lean union covering the common subset every vendor returns
96/// for grounding outputs. Vendor-specific positioning (start/end byte
97/// offsets, chunk indices) is *not* modeled — codecs that receive offsets
98/// emit a [`ModelWarning::LossyEncode`](crate::ir::ModelWarning::LossyEncode).
99#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
100#[serde(tag = "kind", rename_all = "snake_case")]
101#[non_exhaustive]
102pub enum CitationSource {
103 /// External web reference — covers Anthropic web-search citations,
104 /// OpenAI URL annotations, Gemini grounding-chunk URIs.
105 Url {
106 /// Cited URL.
107 url: String,
108 /// Document or page title (vendor-supplied if known).
109 title: Option<String>,
110 },
111 /// Reference to a [`ContentPart::Document`](crate::ir::ContentPart::Document)
112 /// supplied earlier in the request.
113 Document {
114 /// Index into the request's `Document` content blocks (0-based,
115 /// counted across the whole conversation).
116 document_index: u32,
117 /// Document title (vendor-supplied if known).
118 title: Option<String>,
119 },
120}