Skip to main content

entelix_core/ir/
source.rs

1//! `MediaSource` and `CitationSource` — provenance for multimodal and
2//! citation [`ContentPart`](crate::ir::ContentPart) variants.
3//!
4//! `MediaSource` is shared across every modality (`Image / Audio / Video /
5//! Document`) so codecs can route on a single shape regardless of media
6//! type. `CitationSource` is the lean common subset every vendor returns
7//! for grounding outputs — vendor-specific positioning stays out of IR
8//! per the 2-codec rule.
9
10use serde::{Deserialize, Serialize};
11
12/// How a media payload is delivered to the model.
13///
14/// Every codec routes on the variant alone, so adding a new modality
15/// ([`ContentPart::Audio`](crate::ir::ContentPart::Audio),
16/// [`ContentPart::Video`](crate::ir::ContentPart::Video) etc.) reuses the
17/// same source shape — no parallel `*Source` enums.
18///
19/// `media_type` is required on [`MediaSource::Base64`] (no other context
20/// to infer from) and optional on [`MediaSource::Url`] /
21/// [`MediaSource::FileId`] where URL extension or vendor metadata
22/// typically supplies it.
23#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
24#[serde(tag = "kind", rename_all = "snake_case")]
25#[non_exhaustive]
26pub enum MediaSource {
27    /// Hosted at an `https://` URL the vendor can fetch directly.
28    Url {
29        /// HTTPS URL.
30        url: String,
31        /// IANA media type (e.g. `image/png`, `audio/mpeg`). Optional —
32        /// the URL extension or HTTP `Content-Type` header normally
33        /// suffices for the vendor.
34        media_type: Option<String>,
35    },
36    /// Inlined as base64 bytes with a media type tag.
37    Base64 {
38        /// IANA media type — required.
39        media_type: String,
40        /// Base64-encoded payload bytes.
41        data: String,
42    },
43    /// Already uploaded via the vendor's Files API.
44    ///
45    /// Covers OpenAI Files (`file-…` IDs), Gemini File API (`files/…` IDs),
46    /// Anthropic file inputs.
47    FileId {
48        /// Vendor-assigned file identifier.
49        id: String,
50        /// IANA media type — optional (vendor metadata typically supplies).
51        media_type: Option<String>,
52    },
53}
54
55impl MediaSource {
56    /// Convenience constructor for a base64-inlined source.
57    #[must_use]
58    pub fn base64(media_type: impl Into<String>, data: impl Into<String>) -> Self {
59        Self::Base64 {
60            media_type: media_type.into(),
61            data: data.into(),
62        }
63    }
64
65    /// Convenience constructor for a URL source.
66    #[must_use]
67    pub fn url(url: impl Into<String>) -> Self {
68        Self::Url {
69            url: url.into(),
70            media_type: None,
71        }
72    }
73
74    /// Convenience constructor for a vendor file-id source.
75    #[must_use]
76    pub fn file_id(id: impl Into<String>) -> Self {
77        Self::FileId {
78            id: id.into(),
79            media_type: None,
80        }
81    }
82
83    /// Borrow the media type if known.
84    #[must_use]
85    pub fn media_type(&self) -> Option<&str> {
86        match self {
87            Self::Url { media_type, .. } | Self::FileId { media_type, .. } => media_type.as_deref(),
88            Self::Base64 { media_type, .. } => Some(media_type),
89        }
90    }
91}
92
93/// Provenance for a [`ContentPart::Citation`](crate::ir::ContentPart::Citation).
94///
95/// Two-variant lean union covering the common subset every vendor returns
96/// for grounding outputs. Vendor-specific positioning (start/end byte
97/// offsets, chunk indices) is *not* modeled — codecs that receive offsets
98/// emit a [`ModelWarning::LossyEncode`](crate::ir::ModelWarning::LossyEncode).
99#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
100#[serde(tag = "kind", rename_all = "snake_case")]
101#[non_exhaustive]
102pub enum CitationSource {
103    /// External web reference — covers Anthropic web-search citations,
104    /// OpenAI URL annotations, Gemini grounding-chunk URIs.
105    Url {
106        /// Cited URL.
107        url: String,
108        /// Document or page title (vendor-supplied if known).
109        title: Option<String>,
110    },
111    /// Reference to a [`ContentPart::Document`](crate::ir::ContentPart::Document)
112    /// supplied earlier in the request.
113    Document {
114        /// Index into the request's `Document` content blocks (0-based,
115        /// counted across the whole conversation).
116        document_index: u32,
117        /// Document title (vendor-supplied if known).
118        title: Option<String>,
119    },
120}