Skip to main content

nika_core/ast/
content.rs

1//! Content parts for multimodal vision support.
2//!
3//! Three-phase types following the AST pipeline convention:
4//! - `RawContentPart` — parsed from YAML with span tracking
5//! - `AnalyzedContentPart` — validated, spans stripped
6//! - `ContentPart` — runtime type used in `InferParams`
7//!
8//! # YAML Syntax
9//!
10//! ```yaml
11//! content:
12//!   - type: text
13//!     text: "Describe this image"
14//!   - type: image
15//!     source: "{{with.photo.media[0].hash}}"
16//!     detail: high
17//!   - type: image_url
18//!     url: "https://example.com/photo.jpg"
19//!     detail: low
20//! ```
21
22use serde::{Deserialize, Serialize};
23
24use crate::source::{Span, Spanned};
25
26// ═══════════════════════════════════════════════════════════════
27// Phase 1: Raw (from YAML parser, with spans)
28// ═══════════════════════════════════════════════════════════════
29
/// Raw content part parsed from YAML with full span tracking.
///
/// Phase 1 of the three-phase pipeline. Every string field is wrapped in
/// [`Spanned`], and `detail` is still an unparsed string; it is converted to
/// an [`ImageDetail`] by [`parse_detail`] during analysis.
#[derive(Debug, Clone)]
pub enum RawContentPart {
    /// Text content: `{ type: text, text: "..." }`
    Text { text: Spanned<String> },
    /// CAS image reference: `{ type: image, source: "blake3:...", detail: auto }`
    Image {
        /// The `source` value together with its span.
        source: Spanned<String>,
        /// Optional raw `detail` string; `None` resolves to the default level.
        detail: Option<Spanned<String>>,
    },
    /// External image URL: `{ type: image_url, url: "https://...", detail: low }`
    ImageUrl {
        /// The `url` value together with its span.
        url: Spanned<String>,
        /// Optional raw `detail` string; `None` resolves to the default level.
        detail: Option<Spanned<String>>,
    },
}
46
47// ═══════════════════════════════════════════════════════════════
48// Phase 2: Analyzed (validated, spans stripped)
49// ═══════════════════════════════════════════════════════════════
50
51/// Analyzed content part — validated and span-free.
52#[derive(Debug, Clone)]
53pub enum AnalyzedContentPart {
54    Text { text: String },
55    Image { source: String, detail: ImageDetail },
56    ImageUrl { url: String, detail: ImageDetail },
57}
58
59// ═══════════════════════════════════════════════════════════════
60// Phase 3: Runtime (used in InferParams, serde-enabled)
61// ═══════════════════════════════════════════════════════════════
62
/// Runtime content part for multimodal inference.
///
/// Used in `InferParams.content` to specify text + image parts
/// sent to vision-capable LLMs.
///
/// Serialized as an internally tagged enum: the variant is carried in a
/// `type` field whose value is the snake_case variant name
/// (`text`, `image`, `image_url`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentPart {
    /// Text content
    Text { text: String },
    /// CAS image reference (resolved to base64 at runtime)
    Image {
        /// CAS reference string, e.g. `blake3:...`.
        source: String,
        /// Detail level; falls back to `ImageDetail::default()` when the
        /// field is missing from the serialized input.
        #[serde(default)]
        detail: ImageDetail,
    },
    /// External image URL (passed directly to provider)
    ImageUrl {
        /// The image URL.
        url: String,
        /// Detail level; falls back to `ImageDetail::default()` when the
        /// field is missing from the serialized input.
        #[serde(default)]
        detail: ImageDetail,
    },
}
85
/// Image detail level for vision models.
///
/// - `Auto`: Let the model decide (default)
/// - `Low`: Faster, cheaper, lower resolution
/// - `High`: Full resolution analysis
///
/// Serialized with lowercase variant names: `"auto"`, `"low"`, `"high"`.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ImageDetail {
    /// Let the model decide; this is the `Default` value.
    #[default]
    Auto,
    /// Faster, cheaper, lower resolution.
    Low,
    /// Full resolution analysis.
    High,
}
99
100impl std::fmt::Display for ImageDetail {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        match self {
103            ImageDetail::Auto => write!(f, "auto"),
104            ImageDetail::Low => write!(f, "low"),
105            ImageDetail::High => write!(f, "high"),
106        }
107    }
108}
109
110impl ImageDetail {
111    /// Parse from string, defaulting to Auto for unknown values.
112    pub fn from_str_lossy(s: &str) -> Self {
113        match s {
114            "low" => ImageDetail::Low,
115            "high" => ImageDetail::High,
116            _ => ImageDetail::Auto,
117        }
118    }
119}
120
121impl std::fmt::Display for ContentPart {
122    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
123        match self {
124            ContentPart::Text { text } => write!(f, "text({} chars)", text.len()),
125            ContentPart::Image { source, detail } => {
126                write!(f, "image(source={}, detail={})", source, detail)
127            }
128            ContentPart::ImageUrl { url, detail } => {
129                write!(f, "image_url(url={}, detail={})", url, detail)
130            }
131        }
132    }
133}
134
135/// Convert an analyzed content part to a runtime content part.
136impl From<AnalyzedContentPart> for ContentPart {
137    fn from(part: AnalyzedContentPart) -> Self {
138        match part {
139            AnalyzedContentPart::Text { text } => ContentPart::Text { text },
140            AnalyzedContentPart::Image { source, detail } => ContentPart::Image { source, detail },
141            AnalyzedContentPart::ImageUrl { url, detail } => ContentPart::ImageUrl { url, detail },
142        }
143    }
144}
145
146/// Convert a raw detail string to ImageDetail.
147pub fn parse_detail(detail: Option<&Spanned<String>>) -> ImageDetail {
148    detail
149        .map(|s| ImageDetail::from_str_lossy(&s.value))
150        .unwrap_or_default()
151}
152
153/// Analyze a raw content part into an analyzed content part.
154pub fn analyze_content_part(raw: &RawContentPart) -> AnalyzedContentPart {
155    match raw {
156        RawContentPart::Text { text } => AnalyzedContentPart::Text {
157            text: text.value.clone(),
158        },
159        RawContentPart::Image { source, detail } => AnalyzedContentPart::Image {
160            source: source.value.clone(),
161            detail: parse_detail(detail.as_ref()),
162        },
163        RawContentPart::ImageUrl { url, detail } => AnalyzedContentPart::ImageUrl {
164            url: url.value.clone(),
165            detail: parse_detail(detail.as_ref()),
166        },
167    }
168}
169
170/// Get the span of a raw content part (for error reporting).
171impl RawContentPart {
172    pub fn span(&self) -> Span {
173        match self {
174            RawContentPart::Text { text } => text.span,
175            RawContentPart::Image { source, .. } => source.span,
176            RawContentPart::ImageUrl { url, .. } => url.span,
177        }
178    }
179}
180
// Unit tests covering serde round-trips, display formatting, detail parsing,
// and the raw -> analyzed -> runtime conversions.
#[cfg(test)]
mod tests {
    use super::*;

    /// A text part survives a JSON round trip and is tagged `"type":"text"`.
    #[test]
    fn content_part_serde_text_round_trip() {
        let part = ContentPart::Text {
            text: "Hello world".to_string(),
        };
        let json = serde_json::to_string(&part).unwrap();
        let parsed: ContentPart = serde_json::from_str(&json).unwrap();
        assert_eq!(part, parsed);
        assert!(json.contains(r#""type":"text""#));
    }

    /// An image part round-trips; the tag and the lowercase detail are emitted.
    #[test]
    fn content_part_serde_image_round_trip() {
        let part = ContentPart::Image {
            source: "blake3:abc123".to_string(),
            detail: ImageDetail::High,
        };
        let json = serde_json::to_string(&part).unwrap();
        let parsed: ContentPart = serde_json::from_str(&json).unwrap();
        assert_eq!(part, parsed);
        assert!(json.contains(r#""type":"image""#));
        assert!(json.contains(r#""detail":"high""#));
    }

    /// An image-URL part round-trips and uses the snake_case tag `image_url`.
    #[test]
    fn content_part_serde_image_url_round_trip() {
        let part = ContentPart::ImageUrl {
            url: "https://example.com/photo.jpg".to_string(),
            detail: ImageDetail::Low,
        };
        let json = serde_json::to_string(&part).unwrap();
        let parsed: ContentPart = serde_json::from_str(&json).unwrap();
        assert_eq!(part, parsed);
        assert!(json.contains(r#""type":"image_url""#));
    }

    /// A missing `detail` field deserializes to `ImageDetail::Auto`.
    #[test]
    fn content_part_serde_default_detail() {
        let json = r#"{"type":"image","source":"blake3:xyz"}"#;
        let part: ContentPart = serde_json::from_str(json).unwrap();
        match part {
            ContentPart::Image { detail, .. } => assert_eq!(detail, ImageDetail::Auto),
            _ => panic!("expected Image"),
        }
    }

    /// `Display` renders each detail level as its lowercase name.
    #[test]
    fn image_detail_display() {
        assert_eq!(ImageDetail::Auto.to_string(), "auto");
        assert_eq!(ImageDetail::Low.to_string(), "low");
        assert_eq!(ImageDetail::High.to_string(), "high");
    }

    /// Known names map to their variants; unknown or empty input maps to `Auto`.
    #[test]
    fn image_detail_from_str_lossy() {
        assert_eq!(ImageDetail::from_str_lossy("low"), ImageDetail::Low);
        assert_eq!(ImageDetail::from_str_lossy("high"), ImageDetail::High);
        assert_eq!(ImageDetail::from_str_lossy("auto"), ImageDetail::Auto);
        assert_eq!(ImageDetail::from_str_lossy("unknown"), ImageDetail::Auto);
        assert_eq!(ImageDetail::from_str_lossy(""), ImageDetail::Auto);
    }

    /// The `Default` implementation yields `Auto`.
    #[test]
    fn image_detail_default_is_auto() {
        assert_eq!(ImageDetail::default(), ImageDetail::Auto);
    }

    /// `Display` summarizes text by length and prints image source + detail.
    #[test]
    fn content_part_display() {
        let text = ContentPart::Text {
            text: "hello".to_string(),
        };
        assert_eq!(text.to_string(), "text(5 chars)");

        let img = ContentPart::Image {
            source: "blake3:abc".to_string(),
            detail: ImageDetail::High,
        };
        assert_eq!(img.to_string(), "image(source=blake3:abc, detail=high)");
    }

    /// A mixed list of all three variants survives a JSON round trip.
    #[test]
    fn content_part_serde_vec_round_trip() {
        let parts = vec![
            ContentPart::Text {
                text: "Describe this:".to_string(),
            },
            ContentPart::Image {
                source: "blake3:deadbeef".to_string(),
                detail: ImageDetail::High,
            },
            ContentPart::ImageUrl {
                url: "https://example.com/img.png".to_string(),
                detail: ImageDetail::Auto,
            },
        ];
        let json = serde_json::to_string(&parts).unwrap();
        let parsed: Vec<ContentPart> = serde_json::from_str(&json).unwrap();
        assert_eq!(parts, parsed);
    }

    /// Analyzing a raw text part keeps the text value.
    #[test]
    fn analyze_content_part_text() {
        let raw = RawContentPart::Text {
            text: Spanned::dummy("hello".to_string()),
        };
        let analyzed = analyze_content_part(&raw);
        match analyzed {
            AnalyzedContentPart::Text { text } => assert_eq!(text, "hello"),
            _ => panic!("expected Text"),
        }
    }

    /// Analyzing a raw image part keeps the source and parses the detail string.
    #[test]
    fn analyze_content_part_image_with_detail() {
        let raw = RawContentPart::Image {
            source: Spanned::dummy("blake3:abc".to_string()),
            detail: Some(Spanned::dummy("high".to_string())),
        };
        let analyzed = analyze_content_part(&raw);
        match analyzed {
            AnalyzedContentPart::Image { source, detail } => {
                assert_eq!(source, "blake3:abc");
                assert_eq!(detail, ImageDetail::High);
            }
            _ => panic!("expected Image"),
        }
    }

    /// A raw image part without `detail` analyzes to `ImageDetail::Auto`.
    #[test]
    fn analyze_content_part_image_no_detail_defaults_auto() {
        let raw = RawContentPart::Image {
            source: Spanned::dummy("blake3:xyz".to_string()),
            detail: None,
        };
        let analyzed = analyze_content_part(&raw);
        match analyzed {
            AnalyzedContentPart::Image { detail, .. } => {
                assert_eq!(detail, ImageDetail::Auto);
            }
            _ => panic!("expected Image"),
        }
    }

    /// `From<AnalyzedContentPart>` carries the fields over unchanged.
    #[test]
    fn analyzed_to_runtime_conversion() {
        let analyzed = AnalyzedContentPart::Image {
            source: "blake3:test".to_string(),
            detail: ImageDetail::Low,
        };
        let runtime: ContentPart = analyzed.into();
        assert_eq!(
            runtime,
            ContentPart::Image {
                source: "blake3:test".to_string(),
                detail: ImageDetail::Low,
            }
        );
    }

    /// `span()` on a text part returns the span of its `text` field.
    #[test]
    fn raw_content_part_span() {
        let span = Span::new(crate::source::FileId(0), 10, 20);
        let raw = RawContentPart::Text {
            text: Spanned::new("test".to_string(), span),
        };
        assert_eq!(raw.span(), span);
    }
}
353}