objectiveai_sdk/data_url.rs
1//! Data-URL parsing utility shared across the SDK.
2
3/// Parses a data URL, returning `(full_mime, base64_payload)`.
4///
5/// Expects the format `data:{type}/{subtype};base64,{payload}` and
6/// validates that **the entire input is a single data URL** — the
7/// payload must be exclusively standard base64 characters
8/// (`[A-Za-z0-9+/=]`) and the mime carries no whitespace. That
9/// rules out strings where a data-URL prefix is followed by
10/// unrelated text (e.g. a tool output that happens to start with
11/// `data:image/png;base64,XYZ\nfollowed by prose…`); those round-
12/// trip as `None` so callers reliably pass them through as text.
13///
14/// Returns `None` for:
15/// - Strings missing the `data:` prefix (including any leading
16/// whitespace or content before it).
17/// - Strings missing the `;base64,` marker.
18/// - Strings whose payload contains anything outside the standard
19/// base64 alphabet (newlines, spaces, trailing prose, etc.).
20/// - Strings whose mime portion contains ASCII whitespace.
21///
22/// `#[inline]` because this is on the hot path of every MCP
23/// content-block conversion (`From<ContentBlock>`,
24/// `RichContentPart::from_text_or_data_url`, and the per-leaf
25/// `file_content` extraction during log writes) and the body is
26/// a handful of cheap string ops — the call overhead would be a
27/// measurable fraction of the work.
28#[inline]
29pub fn parse_data_url(url: &str) -> Option<(&str, &str)> {
30 let rest = url.strip_prefix("data:")?;
31 let (mime, payload) = rest.split_once(";base64,")?;
32 if mime.bytes().any(|b| b.is_ascii_whitespace()) {
33 return None;
34 }
35 if !payload.bytes().all(is_base64_byte) {
36 return None;
37 }
38 Some((mime, payload))
39}
40
/// Reports whether `b` belongs to the standard base64 alphabet
/// (`[A-Za-z0-9+/=]`): an ASCII letter, an ASCII digit, `+`, `/`,
/// or the `=` padding character.
///
/// Everything else is rejected, including whitespace and the
/// URL-safe variant's `-` and `_`. Data URLs are required to use the
/// standard alphabet, and this strictness is what lets
/// [`parse_data_url`] refuse input that merely starts like a data
/// URL and then continues with other content.
#[inline]
fn is_base64_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'+' || b == b'/' || b == b'='
}