devboy_core/
asset.rs

1//! Asset management types for file attachments and content analysis.
2//!
3//! These types are shared across the provider layer and the `devboy-assets`
4//! crate, providing a unified abstraction for working with attached files
5//! (screenshots, logs, configs, etc.) across different providers.
6//!
7//! See ADR-010 for the full design rationale.
8
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12// =============================================================================
13// AssetContext
14// =============================================================================
15
16/// Context to which an asset is attached.
17///
18/// Different providers support attachments in different contexts — an issue
19/// body, an issue comment, a merge request, etc. This enum captures all the
20/// supported targets in a provider-agnostic way.
21#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
22#[serde(tag = "type", rename_all = "snake_case")]
23pub enum AssetContext {
24    /// Attachment on an issue body/description.
25    Issue {
26        /// Issue key (e.g. "DEV-123", "gitlab#42").
27        key: String,
28    },
29    /// Attachment on a comment under an issue.
30    IssueComment {
31        key: String,
32        /// Comment identifier within the issue.
33        comment_id: String,
34    },
35    /// Attachment on a merge request / pull request body.
36    MergeRequest {
37        /// MR / PR identifier. Named `mr_id` for consistency with
38        /// [`AssetContext::MrComment`] so JSON-wire field names are the
39        /// same across both variants.
40        mr_id: String,
41    },
42    /// Attachment on a comment/note of a merge request.
43    MrComment { mr_id: String, note_id: String },
44    /// Attachment from a messenger chat (Slack, Telegram, etc.).
45    Chat { chat_id: String, message_id: String },
46    /// Attachment from a knowledge base page (Confluence, etc.).
47    KbPage { page_id: String },
48}
49
50impl AssetContext {
51    /// Short colon-separated string for logging and debugging.
52    ///
53    /// **Note:** The cache directory layout is handled by
54    /// `devboy_assets::CacheManager::dir_for` / `path_for` — this method
55    /// is intentionally *not* used for on-disk paths.
56    ///
57    /// Examples:
58    /// - `issue:DEV-123`
59    /// - `mr:42`
60    /// - `chat:C0123ABC:msg42`
61    pub fn slug(&self) -> String {
62        match self {
63            AssetContext::Issue { key } => format!("issue:{key}"),
64            AssetContext::IssueComment { key, comment_id } => {
65                format!("issue:{key}:comment:{comment_id}")
66            }
67            AssetContext::MergeRequest { mr_id } => format!("mr:{mr_id}"),
68            AssetContext::MrComment { mr_id, note_id } => format!("mr:{mr_id}:note:{note_id}"),
69            AssetContext::Chat {
70                chat_id,
71                message_id,
72            } => format!("chat:{chat_id}:msg:{message_id}"),
73            AssetContext::KbPage { page_id } => format!("kb:{page_id}"),
74        }
75    }
76
77    /// Kind of the context (category used in enrichment and capabilities).
78    pub fn kind(&self) -> AssetContextKind {
79        match self {
80            AssetContext::Issue { .. } => AssetContextKind::Issue,
81            AssetContext::IssueComment { .. } => AssetContextKind::IssueComment,
82            AssetContext::MergeRequest { .. } => AssetContextKind::MergeRequest,
83            AssetContext::MrComment { .. } => AssetContextKind::MrComment,
84            AssetContext::Chat { .. } => AssetContextKind::Chat,
85            AssetContext::KbPage { .. } => AssetContextKind::KbPage,
86        }
87    }
88}
89
90/// Category of an [`AssetContext`] — used for capability lookup.
91#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
92#[serde(rename_all = "snake_case")]
93pub enum AssetContextKind {
94    /// Issue body / description.
95    Issue,
96    IssueComment,
97    /// Merge request / pull request body.
98    MergeRequest,
99    /// Comment / note on a merge request.
100    MrComment,
101    /// Messenger chat message.
102    Chat,
103    /// Knowledge base page.
104    KbPage,
105}
106
107// =============================================================================
108// AssetMeta / AssetInput
109// =============================================================================
110
111/// Metadata describing an asset — used for listings and enriched responses.
112///
113/// This type intentionally does NOT contain the file bytes; use
114/// [`AssetInput`] for uploads or a dedicated download method to fetch content.
115#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
116pub struct AssetMeta {
117    /// Stable identifier for the asset within devboy (UUID or provider id).
118    pub id: String,
119    pub filename: String,
120    /// MIME type (best-effort; may be `None` for unknown binaries).
121    #[serde(default, skip_serializing_if = "Option::is_none")]
122    pub mime_type: Option<String>,
123    /// File size in bytes if known.
124    #[serde(default, skip_serializing_if = "Option::is_none")]
125    pub size: Option<u64>,
126    /// Remote URL at the provider (if available).
127    #[serde(default, skip_serializing_if = "Option::is_none")]
128    pub url: Option<String>,
129    /// Creation timestamp (ISO 8601).
130    #[serde(default, skip_serializing_if = "Option::is_none")]
131    pub created_at: Option<String>,
132    /// Username / display name of the uploader if known.
133    #[serde(default, skip_serializing_if = "Option::is_none")]
134    pub author: Option<String>,
135    /// Whether the file is currently present in the local cache.
136    #[serde(default)]
137    pub cached: bool,
138    /// Absolute local path if the file is cached locally.
139    #[serde(default, skip_serializing_if = "Option::is_none")]
140    pub local_path: Option<String>,
141    /// SHA-256 checksum of the content if known.
142    #[serde(default, skip_serializing_if = "Option::is_none")]
143    pub checksum_sha256: Option<String>,
144    /// Result of analysis (Levels 1-2 built-in or Level 3 semantic).
145    #[serde(default, skip_serializing_if = "Option::is_none")]
146    pub analysis: Option<AssetAnalysis>,
147}
148
149/// Input data for uploading a new asset.
150///
151/// This type is part of the public `devboy_core::asset` API and is
152/// (de)serializable so it can cross crate and MCP tool boundaries. File
153/// bytes go through serde's default `Vec<u8>` encoding, which is a JSON
154/// array of numbers — MCP tools typically base64-encode the payload in a
155/// wrapper struct rather than serializing this type directly.
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct AssetInput {
158    /// Filename to use on the provider side.
159    pub filename: String,
160    /// Raw file bytes.
161    pub data: Vec<u8>,
162    /// Optional MIME type hint.
163    #[serde(default, skip_serializing_if = "Option::is_none")]
164    pub mime_type: Option<String>,
165}
166
167impl AssetInput {
168    /// Create a new input descriptor.
169    pub fn new(filename: impl Into<String>, data: Vec<u8>) -> Self {
170        Self {
171            filename: filename.into(),
172            data,
173            mime_type: None,
174        }
175    }
176
177    /// Attach a MIME type hint to the input.
178    pub fn with_mime_type(mut self, mime_type: impl Into<String>) -> Self {
179        self.mime_type = Some(mime_type.into());
180        self
181    }
182}
183
184// =============================================================================
185// Capabilities
186// =============================================================================
187
188/// Per-provider capability matrix for asset operations.
189///
190/// Each provider declares which CRUD operations it supports for each
191/// context kind. The values are used by the enricher to generate
192/// `asset_capabilities` entries in tool schemas so that agents can see in
193/// advance what operations are available.
194#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
195pub struct AssetCapabilities {
196    /// Capabilities for issue bodies.
197    #[serde(default)]
198    pub issue: ContextCapabilities,
199    #[serde(default)]
200    pub issue_comment: ContextCapabilities,
201    /// Capabilities for merge request bodies.
202    #[serde(default)]
203    pub merge_request: ContextCapabilities,
204    #[serde(default)]
205    pub mr_comment: ContextCapabilities,
206}
207
208impl AssetCapabilities {
209    /// Return the capabilities for a given context kind.
210    ///
211    /// Chat / KB contexts are out of scope for the initial provider set —
212    /// they return a shared empty capability set.
213    pub fn for_kind(&self, kind: AssetContextKind) -> &ContextCapabilities {
214        match kind {
215            AssetContextKind::Issue => &self.issue,
216            AssetContextKind::IssueComment => &self.issue_comment,
217            AssetContextKind::MergeRequest => &self.merge_request,
218            AssetContextKind::MrComment => &self.mr_comment,
219            AssetContextKind::Chat | AssetContextKind::KbPage => empty_context_capabilities(),
220        }
221    }
222}
223
224/// Shared sentinel used for unsupported context kinds.
225fn empty_context_capabilities() -> &'static ContextCapabilities {
226    static EMPTY: std::sync::OnceLock<ContextCapabilities> = std::sync::OnceLock::new();
227    EMPTY.get_or_init(ContextCapabilities::default)
228}
229
230/// CRUD capabilities for a single context kind.
231#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
232pub struct ContextCapabilities {
233    /// Whether uploading new attachments is supported.
234    #[serde(default)]
235    pub upload: bool,
236    /// Whether downloading attachments is supported.
237    #[serde(default)]
238    pub download: bool,
239    /// Whether deleting attachments is supported.
240    #[serde(default)]
241    pub delete: bool,
242    /// Whether listing attachments is supported.
243    #[serde(default)]
244    pub list: bool,
245    /// Max file size in bytes, if the provider advertises a limit.
246    #[serde(default, skip_serializing_if = "Option::is_none")]
247    pub max_file_size: Option<u64>,
248    /// Allowed MIME type patterns (e.g. `image/*`). Empty means any.
249    #[serde(default, skip_serializing_if = "Vec::is_empty")]
250    pub allowed_types: Vec<String>,
251}
252
253impl ContextCapabilities {
254    /// Convenience: all operations enabled with no type restrictions.
255    pub fn full() -> Self {
256        Self {
257            upload: true,
258            download: true,
259            delete: true,
260            list: true,
261            max_file_size: None,
262            allowed_types: Vec::new(),
263        }
264    }
265
266    /// Convenience: read-only (download + list).
267    pub fn read_only() -> Self {
268        Self {
269            upload: false,
270            download: true,
271            delete: false,
272            list: true,
273            max_file_size: None,
274            allowed_types: Vec::new(),
275        }
276    }
277}
278
279// =============================================================================
280// AssetAnalysis
281// =============================================================================
282
283/// Result of analyzing an asset through the processor pipeline.
284///
285/// Produced by Levels 1-2 (built-in processors, no LLM) and optionally
286/// enriched by Level 3 (semantic LLM analysis).
287#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
288pub struct AssetAnalysis {
289    /// Short human-readable summary for the agent (1-3 sentences).
290    pub summary: String,
291    pub content_kind: ContentKind,
292    /// Text extracted from the file if applicable (logs, configs).
293    #[serde(default, skip_serializing_if = "Option::is_none")]
294    pub extractable_text: Option<String>,
295    /// Key findings produced by built-in heuristics.
296    #[serde(default, skip_serializing_if = "Vec::is_empty")]
297    pub key_findings: Vec<String>,
298    /// Additional metadata (dimensions, duration, line counts, ...).
299    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
300    pub metadata: HashMap<String, serde_json::Value>,
301    /// Level 3 semantic analysis result, if available.
302    #[serde(default, skip_serializing_if = "Option::is_none")]
303    pub semantic: Option<SemanticAnalysis>,
304}
305
306/// Result of a Level 3 semantic (LLM-based) analysis.
307#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
308pub struct SemanticAnalysis {
309    /// Summary produced by the LLM.
310    pub summary: String,
311    /// Key findings identified by the LLM.
312    #[serde(default, skip_serializing_if = "Vec::is_empty")]
313    pub findings: Vec<String>,
314    /// Prompt used for the analysis (for caching and debugging).
315    pub prompt_used: String,
316    /// Model identifier used (e.g. "claude-sonnet-4").
317    pub model: String,
318    /// Whether this result was served from cache.
319    #[serde(default)]
320    pub cached: bool,
321}
322
323/// High-level kind of content stored in an asset.
324#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
325#[serde(rename_all = "snake_case")]
326pub enum ContentKind {
327    /// Text-based content (logs, plain text, source code).
328    Text,
329    /// Raster or vector image.
330    Image,
331    Video,
332    /// Document file (PDF, DOCX, ...).
333    Document,
334    /// Structured data (CSV, XLSX, JSON, YAML).
335    Data,
336    /// Binary content of an unknown kind.
337    #[default]
338    Binary,
339}
340
341// =============================================================================
342// Markdown parsing helpers
343// =============================================================================
344
345/// Extract attachments embedded in a markdown string.
346///
347/// Recognizes both image syntax (`![alt](url)`) and link syntax
348/// (`[text](url)`). The result is deduplicated by URL and returned in the
349/// order the references appear in the source. Inputs without any markdown
350/// links produce an empty vector.
351///
352/// This helper is used by providers like GitLab and GitHub that embed
353/// attachments directly into issue / MR bodies and comments rather than
354/// exposing a dedicated attachments API.
355///
356/// **No filtering is applied** — every `[text](url)` and `![alt](url)`
357/// reference is returned, including plain web links. Callers that only
358/// want downloadable files should filter by scheme, host, or file
359/// extension as appropriate for their provider. The extracted `filename`
360/// is derived from the markdown alt text / link text when available and
361/// falls back to the final path segment of the URL.
362pub fn parse_markdown_attachments(markdown: &str) -> Vec<MarkdownAttachment> {
363    let mut out: Vec<MarkdownAttachment> = Vec::new();
364    let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
365
366    let bytes = markdown.as_bytes();
367    let mut i = 0;
368    while i < bytes.len() {
369        // Look for `[` (link) or `![` (image).
370        let is_image = i + 1 < bytes.len() && bytes[i] == b'!' && bytes[i + 1] == b'[';
371        let is_link = bytes[i] == b'[';
372        if !is_image && !is_link {
373            i += 1;
374            continue;
375        }
376
377        let text_start = if is_image { i + 2 } else { i + 1 };
378        let Some(text_end_rel) = find_matching(&bytes[text_start..], b'[', b']') else {
379            i += 1;
380            continue;
381        };
382        let text_end = text_start + text_end_rel;
383
384        // Must be immediately followed by `(`.
385        if text_end + 1 >= bytes.len() || bytes[text_end + 1] != b'(' {
386            i = text_end + 1;
387            continue;
388        }
389        let url_start = text_end + 2;
390        let Some(url_end_rel) = find_matching(&bytes[url_start..], b'(', b')') else {
391            i = text_end + 1;
392            continue;
393        };
394        let url_end = url_start + url_end_rel;
395
396        let text = std::str::from_utf8(&bytes[text_start..text_end])
397            .unwrap_or("")
398            .trim()
399            .to_string();
400        let url_raw = std::str::from_utf8(&bytes[url_start..url_end])
401            .unwrap_or("")
402            .trim();
403        // Strip optional title: `[foo](url "title")`
404        let url = match url_raw.split_once(char::is_whitespace) {
405            Some((head, _tail)) => head.trim(),
406            None => url_raw,
407        };
408        // Handle angle-bracket wrapped URLs: `[text](<url>)`
409        let url = url
410            .strip_prefix('<')
411            .and_then(|s| s.strip_suffix('>'))
412            .unwrap_or(url)
413            .to_string();
414
415        if !url.is_empty() && seen.insert(url.clone()) {
416            let filename = if !text.is_empty() && !looks_like_url(&text) {
417                text
418            } else {
419                filename_from_url(&url)
420            };
421            out.push(MarkdownAttachment {
422                filename,
423                url,
424                is_image,
425            });
426        }
427
428        i = url_end + 1;
429    }
430
431    // Also parse HTML <img> tags — GitHub's Web UI inserts attachments
432    // as `<img src="..." alt="..." />` rather than markdown `![]()`.
433    parse_html_img_tags(markdown, &mut out, &mut seen);
434
435    out
436}
437
438/// Extract `src` URLs from HTML `<img>` tags.
439fn parse_html_img_tags(
440    html: &str,
441    out: &mut Vec<MarkdownAttachment>,
442    seen: &mut std::collections::HashSet<String>,
443) {
444    let lower = html.to_ascii_lowercase();
445    let mut search_from = 0;
446    while let Some(tag_start) = lower[search_from..].find("<img ") {
447        let abs_start = search_from + tag_start;
448        let Some(tag_end_rel) = html[abs_start..].find('>') else {
449            break;
450        };
451        let tag = &html[abs_start..abs_start + tag_end_rel + 1];
452
453        // Extract src="..."
454        let url = extract_html_attr(tag, "src").unwrap_or_default();
455        let alt = extract_html_attr(tag, "alt").unwrap_or_default();
456
457        if !url.is_empty() && seen.insert(url.clone()) {
458            let filename = if !alt.is_empty() && alt != "Image" && !looks_like_url(&alt) {
459                alt
460            } else {
461                filename_from_url(&url)
462            };
463            out.push(MarkdownAttachment {
464                filename,
465                url,
466                is_image: true,
467            });
468        }
469
470        search_from = abs_start + tag_end_rel + 1;
471    }
472}
473
/// Extract the value of an HTML attribute from a tag string.
///
/// The attribute name is matched case-insensitively and only where it
/// starts a new attribute — i.e. directly after whitespace or after the
/// closing quote of the previous attribute — so asking for `src` does not
/// pick up `data-src`. The value must be quoted, with either double or
/// single quotes; unquoted values are not supported.
///
/// Returns `None` when the attribute is absent or its value is not
/// terminated by the matching quote. The value is returned verbatim
/// (HTML entities are not decoded).
fn extract_html_attr(tag: &str, attr_name: &str) -> Option<String> {
    // ASCII lowercasing preserves byte offsets, so positions found in
    // `lower` are valid for slicing the original `tag`.
    let lower = tag.to_ascii_lowercase();
    let needle = format!("{}=", attr_name.to_ascii_lowercase());

    let mut from = 0;
    while let Some(rel) = lower[from..].find(&needle) {
        let name_at = from + rel;
        let value_at = name_at + needle.len();
        from = value_at;

        // Reject matches that are the tail of a longer name (`data-src=`).
        let starts_attribute = name_at
            .checked_sub(1)
            .and_then(|prev| lower.as_bytes().get(prev))
            .is_some_and(|&b| b.is_ascii_whitespace() || b == b'"' || b == b'\'');
        if !starts_attribute {
            continue;
        }

        let rest = &tag[value_at..];
        let Some(quote @ ('"' | '\'')) = rest.chars().next() else {
            continue;
        };
        let value = &rest[quote.len_utf8()..];
        let end = value.find(quote)?;
        return Some(value[..end].to_string());
    }
    None
}
483
/// One attachment reference discovered while scanning a markdown document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownAttachment {
    /// Best-effort filename, taken from the alt / link text or, failing
    /// that, from the URL path.
    pub filename: String,
    /// URL exactly as written in the document; may be absolute or relative.
    pub url: String,
    /// `true` when the reference is an image (`![]()`), `false` for a
    /// plain link.
    pub is_image: bool,
}
494
/// Locate the `close` byte that balances an already-consumed `open` byte.
///
/// `bytes` is the input *after* the opening delimiter, so the nesting
/// depth starts at one. Every further `open` increases the depth and every
/// `close` decreases it; the index of the `close` that brings the depth
/// back to zero is returned. A backslash together with the byte after it
/// is skipped, so escaped delimiters do not count. Returns `None` when the
/// input ends before the delimiter is balanced.
fn find_matching(bytes: &[u8], open: u8, close: u8) -> Option<usize> {
    let mut nesting = 1usize;
    let mut pos = 0usize;
    while let Some(&byte) = bytes.get(pos) {
        match byte {
            // Escape sequence: skip the backslash and the escaped byte. A
            // trailing backslash has nothing to escape and falls through.
            b'\\' if pos + 1 < bytes.len() => {
                pos += 2;
                continue;
            }
            b if b == open => nesting += 1,
            b if b == close => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(pos);
                }
            }
            _ => {}
        }
        pos += 1;
    }
    None
}
518
/// Cheap check for whether `s` starts like a URL (`http://`, `https://` or
/// `www.`, case-sensitive).
///
/// Used to decide whether link text is informative enough to serve as a
/// filename: text that is itself a URL is not.
fn looks_like_url(s: &str) -> bool {
    ["http://", "https://", "www."]
        .iter()
        .any(|prefix| s.starts_with(prefix))
}
524
/// Derive a filename from the final path segment of a URL.
///
/// Query strings and fragments are stripped, and the host is never used as
/// a filename — neither for `scheme://host/...` URLs nor for
/// protocol-relative `//host/...` ones. Empty path segments (such as the
/// one after a trailing `/`) are skipped. The segment is returned as
/// written; percent-encoding is not decoded.
///
/// Returns `"attachment"` if nothing sensible can be extracted (e.g. the
/// URL is empty or has no path beyond the host).
pub fn filename_from_url(url: &str) -> String {
    // Drop the query first, then the fragment.
    let without_query = url.split_once('?').map_or(url, |(head, _)| head);
    let without_fragment = without_query
        .split_once('#')
        .map_or(without_query, |(head, _)| head);

    // URLs that carry an authority: everything up to the first `/` after
    // `scheme://` (or after a leading `//`) is the host, not the path.
    let after_authority_marker = without_fragment
        .split_once("://")
        .map(|(_scheme, rest)| rest)
        .or_else(|| without_fragment.strip_prefix("//"));
    let path = match after_authority_marker {
        Some(rest) => rest.split_once('/').map_or("", |(_host, path)| path),
        None => without_fragment,
    };

    path.rsplit('/')
        .find(|segment| !segment.is_empty())
        .unwrap_or("attachment")
        .to_string()
}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize `value` to JSON and parse the result back.
    ///
    /// Panics (failing the calling test) if either step errors.
    fn json_roundtrip<T>(value: &T) -> T
    where
        T: Serialize + serde::de::DeserializeOwned,
    {
        let encoded = serde_json::to_string(value).unwrap();
        serde_json::from_str(&encoded).unwrap()
    }

    // ---- AssetContext / AssetContextKind --------------------------------

    #[test]
    fn asset_context_slug_formats() {
        let cases = [
            (
                AssetContext::Issue {
                    key: "DEV-123".into(),
                },
                "issue:DEV-123",
            ),
            (AssetContext::MergeRequest { mr_id: "42".into() }, "mr:42"),
            (
                AssetContext::MrComment {
                    mr_id: "42".into(),
                    note_id: "7".into(),
                },
                "mr:42:note:7",
            ),
            (
                AssetContext::IssueComment {
                    key: "DEV-1".into(),
                    comment_id: "99".into(),
                },
                "issue:DEV-1:comment:99",
            ),
            (
                AssetContext::Chat {
                    chat_id: "C0123".into(),
                    message_id: "m5".into(),
                },
                "chat:C0123:msg:m5",
            ),
            (
                AssetContext::KbPage {
                    page_id: "p7".into(),
                },
                "kb:p7",
            ),
        ];
        for (context, expected) in cases {
            assert_eq!(context.slug(), expected);
        }
    }

    #[test]
    fn asset_context_kind_maps_correctly() {
        let issue = AssetContext::Issue { key: "x".into() };
        assert_eq!(issue.kind(), AssetContextKind::Issue);

        let merge_request = AssetContext::MergeRequest { mr_id: "1".into() };
        assert_eq!(merge_request.kind(), AssetContextKind::MergeRequest);
    }

    // ---- Capabilities ---------------------------------------------------

    #[test]
    fn capabilities_full_and_read_only() {
        let full = ContextCapabilities::full();
        assert!(full.upload);
        assert!(full.download);
        assert!(full.delete);
        assert!(full.list);

        let read_only = ContextCapabilities::read_only();
        assert!(!read_only.upload);
        assert!(read_only.download);
        assert!(!read_only.delete);
        assert!(read_only.list);
    }

    #[test]
    fn asset_capabilities_for_kind() {
        let caps = AssetCapabilities {
            issue: ContextCapabilities::full(),
            merge_request: ContextCapabilities::read_only(),
            ..Default::default()
        };

        let issue = caps.for_kind(AssetContextKind::Issue);
        assert!(issue.upload);

        let merge_request = caps.for_kind(AssetContextKind::MergeRequest);
        assert!(!merge_request.upload);
        assert!(merge_request.download);

        // Out-of-scope kinds fall back to empty caps.
        let chat = caps.for_kind(AssetContextKind::Chat);
        assert!(!chat.download);
    }

    // ---- AssetInput -----------------------------------------------------

    #[test]
    fn asset_input_builder() {
        let AssetInput {
            filename,
            data,
            mime_type,
        } = AssetInput::new("a.png", vec![1, 2, 3]).with_mime_type("image/png");
        assert_eq!(filename, "a.png");
        assert_eq!(data, vec![1, 2, 3]);
        assert_eq!(mime_type.as_deref(), Some("image/png"));
    }

    #[test]
    fn asset_input_serde_roundtrip() {
        let original = AssetInput::new("x.bin", vec![0, 1, 2]).with_mime_type("application/octet");
        let restored = json_roundtrip(&original);
        assert_eq!(restored.filename, "x.bin");
        assert_eq!(restored.data, vec![0, 1, 2]);
        assert_eq!(restored.mime_type.as_deref(), Some("application/octet"));

        // mime_type omitted when None — the shape stays small on the wire.
        let json = serde_json::to_string(&AssetInput::new("y.txt", vec![])).unwrap();
        assert!(!json.contains("mime_type"), "unexpected field: {json}");
    }

    // ---- AssetMeta ------------------------------------------------------

    #[test]
    fn asset_meta_serde_roundtrip() {
        let without_analysis = AssetMeta {
            id: "a1".into(),
            filename: "screen.png".into(),
            mime_type: Some("image/png".into()),
            size: Some(1234),
            url: Some("https://x/y".into()),
            created_at: Some("2026-04-11T00:00:00Z".into()),
            author: Some("alice".into()),
            cached: true,
            local_path: Some("/tmp/cache/a1.png".into()),
            checksum_sha256: Some("deadbeef".into()),
            analysis: None,
        };
        assert_eq!(without_analysis, json_roundtrip(&without_analysis));

        // With analysis attached.
        let with_analysis = AssetMeta {
            analysis: Some(AssetAnalysis {
                summary: "1 error".into(),
                content_kind: ContentKind::Text,
                extractable_text: Some("ERROR line".into()),
                key_findings: vec!["panic".into()],
                metadata: HashMap::new(),
                semantic: None,
            }),
            ..without_analysis
        };
        assert_eq!(with_analysis, json_roundtrip(&with_analysis));
    }

    #[test]
    fn asset_meta_skips_empty_optionals_when_serialized() {
        let minimal = AssetMeta {
            id: "a1".into(),
            filename: "x".into(),
            ..Default::default()
        };
        let json = serde_json::to_string(&minimal).unwrap();
        // `cached` is always written (it has no skip_serializing_if), but
        // unset optional fields must not appear.
        for absent in ["mime_type", "analysis", "author"] {
            assert!(!json.contains(absent));
        }
    }

    // ---- Serde round-trips ----------------------------------------------

    #[test]
    fn asset_capabilities_serde_roundtrip() {
        let limited_merge_request = ContextCapabilities {
            upload: true,
            download: true,
            delete: false,
            list: true,
            max_file_size: Some(10_485_760),
            allowed_types: vec!["image/*".into()],
        };
        let caps = AssetCapabilities {
            issue: ContextCapabilities::full(),
            issue_comment: ContextCapabilities::read_only(),
            merge_request: limited_merge_request,
            mr_comment: ContextCapabilities::default(),
        };
        assert_eq!(caps, json_roundtrip(&caps));
    }

    #[test]
    fn asset_analysis_with_semantic_serde_roundtrip() {
        let semantic = SemanticAnalysis {
            summary: "Redis connection drops under load.".into(),
            findings: vec!["timeout after 30s".into()],
            prompt_used: "find db errors".into(),
            model: "claude-sonnet-4".into(),
            cached: false,
        };
        let analysis = AssetAnalysis {
            summary: "error log with 12 ERRORs".into(),
            content_kind: ContentKind::Text,
            extractable_text: Some("ERROR at line 147".into()),
            key_findings: vec!["12 ERROR lines".into(), "race condition suspected".into()],
            metadata: HashMap::from([("line_count".into(), serde_json::json!(5432))]),
            semantic: Some(semantic),
        };
        assert_eq!(analysis, json_roundtrip(&analysis));
    }

    #[test]
    fn content_kind_serde() {
        let all_kinds = [
            ContentKind::Text,
            ContentKind::Image,
            ContentKind::Video,
            ContentKind::Document,
            ContentKind::Data,
            ContentKind::Binary,
        ];
        for kind in all_kinds {
            assert_eq!(kind, json_roundtrip(&kind));
        }
    }

    #[test]
    fn asset_context_kind_serde() {
        let all_kinds = [
            AssetContextKind::Issue,
            AssetContextKind::IssueComment,
            AssetContextKind::MergeRequest,
            AssetContextKind::MrComment,
            AssetContextKind::Chat,
            AssetContextKind::KbPage,
        ];
        for kind in all_kinds {
            assert_eq!(kind, json_roundtrip(&kind));
        }
    }

    #[test]
    fn asset_context_all_variants_roundtrip() {
        let variants = [
            AssetContext::Issue {
                key: "DEV-1".into(),
            },
            AssetContext::IssueComment {
                key: "DEV-1".into(),
                comment_id: "c1".into(),
            },
            AssetContext::MergeRequest { mr_id: "42".into() },
            AssetContext::MrComment {
                mr_id: "42".into(),
                note_id: "n1".into(),
            },
            AssetContext::Chat {
                chat_id: "C1".into(),
                message_id: "m1".into(),
            },
            AssetContext::KbPage {
                page_id: "p1".into(),
            },
        ];
        for context in &variants {
            assert_eq!(*context, json_roundtrip(context));

            // Also exercise `kind()` / `slug()` for every variant so the
            // match arms stay covered.
            assert!(!context.slug().is_empty());
            let _ = context.kind();
        }
    }

    #[test]
    fn asset_context_serde_roundtrip() {
        let context = AssetContext::IssueComment {
            key: "DEV-5".into(),
            comment_id: "42".into(),
        };
        assert_eq!(context, json_roundtrip(&context));
    }

    #[test]
    fn content_kind_default_is_binary() {
        assert_eq!(ContentKind::default(), ContentKind::Binary);
    }

    // ---- URL / markdown helpers -----------------------------------------

    #[test]
    fn filename_from_url_strips_query_and_fragment() {
        let cases = [
            ("https://x/y/z/report.log?token=abc#top", "report.log"),
            ("https://x/", "attachment"),
            ("", "attachment"),
        ];
        for (url, expected) in cases {
            assert_eq!(filename_from_url(url), expected);
        }
    }

    #[test]
    fn markdown_parses_image_and_link_syntax() {
        let md = "Hello ![screenshot](https://cdn.example.com/a/b/screen.png) and \
                  a [log](https://cdn.example.com/run-42.log).";
        let found = parse_markdown_attachments(md);
        assert_eq!(found.len(), 2);

        let image = &found[0];
        assert_eq!(image.filename, "screenshot");
        assert_eq!(image.url, "https://cdn.example.com/a/b/screen.png");
        assert!(image.is_image);

        let link = &found[1];
        assert_eq!(link.filename, "log");
        assert!(!link.is_image);
    }

    #[test]
    fn markdown_deduplicates_by_url() {
        let found = parse_markdown_attachments(
            "![a](https://x/1.png) and again ![b](https://x/1.png)",
        );
        assert_eq!(found.len(), 1);
        // The first reference wins.
        assert_eq!(found[0].filename, "a");
    }

    #[test]
    fn markdown_handles_titles_and_spaces() {
        let found = parse_markdown_attachments("[spec](https://x/spec.pdf \"Specification\")");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].url, "https://x/spec.pdf");
        assert_eq!(found[0].filename, "spec");
    }

    #[test]
    fn markdown_ignores_unmatched_brackets() {
        let found =
            parse_markdown_attachments("Unclosed [foo( and then a good ![g](https://x/g.png)");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].url, "https://x/g.png");
    }

    #[test]
    fn markdown_falls_back_to_url_when_text_is_url() {
        let found = parse_markdown_attachments("[https://x/a.png](https://x/a.png)");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].filename, "a.png");
    }

    #[test]
    fn markdown_empty_and_plain_text() {
        for input in ["", "no links here at all"] {
            assert!(parse_markdown_attachments(input).is_empty());
        }
    }

    #[test]
    fn markdown_strips_angle_bracket_urls() {
        let link = parse_markdown_attachments("[spec](<https://example.com/spec.pdf>)");
        assert_eq!(link.len(), 1);
        assert_eq!(link[0].url, "https://example.com/spec.pdf");
        assert_eq!(link[0].filename, "spec");

        // Image variant
        let image = parse_markdown_attachments("![shot](<https://cdn.example.com/img.png>)");
        assert_eq!(image.len(), 1);
        assert_eq!(image[0].url, "https://cdn.example.com/img.png");
    }
}
devboy_core/asset.rs

devboy_core/
asset.rs