// hirn_engine/resource_presentation.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::time::Instant;
3
4use serde::{Deserialize, Serialize};
5
6use hirn_core::error::HirnResult;
7use hirn_core::id::MemoryId;
8use hirn_core::types::AgentId;
9use hirn_core::{DerivedArtifactKind, HydrationMode, ModalityProfile, ResourceId};
10
11use crate::db::HirnDB;
12use crate::ql::results::ScoredMemory;
13use crate::recall::{RecallResult, ResourceEvidenceSummary};
14
/// Artifact kinds tried, in this priority order, when choosing which derived
/// artifact supplies a resource's preview text.
const PREFERRED_PREVIEW_ARTIFACTS: [DerivedArtifactKind; 7] = [
    DerivedArtifactKind::Preview,
    DerivedArtifactKind::Transcript,
    DerivedArtifactKind::Caption,
    DerivedArtifactKind::OcrText,
    DerivedArtifactKind::SyntaxSummary,
    DerivedArtifactKind::SchemaSummary,
    DerivedArtifactKind::Thumbnail,
];

/// Factor applied to a preview's match score to obtain the boost that is added
/// to a result's score during preview-based reranking.
const RESOURCE_PREVIEW_RERANK_WEIGHT: f32 = 0.08;
/// Upper bound on the number of matched terms listed in a score attribution.
const MAX_ATTRIBUTION_TERMS: usize = 6;
27
/// Preview text selected for a single resource, together with the evidence
/// metadata it was packaged from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct ResourcePreviewPackage {
    /// Resource the preview belongs to.
    pub(crate) resource_id: ResourceId,
    /// Evidence role copied from the resource's evidence summary.
    pub(crate) role: hirn_core::EvidenceRole,
    /// Display name copied from the evidence summary, if any.
    pub(crate) display_name: Option<String>,
    /// Modality copied from the evidence summary, if any.
    pub(crate) modality: Option<ModalityProfile>,
    /// Kind of the derived artifact that supplied the preview text.
    pub(crate) artifact_kind: DerivedArtifactKind,
    /// Modality of the derived artifact that supplied the preview text.
    pub(crate) artifact_modality: ModalityProfile,
    /// Preview text, trimmed and shortened to the character budget in effect
    /// when the package was built.
    pub(crate) text_content: String,
    /// Whether `text_content` was shortened to fit the character budget.
    pub(crate) truncated: bool,
}
39
/// Explains a score boost granted to a result because one of its resource
/// previews overlaps the query terms.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResourceScoreAttribution {
    /// Resource whose preview produced the boost.
    pub resource_id: ResourceId,
    /// Evidence role of that resource.
    pub role: hirn_core::EvidenceRole,
    /// Display name of the resource, if any.
    pub display_name: Option<String>,
    /// Modality of the resource, if any.
    pub modality: Option<ModalityProfile>,
    /// Kind of the derived artifact whose text was matched.
    pub artifact_kind: DerivedArtifactKind,
    /// Modality of the derived artifact whose text was matched.
    pub artifact_modality: ModalityProfile,
    /// Normalized query terms also found in the preview text (at most
    /// `MAX_ATTRIBUTION_TERMS` entries).
    pub matched_terms: Vec<String>,
    /// Overlap score in `[0.0, 1.0]` combining query coverage and preview density.
    pub match_score: f32,
    /// `match_score` scaled by `RESOURCE_PREVIEW_RERANK_WEIGHT`; the amount
    /// added to the result's score.
    pub score_boost: f32,
}
52
/// Resource-independent part of a preview (artifact choice and text) that can
/// be reused for every evidence summary referring to the same resource.
#[derive(Debug, Clone)]
struct CachedResourcePreview {
    /// Kind of the artifact the text was taken from.
    artifact_kind: DerivedArtifactKind,
    /// Modality of the artifact the text was taken from.
    artifact_modality: ModalityProfile,
    /// Trimmed, possibly shortened preview text.
    text_content: String,
    /// Whether the text was shortened to fit the character budget.
    truncated: bool,
}
60
/// Memoizes preview selection per resource so each resource is fetched at most
/// once while the cache is in use.
#[derive(Debug, Default)]
pub(crate) struct PreviewPackageCache {
    /// Selected preview per resource. A stored `None` records that no preview
    /// could be obtained (resource missing, fetch error, or no usable artifact),
    /// so the lookup is not retried.
    cached_previews: BTreeMap<ResourceId, Option<CachedResourcePreview>>,
}
65
/// Calling surface a preview package is resolved for; reported as the
/// `surface` label on the preview-package metrics.
#[derive(Debug, Clone, Copy)]
pub(crate) enum PreviewPackageSurface {
    /// Labelled `recall` in metrics.
    Recall,
    /// Labelled `think` in metrics.
    Think,
}

impl PreviewPackageSurface {
    /// Returns the static metric label value for this surface.
    const fn as_label(self) -> &'static str {
        match self {
            PreviewPackageSurface::Think => "think",
            PreviewPackageSurface::Recall => "recall",
        }
    }
}
80
81pub(crate) async fn hydrate_resource_preview_packages_for_scored_records(
82    db: &HirnDB,
83    actor_id: &AgentId,
84    scored: &[ScoredMemory],
85    max_resource_previews_per_record: usize,
86    max_resource_preview_chars: usize,
87) -> HirnResult<BTreeMap<MemoryId, Vec<ResourcePreviewPackage>>> {
88    if max_resource_previews_per_record == 0 || max_resource_preview_chars == 0 {
89        return Ok(BTreeMap::new());
90    }
91
92    let mut preview_cache = PreviewPackageCache::default();
93    let mut packaged = BTreeMap::new();
94
95    for scored_record in scored {
96        let preview_packages = package_resource_preview_packages_for_evidence(
97            db,
98            actor_id,
99            &scored_record.resource_evidence,
100            &scored_record.resource_preview_packages,
101            max_resource_previews_per_record,
102            max_resource_preview_chars,
103            &mut preview_cache,
104            PreviewPackageSurface::Recall,
105        )
106        .await;
107        if !preview_packages.is_empty() {
108            packaged.insert(scored_record.record.id(), preview_packages);
109        }
110    }
111
112    Ok(packaged)
113}
114
115pub(crate) async fn apply_resource_preview_rerank(
116    db: &HirnDB,
117    actor_id: &AgentId,
118    query_text: &str,
119    results: &mut [RecallResult],
120    max_resource_previews_per_result: usize,
121    max_resource_preview_chars: usize,
122) -> HirnResult<()> {
123    if results.is_empty()
124        || max_resource_previews_per_result == 0
125        || max_resource_preview_chars == 0
126    {
127        return Ok(());
128    }
129
130    let query_terms = normalized_terms(query_text);
131    if query_terms.is_empty() {
132        return Ok(());
133    }
134
135    let mut preview_cache = PreviewPackageCache::default();
136    for result in results.iter_mut() {
137        let preview_packages = package_resource_preview_packages(
138            db,
139            actor_id,
140            &result.resource_evidence,
141            max_resource_previews_per_result,
142            max_resource_preview_chars,
143            &mut preview_cache,
144        )
145        .await;
146
147        result
148            .resource_preview_packages
149            .clone_from(&preview_packages);
150
151        if let Some(attribution) = best_resource_score_attribution(&query_terms, &preview_packages)
152        {
153            result.composite_score =
154                (result.composite_score + attribution.score_boost).clamp(0.0, 1.0);
155            result.resource_score_attribution = vec![attribution];
156        } else {
157            result.resource_score_attribution.clear();
158        }
159    }
160
161    results.sort_by(|left, right| {
162        right
163            .composite_score
164            .total_cmp(&left.composite_score)
165            .then_with(|| right.similarity.total_cmp(&left.similarity))
166    });
167
168    Ok(())
169}
170
171pub(crate) async fn apply_resource_preview_rerank_to_scored_records(
172    db: &HirnDB,
173    actor_id: &AgentId,
174    query_text: &str,
175    results: &mut [ScoredMemory],
176    max_resource_previews_per_result: usize,
177    max_resource_preview_chars: usize,
178) -> HirnResult<()> {
179    if results.is_empty()
180        || max_resource_previews_per_result == 0
181        || max_resource_preview_chars == 0
182    {
183        return Ok(());
184    }
185
186    let query_terms = normalized_terms(query_text);
187    if query_terms.is_empty() {
188        return Ok(());
189    }
190
191    let mut preview_cache = PreviewPackageCache::default();
192    for result in results.iter_mut() {
193        let preview_packages = package_resource_preview_packages_for_evidence(
194            db,
195            actor_id,
196            &result.resource_evidence,
197            &result.resource_preview_packages,
198            max_resource_previews_per_result,
199            max_resource_preview_chars,
200            &mut preview_cache,
201            PreviewPackageSurface::Recall,
202        )
203        .await;
204
205        result
206            .resource_preview_packages
207            .clone_from(&preview_packages);
208
209        if let Some(attribution) = best_resource_score_attribution(&query_terms, &preview_packages)
210        {
211            result.score = (result.score + attribution.score_boost).clamp(0.0, 1.0);
212            result.resource_score_attribution = vec![attribution];
213        } else {
214            result.resource_score_attribution.clear();
215        }
216    }
217
218    results.sort_by(|left, right| {
219        right.score.total_cmp(&left.score).then_with(|| {
220            right
221                .score_breakdown
222                .similarity
223                .total_cmp(&left.score_breakdown.similarity)
224        })
225    });
226
227    Ok(())
228}
229
230pub(crate) async fn package_resource_preview_packages_for_evidence(
231    db: &HirnDB,
232    actor_id: &AgentId,
233    resource_evidence: &[ResourceEvidenceSummary],
234    seeded_packages: &[ResourcePreviewPackage],
235    max_resource_previews: usize,
236    max_resource_preview_chars: usize,
237    preview_cache: &mut PreviewPackageCache,
238    surface: PreviewPackageSurface,
239) -> Vec<ResourcePreviewPackage> {
240    let started = Instant::now();
241
242    if max_resource_previews == 0 || max_resource_preview_chars == 0 {
243        return Vec::new();
244    }
245
246    if let Some(reused) = reuse_seeded_preview_packages(
247        seeded_packages,
248        max_resource_previews,
249        max_resource_preview_chars,
250    ) {
251        if !reused.is_empty() {
252            record_preview_package_resolution(surface, "seeded_reuse", started.elapsed());
253        }
254        return reused;
255    }
256
257    let has_previewable_evidence = resource_evidence
258        .iter()
259        .any(|summary| summary.has_preview && summary.can_hydrate_preview);
260
261    let packaged = package_resource_preview_packages(
262        db,
263        actor_id,
264        resource_evidence,
265        max_resource_previews,
266        max_resource_preview_chars,
267        preview_cache,
268    )
269    .await;
270
271    if has_previewable_evidence {
272        record_preview_package_resolution(surface, "hydrated_refetch", started.elapsed());
273    }
274
275    packaged
276}
277
278pub(crate) fn resource_preview_packages_to_json(
279    packages: &[ResourcePreviewPackage],
280) -> serde_json::Value {
281    serde_json::Value::Array(
282        packages
283            .iter()
284            .map(|package| {
285                serde_json::json!({
286                    "resource_id": package.resource_id.to_string(),
287                    "role": package.role.as_str(),
288                    "display_name": package.display_name,
289                    "modality": package.modality.map(|modality| modality.as_str()),
290                    "artifact_kind": package.artifact_kind.as_str(),
291                    "artifact_modality": package.artifact_modality.as_str(),
292                    "text_content": package.text_content,
293                    "truncated": package.truncated,
294                })
295            })
296            .collect(),
297    )
298}
299
300fn record_preview_package_resolution(
301    surface: PreviewPackageSurface,
302    path: &'static str,
303    elapsed: std::time::Duration,
304) {
305    metrics::counter!(
306        crate::metrics::PREVIEW_PACKAGE_PATH_TOTAL,
307        "surface" => surface.as_label(),
308        "path" => path
309    )
310    .increment(1);
311    metrics::histogram!(
312        crate::metrics::PREVIEW_PACKAGE_RESOLUTION_SECONDS,
313        "surface" => surface.as_label(),
314        "path" => path
315    )
316    .record(elapsed.as_secs_f64());
317}
318
319pub(crate) fn resource_score_attribution_to_json(
320    attributions: &[ResourceScoreAttribution],
321) -> serde_json::Value {
322    serde_json::Value::Array(
323        attributions
324            .iter()
325            .map(|attribution| {
326                serde_json::json!({
327                    "resource_id": attribution.resource_id.to_string(),
328                    "role": attribution.role.as_str(),
329                    "display_name": attribution.display_name,
330                    "modality": attribution.modality.map(|modality| modality.as_str()),
331                    "artifact_kind": attribution.artifact_kind.as_str(),
332                    "artifact_modality": attribution.artifact_modality.as_str(),
333                    "matched_terms": attribution.matched_terms,
334                    "match_score": attribution.match_score,
335                    "score_boost": attribution.score_boost,
336                })
337            })
338            .collect(),
339    )
340}
341
342async fn package_resource_preview_packages(
343    db: &HirnDB,
344    actor_id: &AgentId,
345    resource_evidence: &[ResourceEvidenceSummary],
346    max_resource_previews: usize,
347    max_resource_preview_chars: usize,
348    preview_cache: &mut PreviewPackageCache,
349) -> Vec<ResourcePreviewPackage> {
350    let mut packaged = Vec::new();
351    for summary in resource_evidence
352        .iter()
353        .filter(|summary| summary.has_preview && summary.can_hydrate_preview)
354        .take(max_resource_previews)
355    {
356        let cached = if let Some(cached) = preview_cache.cached_previews.get(&summary.resource_id) {
357            cached.clone()
358        } else {
359            let preview = match db
360                .fetch_resource(actor_id, summary.resource_id, HydrationMode::Preview)
361                .await
362            {
363                Ok(Some(resource)) => {
364                    select_cached_resource_preview(&resource, max_resource_preview_chars)
365                }
366                Ok(None) | Err(_) => None,
367            };
368            preview_cache
369                .cached_previews
370                .insert(summary.resource_id, preview.clone());
371            preview
372        };
373
374        if let Some(cached) = cached {
375            packaged.push(ResourcePreviewPackage {
376                resource_id: summary.resource_id,
377                role: summary.role,
378                display_name: summary.display_name.clone(),
379                modality: summary.modality,
380                artifact_kind: cached.artifact_kind,
381                artifact_modality: cached.artifact_modality,
382                text_content: cached.text_content,
383                truncated: cached.truncated,
384            });
385        }
386    }
387
388    packaged
389}
390
391fn select_cached_resource_preview(
392    hydrated: &hirn_storage::HydratedResource,
393    max_chars: usize,
394) -> Option<CachedResourcePreview> {
395    let artifact = PREFERRED_PREVIEW_ARTIFACTS
396        .iter()
397        .find_map(|kind| {
398            hydrated.artifacts.iter().find(|artifact| {
399                artifact.kind == *kind
400                    && artifact
401                        .text_content
402                        .as_deref()
403                        .is_some_and(|text| !text.trim().is_empty())
404            })
405        })
406        .or_else(|| {
407            hydrated.artifacts.iter().find(|artifact| {
408                artifact.kind.is_previewable()
409                    && artifact
410                        .text_content
411                        .as_deref()
412                        .is_some_and(|text| !text.trim().is_empty())
413            })
414        })?;
415
416    let text = artifact.text_content.as_deref()?.trim();
417    if text.is_empty() {
418        return None;
419    }
420
421    let truncated = text.chars().count() > max_chars;
422    let text_content = if truncated {
423        hirn_core::text_util::truncate_at_word_boundary(text, max_chars)
424    } else {
425        text.to_string()
426    };
427
428    Some(CachedResourcePreview {
429        artifact_kind: artifact.kind,
430        artifact_modality: artifact.modality,
431        text_content,
432        truncated,
433    })
434}
435
436fn best_resource_score_attribution(
437    query_terms: &BTreeSet<String>,
438    preview_packages: &[ResourcePreviewPackage],
439) -> Option<ResourceScoreAttribution> {
440    preview_packages
441        .iter()
442        .filter_map(|package| build_resource_score_attribution(query_terms, package))
443        .max_by(|left, right| {
444            left.match_score
445                .total_cmp(&right.match_score)
446                .then_with(|| left.score_boost.total_cmp(&right.score_boost))
447        })
448}
449
450fn build_resource_score_attribution(
451    query_terms: &BTreeSet<String>,
452    package: &ResourcePreviewPackage,
453) -> Option<ResourceScoreAttribution> {
454    let preview_terms = normalized_terms(&package.text_content);
455    if preview_terms.is_empty() {
456        return None;
457    }
458
459    let matched_terms: Vec<String> = query_terms
460        .intersection(&preview_terms)
461        .take(MAX_ATTRIBUTION_TERMS)
462        .cloned()
463        .collect();
464    if matched_terms.is_empty() {
465        return None;
466    }
467
468    let coverage = matched_terms.len() as f32 / query_terms.len() as f32;
469    let density = matched_terms.len() as f32 / preview_terms.len() as f32;
470    let match_score = (coverage * 0.75 + density * 0.25).clamp(0.0, 1.0);
471    let score_boost = (match_score * RESOURCE_PREVIEW_RERANK_WEIGHT).clamp(0.0, 1.0);
472
473    Some(ResourceScoreAttribution {
474        resource_id: package.resource_id,
475        role: package.role,
476        display_name: package.display_name.clone(),
477        modality: package.modality,
478        artifact_kind: package.artifact_kind,
479        artifact_modality: package.artifact_modality,
480        matched_terms,
481        match_score,
482        score_boost,
483    })
484}
485
486pub(crate) fn reuse_seeded_preview_packages(
487    packages: &[ResourcePreviewPackage],
488    max_resource_previews: usize,
489    max_resource_preview_chars: usize,
490) -> Option<Vec<ResourcePreviewPackage>> {
491    if max_resource_previews == 0 || max_resource_preview_chars == 0 {
492        return Some(Vec::new());
493    }
494    if packages.len() < max_resource_previews {
495        return None;
496    }
497
498    let mut reused = Vec::with_capacity(max_resource_previews);
499    for package in packages.iter().take(max_resource_previews) {
500        let visible_text = if package.truncated {
501            package
502                .text_content
503                .strip_suffix("...")
504                .unwrap_or(&package.text_content)
505        } else {
506            &package.text_content
507        };
508        let visible_chars = visible_text.chars().count();
509
510        if package.truncated && max_resource_preview_chars > visible_chars {
511            return None;
512        }
513
514        let mut reused_package = package.clone();
515        if visible_chars > max_resource_preview_chars {
516            reused_package.text_content = hirn_core::text_util::truncate_at_word_boundary(
517                visible_text,
518                max_resource_preview_chars,
519            );
520            reused_package.truncated = true;
521        } else if package.truncated {
522            reused_package
523                .text_content
524                .clone_from(&package.text_content);
525            reused_package.truncated = true;
526        } else {
527            reused_package.text_content = visible_text.to_string();
528            reused_package.truncated = false;
529        }
530        reused.push(reused_package);
531    }
532
533    Some(reused)
534}
535
/// Splits `text` into a sorted set of distinct, lowercased terms.
///
/// A term is a maximal run of alphanumeric characters; every other character
/// acts as a separator. Characters are lowercased one at a time, and terms
/// shorter than three characters (after lowercasing) are discarded.
fn normalized_terms(text: &str) -> BTreeSet<String> {
    let mut terms = BTreeSet::new();
    let mut buffer = String::new();

    for run in text.split(|ch: char| !ch.is_alphanumeric()) {
        // Per-character lowercasing, not `str::to_lowercase`, which applies
        // context-sensitive rules and could yield different terms.
        buffer.extend(run.chars().flat_map(char::to_lowercase));
        push_normalized_term(&mut terms, &mut buffer);
    }

    terms
}

/// Moves `current` into `terms` if it has at least three characters; in every
/// case `current` is left empty for the next term.
fn push_normalized_term(terms: &mut BTreeSet<String>, current: &mut String) {
    let too_short = current.chars().nth(2).is_none();
    if too_short {
        current.clear();
        return;
    }
    terms.insert(std::mem::take(current));
}
559
// Unit tests for the pure helpers in this module: query/preview overlap
// scoring, reuse of seeded preview packages, and preview artifact selection.
#[cfg(test)]
mod tests {
    use super::*;
    use hirn_core::types::Namespace;

    // A preview sharing terms with the query yields a positive match score and
    // boost, and the shared terms are reported.
    #[test]
    fn resource_score_attribution_uses_query_overlap() {
        let package = ResourcePreviewPackage {
            resource_id: ResourceId::new(),
            role: hirn_core::EvidenceRole::Source,
            display_name: Some("incident.png".to_string()),
            modality: Some(ModalityProfile::Image),
            artifact_kind: DerivedArtifactKind::Preview,
            artifact_modality: ModalityProfile::Text,
            text_content: "waf latency spike timeline with annotated edge nodes".to_string(),
            truncated: false,
        };

        let attribution = build_resource_score_attribution(
            &normalized_terms("investigate waf latency spike"),
            &package,
        )
        .expect("preview text should overlap the query");

        assert!(attribution.match_score > 0.0);
        assert!(attribution.score_boost > 0.0);
        assert!(
            attribution
                .matched_terms
                .iter()
                .any(|term| term == "latency")
        );
    }

    // A seeded, already-truncated package can serve a smaller character budget
    // by being shortened again.
    #[test]
    fn seeded_preview_packages_can_be_reused_for_smaller_budget() {
        let packages = vec![ResourcePreviewPackage {
            resource_id: ResourceId::new(),
            role: hirn_core::EvidenceRole::Attachment,
            display_name: Some("preview".to_string()),
            modality: Some(ModalityProfile::Image),
            artifact_kind: DerivedArtifactKind::Preview,
            artifact_modality: ModalityProfile::Text,
            text_content: "alpha beta gamma delta epsilon zeta...".to_string(),
            truncated: true,
        }];

        let reused = reuse_seeded_preview_packages(&packages, 1, 15).unwrap();

        assert_eq!(reused.len(), 1);
        assert!(reused[0].truncated);
        assert!(reused[0].text_content.ends_with("..."));
        assert!(reused[0].text_content.contains("alpha beta"));
    }

    // A truncated seed cannot satisfy a budget larger than its visible text, so
    // reuse must be declined to force a refetch.
    #[test]
    fn seeded_preview_packages_refetch_when_budget_needs_more_text() {
        let packages = vec![ResourcePreviewPackage {
            resource_id: ResourceId::new(),
            role: hirn_core::EvidenceRole::Attachment,
            display_name: Some("preview".to_string()),
            modality: Some(ModalityProfile::Image),
            artifact_kind: DerivedArtifactKind::Preview,
            artifact_modality: ModalityProfile::Text,
            text_content: "alpha beta gamma delta epsilon zeta...".to_string(),
            truncated: true,
        }];

        assert!(reuse_seeded_preview_packages(&packages, 1, 128).is_none());
    }

    // Selection follows the preferred-kind order rather than artifact order:
    // the caption is listed second here but ranks above OCR text.
    #[test]
    fn cached_preview_prefers_caption_over_ocr_text_when_both_exist() {
        let resource = hirn_core::ResourceObject::builder()
            .modality(ModalityProfile::Image)
            .location(hirn_core::ResourceLocation::Blob { blob_index: 0 })
            .build()
            .unwrap();
        let hydrated = hirn_storage::HydratedResource {
            resource,
            artifacts: vec![
                hirn_core::DerivedArtifact::builder()
                    .resource_id(ResourceId::new())
                    .kind(DerivedArtifactKind::OcrText)
                    .modality(ModalityProfile::Text)
                    .text_content("fallback ocr text")
                    .namespace(Namespace::default())
                    .build()
                    .unwrap(),
                hirn_core::DerivedArtifact::builder()
                    .resource_id(ResourceId::new())
                    .kind(DerivedArtifactKind::Caption)
                    .modality(ModalityProfile::Text)
                    .text_content("caption text")
                    .namespace(Namespace::default())
                    .build()
                    .unwrap(),
            ],
            blob: None,
        };

        let preview = select_cached_resource_preview(&hydrated, 128).unwrap();
        assert_eq!(preview.artifact_kind, DerivedArtifactKind::Caption);
        assert_eq!(preview.text_content, "caption text");
    }
}