1use std::collections::{BTreeMap, BTreeSet};
2use std::time::Instant;
3
4use serde::{Deserialize, Serialize};
5
6use hirn_core::error::HirnResult;
7use hirn_core::id::MemoryId;
8use hirn_core::types::AgentId;
9use hirn_core::{DerivedArtifactKind, HydrationMode, ModalityProfile, ResourceId};
10
11use crate::db::HirnDB;
12use crate::ql::results::ScoredMemory;
13use crate::recall::{RecallResult, ResourceEvidenceSummary};
14
15const PREFERRED_PREVIEW_ARTIFACTS: [DerivedArtifactKind; 7] = [
16 DerivedArtifactKind::Preview,
17 DerivedArtifactKind::Transcript,
18 DerivedArtifactKind::Caption,
19 DerivedArtifactKind::OcrText,
20 DerivedArtifactKind::SyntaxSummary,
21 DerivedArtifactKind::SchemaSummary,
22 DerivedArtifactKind::Thumbnail,
23];
24
/// Multiplier that turns a preview match score into the boost added to a result's score.
const RESOURCE_PREVIEW_RERANK_WEIGHT: f32 = 0.08;

/// Upper bound on the number of matched terms recorded in one score attribution.
const MAX_ATTRIBUTION_TERMS: usize = 6;
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub(crate) struct ResourcePreviewPackage {
30 pub(crate) resource_id: ResourceId,
31 pub(crate) role: hirn_core::EvidenceRole,
32 pub(crate) display_name: Option<String>,
33 pub(crate) modality: Option<ModalityProfile>,
34 pub(crate) artifact_kind: DerivedArtifactKind,
35 pub(crate) artifact_modality: ModalityProfile,
36 pub(crate) text_content: String,
37 pub(crate) truncated: bool,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct ResourceScoreAttribution {
42 pub resource_id: ResourceId,
43 pub role: hirn_core::EvidenceRole,
44 pub display_name: Option<String>,
45 pub modality: Option<ModalityProfile>,
46 pub artifact_kind: DerivedArtifactKind,
47 pub artifact_modality: ModalityProfile,
48 pub matched_terms: Vec<String>,
49 pub match_score: f32,
50 pub score_boost: f32,
51}
52
53#[derive(Debug, Clone)]
54struct CachedResourcePreview {
55 artifact_kind: DerivedArtifactKind,
56 artifact_modality: ModalityProfile,
57 text_content: String,
58 truncated: bool,
59}
60
61#[derive(Debug, Default)]
62pub(crate) struct PreviewPackageCache {
63 cached_previews: BTreeMap<ResourceId, Option<CachedResourcePreview>>,
64}
65
/// Caller surface on whose behalf preview packages are resolved; used to label metrics.
#[derive(Debug, Clone, Copy)]
pub(crate) enum PreviewPackageSurface {
    /// Reported under the `"recall"` label.
    Recall,
    /// Reported under the `"think"` label.
    Think,
}

impl PreviewPackageSurface {
    /// Returns the static string used as the `surface` metric label for this variant.
    const fn as_label(self) -> &'static str {
        match self {
            PreviewPackageSurface::Think => "think",
            PreviewPackageSurface::Recall => "recall",
        }
    }
}
80
81pub(crate) async fn hydrate_resource_preview_packages_for_scored_records(
82 db: &HirnDB,
83 actor_id: &AgentId,
84 scored: &[ScoredMemory],
85 max_resource_previews_per_record: usize,
86 max_resource_preview_chars: usize,
87) -> HirnResult<BTreeMap<MemoryId, Vec<ResourcePreviewPackage>>> {
88 if max_resource_previews_per_record == 0 || max_resource_preview_chars == 0 {
89 return Ok(BTreeMap::new());
90 }
91
92 let mut preview_cache = PreviewPackageCache::default();
93 let mut packaged = BTreeMap::new();
94
95 for scored_record in scored {
96 let preview_packages = package_resource_preview_packages_for_evidence(
97 db,
98 actor_id,
99 &scored_record.resource_evidence,
100 &scored_record.resource_preview_packages,
101 max_resource_previews_per_record,
102 max_resource_preview_chars,
103 &mut preview_cache,
104 PreviewPackageSurface::Recall,
105 )
106 .await;
107 if !preview_packages.is_empty() {
108 packaged.insert(scored_record.record.id(), preview_packages);
109 }
110 }
111
112 Ok(packaged)
113}
114
115pub(crate) async fn apply_resource_preview_rerank(
116 db: &HirnDB,
117 actor_id: &AgentId,
118 query_text: &str,
119 results: &mut [RecallResult],
120 max_resource_previews_per_result: usize,
121 max_resource_preview_chars: usize,
122) -> HirnResult<()> {
123 if results.is_empty()
124 || max_resource_previews_per_result == 0
125 || max_resource_preview_chars == 0
126 {
127 return Ok(());
128 }
129
130 let query_terms = normalized_terms(query_text);
131 if query_terms.is_empty() {
132 return Ok(());
133 }
134
135 let mut preview_cache = PreviewPackageCache::default();
136 for result in results.iter_mut() {
137 let preview_packages = package_resource_preview_packages(
138 db,
139 actor_id,
140 &result.resource_evidence,
141 max_resource_previews_per_result,
142 max_resource_preview_chars,
143 &mut preview_cache,
144 )
145 .await;
146
147 result
148 .resource_preview_packages
149 .clone_from(&preview_packages);
150
151 if let Some(attribution) = best_resource_score_attribution(&query_terms, &preview_packages)
152 {
153 result.composite_score =
154 (result.composite_score + attribution.score_boost).clamp(0.0, 1.0);
155 result.resource_score_attribution = vec![attribution];
156 } else {
157 result.resource_score_attribution.clear();
158 }
159 }
160
161 results.sort_by(|left, right| {
162 right
163 .composite_score
164 .total_cmp(&left.composite_score)
165 .then_with(|| right.similarity.total_cmp(&left.similarity))
166 });
167
168 Ok(())
169}
170
171pub(crate) async fn apply_resource_preview_rerank_to_scored_records(
172 db: &HirnDB,
173 actor_id: &AgentId,
174 query_text: &str,
175 results: &mut [ScoredMemory],
176 max_resource_previews_per_result: usize,
177 max_resource_preview_chars: usize,
178) -> HirnResult<()> {
179 if results.is_empty()
180 || max_resource_previews_per_result == 0
181 || max_resource_preview_chars == 0
182 {
183 return Ok(());
184 }
185
186 let query_terms = normalized_terms(query_text);
187 if query_terms.is_empty() {
188 return Ok(());
189 }
190
191 let mut preview_cache = PreviewPackageCache::default();
192 for result in results.iter_mut() {
193 let preview_packages = package_resource_preview_packages_for_evidence(
194 db,
195 actor_id,
196 &result.resource_evidence,
197 &result.resource_preview_packages,
198 max_resource_previews_per_result,
199 max_resource_preview_chars,
200 &mut preview_cache,
201 PreviewPackageSurface::Recall,
202 )
203 .await;
204
205 result
206 .resource_preview_packages
207 .clone_from(&preview_packages);
208
209 if let Some(attribution) = best_resource_score_attribution(&query_terms, &preview_packages)
210 {
211 result.score = (result.score + attribution.score_boost).clamp(0.0, 1.0);
212 result.resource_score_attribution = vec![attribution];
213 } else {
214 result.resource_score_attribution.clear();
215 }
216 }
217
218 results.sort_by(|left, right| {
219 right.score.total_cmp(&left.score).then_with(|| {
220 right
221 .score_breakdown
222 .similarity
223 .total_cmp(&left.score_breakdown.similarity)
224 })
225 });
226
227 Ok(())
228}
229
230pub(crate) async fn package_resource_preview_packages_for_evidence(
231 db: &HirnDB,
232 actor_id: &AgentId,
233 resource_evidence: &[ResourceEvidenceSummary],
234 seeded_packages: &[ResourcePreviewPackage],
235 max_resource_previews: usize,
236 max_resource_preview_chars: usize,
237 preview_cache: &mut PreviewPackageCache,
238 surface: PreviewPackageSurface,
239) -> Vec<ResourcePreviewPackage> {
240 let started = Instant::now();
241
242 if max_resource_previews == 0 || max_resource_preview_chars == 0 {
243 return Vec::new();
244 }
245
246 if let Some(reused) = reuse_seeded_preview_packages(
247 seeded_packages,
248 max_resource_previews,
249 max_resource_preview_chars,
250 ) {
251 if !reused.is_empty() {
252 record_preview_package_resolution(surface, "seeded_reuse", started.elapsed());
253 }
254 return reused;
255 }
256
257 let has_previewable_evidence = resource_evidence
258 .iter()
259 .any(|summary| summary.has_preview && summary.can_hydrate_preview);
260
261 let packaged = package_resource_preview_packages(
262 db,
263 actor_id,
264 resource_evidence,
265 max_resource_previews,
266 max_resource_preview_chars,
267 preview_cache,
268 )
269 .await;
270
271 if has_previewable_evidence {
272 record_preview_package_resolution(surface, "hydrated_refetch", started.elapsed());
273 }
274
275 packaged
276}
277
278pub(crate) fn resource_preview_packages_to_json(
279 packages: &[ResourcePreviewPackage],
280) -> serde_json::Value {
281 serde_json::Value::Array(
282 packages
283 .iter()
284 .map(|package| {
285 serde_json::json!({
286 "resource_id": package.resource_id.to_string(),
287 "role": package.role.as_str(),
288 "display_name": package.display_name,
289 "modality": package.modality.map(|modality| modality.as_str()),
290 "artifact_kind": package.artifact_kind.as_str(),
291 "artifact_modality": package.artifact_modality.as_str(),
292 "text_content": package.text_content,
293 "truncated": package.truncated,
294 })
295 })
296 .collect(),
297 )
298}
299
300fn record_preview_package_resolution(
301 surface: PreviewPackageSurface,
302 path: &'static str,
303 elapsed: std::time::Duration,
304) {
305 metrics::counter!(
306 crate::metrics::PREVIEW_PACKAGE_PATH_TOTAL,
307 "surface" => surface.as_label(),
308 "path" => path
309 )
310 .increment(1);
311 metrics::histogram!(
312 crate::metrics::PREVIEW_PACKAGE_RESOLUTION_SECONDS,
313 "surface" => surface.as_label(),
314 "path" => path
315 )
316 .record(elapsed.as_secs_f64());
317}
318
319pub(crate) fn resource_score_attribution_to_json(
320 attributions: &[ResourceScoreAttribution],
321) -> serde_json::Value {
322 serde_json::Value::Array(
323 attributions
324 .iter()
325 .map(|attribution| {
326 serde_json::json!({
327 "resource_id": attribution.resource_id.to_string(),
328 "role": attribution.role.as_str(),
329 "display_name": attribution.display_name,
330 "modality": attribution.modality.map(|modality| modality.as_str()),
331 "artifact_kind": attribution.artifact_kind.as_str(),
332 "artifact_modality": attribution.artifact_modality.as_str(),
333 "matched_terms": attribution.matched_terms,
334 "match_score": attribution.match_score,
335 "score_boost": attribution.score_boost,
336 })
337 })
338 .collect(),
339 )
340}
341
342async fn package_resource_preview_packages(
343 db: &HirnDB,
344 actor_id: &AgentId,
345 resource_evidence: &[ResourceEvidenceSummary],
346 max_resource_previews: usize,
347 max_resource_preview_chars: usize,
348 preview_cache: &mut PreviewPackageCache,
349) -> Vec<ResourcePreviewPackage> {
350 let mut packaged = Vec::new();
351 for summary in resource_evidence
352 .iter()
353 .filter(|summary| summary.has_preview && summary.can_hydrate_preview)
354 .take(max_resource_previews)
355 {
356 let cached = if let Some(cached) = preview_cache.cached_previews.get(&summary.resource_id) {
357 cached.clone()
358 } else {
359 let preview = match db
360 .fetch_resource(actor_id, summary.resource_id, HydrationMode::Preview)
361 .await
362 {
363 Ok(Some(resource)) => {
364 select_cached_resource_preview(&resource, max_resource_preview_chars)
365 }
366 Ok(None) | Err(_) => None,
367 };
368 preview_cache
369 .cached_previews
370 .insert(summary.resource_id, preview.clone());
371 preview
372 };
373
374 if let Some(cached) = cached {
375 packaged.push(ResourcePreviewPackage {
376 resource_id: summary.resource_id,
377 role: summary.role,
378 display_name: summary.display_name.clone(),
379 modality: summary.modality,
380 artifact_kind: cached.artifact_kind,
381 artifact_modality: cached.artifact_modality,
382 text_content: cached.text_content,
383 truncated: cached.truncated,
384 });
385 }
386 }
387
388 packaged
389}
390
391fn select_cached_resource_preview(
392 hydrated: &hirn_storage::HydratedResource,
393 max_chars: usize,
394) -> Option<CachedResourcePreview> {
395 let artifact = PREFERRED_PREVIEW_ARTIFACTS
396 .iter()
397 .find_map(|kind| {
398 hydrated.artifacts.iter().find(|artifact| {
399 artifact.kind == *kind
400 && artifact
401 .text_content
402 .as_deref()
403 .is_some_and(|text| !text.trim().is_empty())
404 })
405 })
406 .or_else(|| {
407 hydrated.artifacts.iter().find(|artifact| {
408 artifact.kind.is_previewable()
409 && artifact
410 .text_content
411 .as_deref()
412 .is_some_and(|text| !text.trim().is_empty())
413 })
414 })?;
415
416 let text = artifact.text_content.as_deref()?.trim();
417 if text.is_empty() {
418 return None;
419 }
420
421 let truncated = text.chars().count() > max_chars;
422 let text_content = if truncated {
423 hirn_core::text_util::truncate_at_word_boundary(text, max_chars)
424 } else {
425 text.to_string()
426 };
427
428 Some(CachedResourcePreview {
429 artifact_kind: artifact.kind,
430 artifact_modality: artifact.modality,
431 text_content,
432 truncated,
433 })
434}
435
436fn best_resource_score_attribution(
437 query_terms: &BTreeSet<String>,
438 preview_packages: &[ResourcePreviewPackage],
439) -> Option<ResourceScoreAttribution> {
440 preview_packages
441 .iter()
442 .filter_map(|package| build_resource_score_attribution(query_terms, package))
443 .max_by(|left, right| {
444 left.match_score
445 .total_cmp(&right.match_score)
446 .then_with(|| left.score_boost.total_cmp(&right.score_boost))
447 })
448}
449
450fn build_resource_score_attribution(
451 query_terms: &BTreeSet<String>,
452 package: &ResourcePreviewPackage,
453) -> Option<ResourceScoreAttribution> {
454 let preview_terms = normalized_terms(&package.text_content);
455 if preview_terms.is_empty() {
456 return None;
457 }
458
459 let matched_terms: Vec<String> = query_terms
460 .intersection(&preview_terms)
461 .take(MAX_ATTRIBUTION_TERMS)
462 .cloned()
463 .collect();
464 if matched_terms.is_empty() {
465 return None;
466 }
467
468 let coverage = matched_terms.len() as f32 / query_terms.len() as f32;
469 let density = matched_terms.len() as f32 / preview_terms.len() as f32;
470 let match_score = (coverage * 0.75 + density * 0.25).clamp(0.0, 1.0);
471 let score_boost = (match_score * RESOURCE_PREVIEW_RERANK_WEIGHT).clamp(0.0, 1.0);
472
473 Some(ResourceScoreAttribution {
474 resource_id: package.resource_id,
475 role: package.role,
476 display_name: package.display_name.clone(),
477 modality: package.modality,
478 artifact_kind: package.artifact_kind,
479 artifact_modality: package.artifact_modality,
480 matched_terms,
481 match_score,
482 score_boost,
483 })
484}
485
486pub(crate) fn reuse_seeded_preview_packages(
487 packages: &[ResourcePreviewPackage],
488 max_resource_previews: usize,
489 max_resource_preview_chars: usize,
490) -> Option<Vec<ResourcePreviewPackage>> {
491 if max_resource_previews == 0 || max_resource_preview_chars == 0 {
492 return Some(Vec::new());
493 }
494 if packages.len() < max_resource_previews {
495 return None;
496 }
497
498 let mut reused = Vec::with_capacity(max_resource_previews);
499 for package in packages.iter().take(max_resource_previews) {
500 let visible_text = if package.truncated {
501 package
502 .text_content
503 .strip_suffix("...")
504 .unwrap_or(&package.text_content)
505 } else {
506 &package.text_content
507 };
508 let visible_chars = visible_text.chars().count();
509
510 if package.truncated && max_resource_preview_chars > visible_chars {
511 return None;
512 }
513
514 let mut reused_package = package.clone();
515 if visible_chars > max_resource_preview_chars {
516 reused_package.text_content = hirn_core::text_util::truncate_at_word_boundary(
517 visible_text,
518 max_resource_preview_chars,
519 );
520 reused_package.truncated = true;
521 } else if package.truncated {
522 reused_package
523 .text_content
524 .clone_from(&package.text_content);
525 reused_package.truncated = true;
526 } else {
527 reused_package.text_content = visible_text.to_string();
528 reused_package.truncated = false;
529 }
530 reused.push(reused_package);
531 }
532
533 Some(reused)
534}
535
/// Splits `text` into the set of its lowercase alphanumeric terms.
///
/// A term is a maximal run of alphanumeric characters, lowercased character by character.
/// Runs shorter than three characters after lowercasing are dropped, and duplicates collapse
/// because the result is a set.
fn normalized_terms(text: &str) -> BTreeSet<String> {
    let mut collected = BTreeSet::new();
    let mut pending = String::new();

    // The appended separator flushes a run that reaches the end of the text.
    for symbol in text.chars().chain(std::iter::once(' ')) {
        if symbol.is_alphanumeric() {
            pending.extend(symbol.to_lowercase());
            continue;
        }
        push_normalized_term(&mut collected, &mut pending);
    }

    collected
}

/// Moves `current` into `terms` when it holds at least three characters; either way
/// `current` is left empty for the next run.
fn push_normalized_term(terms: &mut BTreeSet<String>, current: &mut String) {
    let long_enough = current.chars().nth(2).is_some();
    if long_enough {
        terms.insert(std::mem::take(current));
    } else {
        current.clear();
    }
}
559
#[cfg(test)]
mod tests {
    use super::*;
    use hirn_core::types::Namespace;

    /// Seed package whose text is already truncated; shared by the reuse tests.
    fn truncated_seed_package() -> ResourcePreviewPackage {
        ResourcePreviewPackage {
            resource_id: ResourceId::new(),
            role: hirn_core::EvidenceRole::Attachment,
            display_name: Some("preview".to_string()),
            modality: Some(ModalityProfile::Image),
            artifact_kind: DerivedArtifactKind::Preview,
            artifact_modality: ModalityProfile::Text,
            text_content: "alpha beta gamma delta epsilon zeta...".to_string(),
            truncated: true,
        }
    }

    // A preview sharing terms with the query must yield a positive score and boost.
    #[test]
    fn resource_score_attribution_uses_query_overlap() {
        let query_terms = normalized_terms("investigate waf latency spike");
        let package = ResourcePreviewPackage {
            resource_id: ResourceId::new(),
            role: hirn_core::EvidenceRole::Source,
            display_name: Some("incident.png".to_string()),
            modality: Some(ModalityProfile::Image),
            artifact_kind: DerivedArtifactKind::Preview,
            artifact_modality: ModalityProfile::Text,
            text_content: "waf latency spike timeline with annotated edge nodes".to_string(),
            truncated: false,
        };

        let attribution = build_resource_score_attribution(&query_terms, &package)
            .expect("preview text should overlap the query");

        assert!(attribution.match_score > 0.0);
        assert!(attribution.score_boost > 0.0);
        assert!(attribution.matched_terms.contains(&"latency".to_string()));
    }

    // Shrinking the character budget can be served from the seeded text.
    #[test]
    fn seeded_preview_packages_can_be_reused_for_smaller_budget() {
        let seeded = vec![truncated_seed_package()];

        let reused = reuse_seeded_preview_packages(&seeded, 1, 15).unwrap();

        assert_eq!(reused.len(), 1);
        let only = &reused[0];
        assert!(only.truncated);
        assert!(only.text_content.ends_with("..."));
        assert!(only.text_content.contains("alpha beta"));
    }

    // A larger budget than the truncated seed kept cannot be served without refetching.
    #[test]
    fn seeded_preview_packages_refetch_when_budget_needs_more_text() {
        let seeded = vec![truncated_seed_package()];

        assert!(reuse_seeded_preview_packages(&seeded, 1, 128).is_none());
    }

    // Preference order, not artifact order, decides which text is used.
    #[test]
    fn cached_preview_prefers_caption_over_ocr_text_when_both_exist() {
        let resource = hirn_core::ResourceObject::builder()
            .modality(ModalityProfile::Image)
            .location(hirn_core::ResourceLocation::Blob { blob_index: 0 })
            .build()
            .unwrap();
        let ocr_artifact = hirn_core::DerivedArtifact::builder()
            .resource_id(ResourceId::new())
            .kind(DerivedArtifactKind::OcrText)
            .modality(ModalityProfile::Text)
            .text_content("fallback ocr text")
            .namespace(Namespace::default())
            .build()
            .unwrap();
        let caption_artifact = hirn_core::DerivedArtifact::builder()
            .resource_id(ResourceId::new())
            .kind(DerivedArtifactKind::Caption)
            .modality(ModalityProfile::Text)
            .text_content("caption text")
            .namespace(Namespace::default())
            .build()
            .unwrap();
        let hydrated = hirn_storage::HydratedResource {
            resource,
            artifacts: vec![ocr_artifact, caption_artifact],
            blob: None,
        };

        let preview = select_cached_resource_preview(&hydrated, 128).unwrap();

        assert_eq!(preview.artifact_kind, DerivedArtifactKind::Caption);
        assert_eq!(preview.text_content, "caption text");
    }
}