Skip to main content

kaizen/search/
extract.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Event-to-search-doc projection.
3
4use crate::core::event::{Event, EventKind, SessionRecord};
5use crate::store::event_index::{paths_from_event_payload, skills_from_event_json};
6use crate::sync::redact::{redact_payload, redact_string};
7use serde_json::Value;
8use std::path::Path;
9
10#[derive(Debug, Clone, serde::Serialize)]
11pub struct SearchDoc {
12    pub session_id: String,
13    pub seq: u64,
14    pub ts_ms: u64,
15    pub agent: String,
16    pub kind: String,
17    pub text: String,
18    pub paths: Vec<String>,
19    pub skills: Vec<String>,
20    pub tokens_total: i64,
21}
22
23pub fn extract_doc(
24    event: &Event,
25    session: &SessionRecord,
26    workspace: &Path,
27    salt: &[u8; 32],
28) -> Option<SearchDoc> {
29    let kind = kind_label(&event.kind)?.to_string();
30    let mut payload = event.payload.clone();
31    redact_payload(&mut payload, workspace, salt);
32    let text = event_text(event, &payload, workspace, salt);
33    (!text.trim().is_empty()).then(|| SearchDoc {
34        session_id: event.session_id.clone(),
35        seq: event.seq,
36        ts_ms: event.ts_ms,
37        agent: session.agent.clone(),
38        kind,
39        text,
40        paths: paths_from_event_payload(&payload),
41        skills: skills_from_event_json(&payload),
42        tokens_total: tokens_total(event),
43    })
44}
45
46pub fn kind_label(kind: &EventKind) -> Option<&'static str> {
47    match kind {
48        EventKind::Message => Some("message"),
49        EventKind::ToolCall => Some("tool_use"),
50        EventKind::ToolResult => Some("tool_result"),
51        _ => None,
52    }
53}
54
55pub fn tokens_total(event: &Event) -> i64 {
56    [event.tokens_in, event.tokens_out, event.reasoning_tokens]
57        .into_iter()
58        .flatten()
59        .map(i64::from)
60        .sum()
61}
62
63pub fn redacted_event_text(event: &Event, workspace: &Path, salt: &[u8; 32]) -> String {
64    let mut payload = event.payload.clone();
65    redact_payload(&mut payload, workspace, salt);
66    event_text(event, &payload, workspace, salt)
67}
68
69pub fn snippet(text: &str, query: &str) -> String {
70    let base = text
71        .split_whitespace()
72        .take(32)
73        .collect::<Vec<_>>()
74        .join(" ");
75    highlight_terms(&base.replace('\n', " "), query)
76}
77
78fn event_text(event: &Event, payload: &Value, workspace: &Path, salt: &[u8; 32]) -> String {
79    let mut out = Vec::new();
80    if let Some(tool) = event.tool.as_deref() {
81        out.push(redact_string(tool, workspace, salt));
82    }
83    collect_strings(payload, &mut out);
84    out.join(" ")
85}
86
87fn collect_strings(value: &Value, out: &mut Vec<String>) {
88    match value {
89        Value::String(s) => out.push(s.clone()),
90        Value::Array(items) => items.iter().for_each(|v| collect_strings(v, out)),
91        Value::Object(map) => map.values().for_each(|v| collect_strings(v, out)),
92        _ => {}
93    }
94}
95
/// Wraps every occurrence of a query term in `text` with `**…**`.
///
/// Matching is exact (case-sensitive) and done in a single left-to-right pass
/// over the original text, so markup that was already emitted is never
/// re-scanned. A repeated term, or a term contained in another term, therefore
/// cannot produce nested markers such as `****foo****`.
///
/// When several terms match at the same position the longest one wins.
fn highlight_terms(text: &str, query: &str) -> String {
    let mut terms = query_terms(query);
    // Longest first (ties broken lexicographically) so equal terms end up
    // adjacent for `dedup` and longer terms take priority over their prefixes.
    terms.sort_unstable_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));
    terms.dedup();
    if terms.is_empty() {
        return text.to_string();
    }

    let mut out = String::with_capacity(text.len());
    let mut rest = text;
    while !rest.is_empty() {
        let hit = terms
            .iter()
            .find_map(|term| rest.strip_prefix(term.as_str()).map(|tail| (term, tail)));
        if let Some((term, tail)) = hit {
            out.push_str("**");
            out.push_str(term);
            out.push_str("**");
            rest = tail;
            continue;
        }
        // No term starts here: copy one character and move on.
        let mut chars = rest.chars();
        if let Some(ch) = chars.next() {
            out.push(ch);
        }
        rest = chars.as_str();
    }
    out
}

/// Splits a search query into the literal terms worth highlighting.
///
/// The query is split on whitespace and on the characters `():'"><=`. Pieces
/// of two bytes or fewer and the operator words `AND`, `OR` and `NOT` are
/// dropped. Note that the length check is in bytes, not characters.
fn query_terms(query: &str) -> Vec<String> {
    query
        .split(|c: char| c.is_whitespace() || "():'\"><=".contains(c))
        .filter(|s| s.len() > 2 && !matches!(*s, "AND" | "OR" | "NOT"))
        .map(str::to_string)
        .collect()
}