use serde::{Deserialize, Serialize};
/// One conversation handed to the summarizing / triage pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversationChunk {
/// Index of the chunk; copied into `ChunkSummary::index` by `chunks_to_summaries`.
pub index: usize,
/// Optional title; `chunks_to_summaries` substitutes "Untitled" when absent.
pub title: Option<String>,
/// Messages of the conversation; the last element is treated as the final message.
pub messages: Vec<ChunkMessage>,
/// Optional timestamp string, copied unchanged into the summary.
pub timestamp: Option<String>,
}
/// A single message inside a `ConversationChunk`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkMessage {
/// Speaker role; `chunks_to_summaries` looks for the literal value "user".
pub role: String,
/// Message text.
pub content: String,
}
/// Compact description of a conversation, as produced by `chunks_to_summaries`
/// and consumed by the prompt builders.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkSummary {
/// Index of the source chunk; the prompt builders print it as `index + 1`.
pub index: usize,
/// Conversation title ("Untitled" when the chunk had none).
pub title: String,
/// Truncated text of the first message whose role is "user"; empty if there is none.
pub first_message: String,
/// Truncated text of the last message of any role; empty if the chunk has no messages.
pub last_message: String,
/// Total number of messages in the source chunk.
pub message_count: usize,
/// Timestamp string copied from the source chunk, if any.
pub timestamp: Option<String>,
}
/// Profile extracted from one batch of conversations.
///
/// As built by `parse_profile_batch_response`, every field is `None` when the
/// model output omitted it or supplied an empty value.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PartialProfile {
/// Free-text identity description.
pub identity: Option<String>,
/// Recurring topics.
pub themes: Option<Vec<String>>,
/// Named projects or work streams.
pub projects: Option<Vec<String>>,
/// Technologies mentioned.
pub stack: Option<Vec<String>>,
/// Decisions made by the user.
pub decisions: Option<Vec<String>>,
/// Personal interests and preferences.
pub interests: Option<Vec<String>>,
/// Kinds of conversation considered generic Q&A.
pub skip_patterns: Option<Vec<String>>,
}
/// Merged user profile used to build the triage and extraction prompts.
///
/// As built by `parse_profile_response`, list fields are empty (not absent)
/// when the model output omitted them.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct UserProfile {
/// Free-text identity description, if known.
pub identity: Option<String>,
/// Recurring topics.
pub themes: Vec<String>,
/// Named projects or work streams.
pub projects: Vec<String>,
/// Technologies mentioned.
pub stack: Vec<String>,
/// Decisions made by the user.
pub decisions: Vec<String>,
/// Personal interests and preferences.
pub interests: Vec<String>,
/// Kinds of conversation considered generic Q&A.
pub skip_patterns: Vec<String>,
}
/// Triage verdict for one conversation, as parsed by `parse_triage_response`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkDecision {
/// Index reported by the model (read from the "index" or "chunk_index" key).
pub chunk_index: usize,
/// Whether the conversation should be extracted or skipped.
pub decision: TriageDecision,
/// Model-supplied justification; empty when none was given.
pub reason: String,
}
/// Outcome of triaging a conversation.
///
/// Serialized with upper-case variant names ("EXTRACT" / "SKIP") because of
/// the `rename_all = "UPPERCASE"` attribute.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
pub enum TriageDecision {
/// Keep the conversation for fact extraction.
Extract,
/// Ignore the conversation.
Skip,
}
/// Renders the decision as its upper-case label (`EXTRACT` / `SKIP`).
impl std::fmt::Display for TriageDecision {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            TriageDecision::Extract => "EXTRACT",
            TriageDecision::Skip => "SKIP",
        };
        f.write_str(label)
    }
}
/// Builds one [`ChunkSummary`] per input chunk, in the same order.
///
/// For each chunk the summary carries:
/// - the first message whose role is `"user"` (empty string when none exists),
/// - the last message regardless of role (empty string when the chunk is empty),
/// - the title, or `"Untitled"` when the chunk has none,
/// - the message count and a copy of the timestamp.
///
/// Both message previews go through `truncate_message` with a limit of 300.
pub fn chunks_to_summaries(chunks: &[ConversationChunk]) -> Vec<ChunkSummary> {
    const PREVIEW_LEN: usize = 300;

    let mut summaries = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        let opening = chunk.messages.iter().find(|msg| msg.role == "user");
        let closing = chunk.messages.last();

        let first_message = match opening {
            Some(msg) => truncate_message(&msg.content, PREVIEW_LEN),
            None => String::new(),
        };
        let last_message = match closing {
            Some(msg) => truncate_message(&msg.content, PREVIEW_LEN),
            None => String::new(),
        };
        let title = match &chunk.title {
            Some(text) => text.clone(),
            None => "Untitled".to_string(),
        };

        summaries.push(ChunkSummary {
            index: chunk.index,
            title,
            first_message,
            last_message,
            message_count: chunk.messages.len(),
            timestamp: chunk.timestamp.clone(),
        });
    }
    summaries
}
/// Builds the prompt that asks the model to derive a user profile from one
/// batch of conversation summaries.
///
/// Each summary is rendered as a numbered entry (`index + 1`) with its title,
/// optional timestamp in parentheses, message count, and the first / last
/// message previews. The entries are embedded under `[CONVERSATIONS]`,
/// followed by the JSON schema the model must return.
pub fn build_profile_batch_prompt(summaries: &[ChunkSummary]) -> String {
    let listing: String = summaries
        .iter()
        .map(|entry| {
            let stamp = match entry.timestamp.as_deref() {
                Some(t) => format!(" ({})", t),
                None => String::new(),
            };
            format!(
                "{}. \"{}\"{} [{} messages]\n First: \"{}\"\n Last: \"{}\"\n\n",
                entry.index + 1,
                entry.title,
                stamp,
                entry.message_count,
                entry.first_message,
                entry.last_message,
            )
        })
        .collect();

    format!(
        r#"Analyze these conversation excerpts and describe who this user is.
For each conversation, you see the title, the user's first message, and the final message in the thread.
[CONVERSATIONS]
{conversations}
Based on ALL conversations above, produce a structured JSON profile of this user. Include only what is evidenced — do not speculate.
Return a JSON object (no markdown fences, no commentary):
{{
"identity": "name, role, location, company if mentioned — or null if unknown",
"themes": ["recurring topics that appear multiple times"],
"projects": ["named projects or ongoing work streams"],
"stack": ["languages, frameworks, tools, cloud providers, databases"],
"decisions": ["specific choices made, e.g. 'chose Polars over Pandas for large dataset performance'"],
"interests": ["personal interests, hobbies, preferences, workflow habits"],
"skip_patterns": ["types of conversations that are generic Q&A with no personal value, e.g. 'recipe lookups', 'weather queries'"]
}}"#,
        conversations = listing
    )
}
/// Parses the model's reply to a profile-batch prompt into a [`PartialProfile`].
///
/// The reply is trimmed and stripped of a surrounding code fence. If the whole
/// text is a JSON object it is used directly; otherwise the span from the
/// first `{` to the last `}` is tried. When neither yields a JSON object the
/// default (all-`None`) profile is returned.
pub fn parse_profile_batch_response(llm_output: &str) -> PartialProfile {
    let cleaned = strip_code_fences(llm_output.trim());

    // Fast path: the entire reply is a JSON object.
    if let Ok(serde_json::Value::Object(fields)) =
        serde_json::from_str::<serde_json::Value>(&cleaned)
    {
        return parse_partial_profile_from_object(&fields);
    }

    // Fallback: pull out the outermost `{ ... }` span embedded in other text.
    let embedded = cleaned
        .find('{')
        .zip(cleaned.rfind('}'))
        .filter(|(open, close)| open < close)
        .and_then(|(open, close)| {
            serde_json::from_str::<serde_json::Value>(&cleaned[open..=close]).ok()
        });

    match embedded {
        Some(serde_json::Value::Object(fields)) => parse_partial_profile_from_object(&fields),
        _ => PartialProfile::default(),
    }
}
/// Builds the prompt that asks the model to merge several partial profiles
/// into one.
///
/// Every partial is pretty-printed as JSON under a `--- Batch N ---` header
/// (1-based); a partial that fails to serialize contributes an empty body.
pub fn build_profile_merge_prompt(partials: &[PartialProfile]) -> String {
    let batches: String = partials
        .iter()
        .enumerate()
        .map(|(position, partial)| {
            let body = serde_json::to_string_pretty(partial).unwrap_or_default();
            format!("--- Batch {} ---\n{}\n\n", position + 1, body)
        })
        .collect();

    format!(
        r#"You are merging multiple partial user profiles into one coherent profile.
Each profile below was extracted from a different batch of the same user's conversations. Combine them by:
- Deduplicating entries that are the same concept (e.g. "Python" appearing in multiple batches)
- Keeping the most specific version when entries overlap (e.g. "Python 3.11" over "Python")
- Preserving all unique information across batches
- For identity, merge fragments into the most complete description
[PARTIAL PROFILES]
{profiles_text}
Return a single merged JSON profile (no markdown fences, no commentary):
{{
"identity": "most complete identity description, or null if no identity info found",
"themes": ["all unique recurring themes, deduplicated"],
"projects": ["all unique projects, deduplicated"],
"stack": ["all unique technologies, deduplicated, most specific versions"],
"decisions": ["all unique decisions with reasoning"],
"interests": ["all unique interests, deduplicated"],
"skip_patterns": ["all unique skip patterns, deduplicated"]
}}"#,
        profiles_text = batches
    )
}
/// Parses the model's reply to the merge prompt into a [`UserProfile`].
///
/// The reply is trimmed and stripped of a surrounding code fence. A JSON
/// object is taken either from the whole text or, failing that, from the span
/// between the first `{` and the last `}`. If no object can be recovered the
/// default (empty) profile is returned.
///
/// `identity` is kept only when it is a non-empty string other than the
/// literal `"null"`; list fields keep their non-empty string entries.
pub fn parse_profile_response(llm_output: &str) -> UserProfile {
    let cleaned = strip_code_fences(llm_output.trim());

    let recovered = match serde_json::from_str::<serde_json::Value>(&cleaned) {
        Ok(serde_json::Value::Object(fields)) => Some(fields),
        _ => cleaned
            .find('{')
            .zip(cleaned.rfind('}'))
            .filter(|(open, close)| open < close)
            .and_then(|(open, close)| {
                match serde_json::from_str::<serde_json::Value>(&cleaned[open..=close]) {
                    Ok(serde_json::Value::Object(fields)) => Some(fields),
                    _ => None,
                }
            }),
    };

    let fields = match recovered {
        Some(fields) => fields,
        None => return UserProfile::default(),
    };

    let identity = fields
        .get("identity")
        .and_then(|value| value.as_str())
        .filter(|text| !text.is_empty() && *text != "null")
        .map(|text| text.to_string());

    UserProfile {
        identity,
        themes: extract_string_array(&fields, "themes"),
        projects: extract_string_array(&fields, "projects"),
        stack: extract_string_array(&fields, "stack"),
        decisions: extract_string_array(&fields, "decisions"),
        interests: extract_string_array(&fields, "interests"),
        skip_patterns: extract_string_array(&fields, "skip_patterns"),
    }
}
/// Builds the prompt that asks the model to classify each conversation as
/// `EXTRACT` or `SKIP` given the user's profile.
///
/// The profile is rendered with `format_profile_for_prompt`; each summary
/// becomes a numbered entry (`index + 1`) with title, optional timestamp
/// after a `|`, message count, and the first / last message previews.
pub fn build_triage_prompt(profile: &UserProfile, summaries: &[ChunkSummary]) -> String {
    let background = format_profile_for_prompt(profile);

    let listing: String = summaries
        .iter()
        .map(|entry| {
            let stamp = match entry.timestamp.as_deref() {
                Some(t) => format!(" | {}", t),
                None => String::new(),
            };
            format!(
                "{}. Title: \"{}\"{} [{} msgs]\n First: \"{}\"\n Last: \"{}\"\n",
                entry.index + 1,
                entry.title,
                stamp,
                entry.message_count,
                entry.first_message,
                entry.last_message,
            )
        })
        .collect();

    format!(
        r#"You are classifying conversations for import into a personal memory system.
[USER PROFILE]
{profile_text}
For each conversation below, decide:
- EXTRACT: Contains personal facts, decisions, preferences, project details, technical choices, or anything this specific user would want remembered long-term. When in doubt, lean toward EXTRACT.
- SKIP: Generic Q&A, one-off lookups, trivial requests with no personal value (e.g. "what's the capital of France?", recipe requests, simple translations).
[CONVERSATIONS]
{conversations}
Return a JSON array (no markdown fences, no commentary):
[{{"index": 0, "decision": "EXTRACT", "reason": "discusses their ML pipeline project"}}, ...]
Use 0-based indices matching the conversation numbers minus 1 (conversation 1 = index 0)."#,
        profile_text = background,
        conversations = listing
    )
}
/// Parses the model's reply to the triage prompt into a list of decisions.
///
/// The reply is trimmed and stripped of a surrounding code fence. A JSON array
/// is taken either from the whole text or, failing that, from the span between
/// the first `[` and the last `]`. If no array can be recovered an empty
/// vector is returned.
///
/// Per entry:
/// - non-object entries are skipped;
/// - the index is read from `"index"`, or `"chunk_index"` when `"index"` is
///   absent; entries without a non-negative integer index are skipped;
/// - an index that does not fit in `usize` is skipped rather than truncated
///   (relevant on 32-bit targets such as wasm32, where a plain `as` cast would
///   silently alias a different chunk);
/// - `"decision"` is matched case-insensitively; anything other than `SKIP`
///   (including a missing value) becomes [`TriageDecision::Extract`];
/// - a missing `"reason"` becomes the empty string.
pub fn parse_triage_response(llm_output: &str) -> Vec<ChunkDecision> {
    let cleaned = strip_code_fences(llm_output.trim());

    let entries = match serde_json::from_str::<serde_json::Value>(&cleaned) {
        Ok(serde_json::Value::Array(items)) => items,
        _ => {
            // Fallback: pull out the outermost `[ ... ]` span embedded in other text.
            let embedded = cleaned
                .find('[')
                .zip(cleaned.rfind(']'))
                .filter(|(open, close)| open < close)
                .and_then(|(open, close)| {
                    serde_json::from_str::<serde_json::Value>(&cleaned[open..=close]).ok()
                });
            match embedded {
                Some(serde_json::Value::Array(items)) => items,
                _ => return Vec::new(),
            }
        }
    };

    entries
        .iter()
        .filter_map(|entry| {
            let obj = entry.as_object()?;

            let raw_index = obj
                .get("index")
                .or_else(|| obj.get("chunk_index"))
                .and_then(|value| value.as_u64())?;
            // Checked conversion: never wrap a too-large index onto a valid chunk.
            let chunk_index = <usize as std::convert::TryFrom<u64>>::try_from(raw_index).ok()?;

            let decision_str = obj
                .get("decision")
                .and_then(|value| value.as_str())
                .unwrap_or("EXTRACT");
            // Unknown labels deliberately fall back to Extract.
            let decision = match decision_str.to_uppercase().as_str() {
                "SKIP" => TriageDecision::Skip,
                _ => TriageDecision::Extract,
            };

            let reason = obj
                .get("reason")
                .and_then(|value| value.as_str())
                .unwrap_or("")
                .to_string();

            Some(ChunkDecision {
                chunk_index,
                decision,
                reason,
            })
        })
        .collect()
}
/// Prefixes `base_prompt` with the user's profile and profile-aware
/// extraction guidance.
///
/// The profile is rendered with `format_profile_for_prompt`; `base_prompt` is
/// appended unchanged after the guidance list.
pub fn enrich_extraction_prompt(profile: &UserProfile, base_prompt: &str) -> String {
    let background = format_profile_for_prompt(profile);
    format!(
        r#"You are extracting facts from a conversation by a user with this background:
{profile_text}
Given this context:
- Focus on facts relevant to their work, projects, and technical decisions
- Extract preferences and workflow habits specific to this user
- For decisions, always include the reasoning ("chose X because Y")
- Skip generic information that is not specific to this user
- Prioritize information that connects to their known projects and themes
{base_prompt}"#,
        profile_text = background,
        base_prompt = base_prompt
    )
}
/// Removes a surrounding Markdown code fence from `s`, if `s` begins with one.
///
/// When `s` starts with three backticks, everything up to and including the
/// first newline is dropped (nothing is dropped if there is no newline); then
/// a trailing three-backtick fence is removed together with the whitespace
/// before it. Input that does not start with a fence is returned unchanged.
fn strip_code_fences(s: &str) -> String {
    if !s.starts_with("```") {
        return s.to_owned();
    }

    // Drop the opening fence line (e.g. "```json"), when one can be isolated.
    let body = match s.split_once('\n') {
        Some((_fence_line, rest)) => rest,
        None => s,
    };

    match body.strip_suffix("```") {
        Some(inner) => inner.trim_end().to_owned(),
        None => body.to_owned(),
    }
}
/// Trims `s` and shortens it to at most `max_len` bytes of text, appending
/// `...` when anything was cut off.
///
/// `max_len` is a byte count. If it falls inside a multi-byte UTF-8 character
/// the cut is moved back to the previous character boundary, so non-ASCII
/// text is truncated slightly shorter instead of panicking on the slice.
/// The returned string is therefore at most `max_len + 3` bytes long.
fn truncate_message(s: &str, max_len: usize) -> String {
    let trimmed = s.trim();
    if trimmed.len() <= max_len {
        return trimmed.to_string();
    }

    // Here `max_len < trimmed.len()`; index 0 is always a boundary, so the
    // loop terminates.
    let mut cut = max_len;
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &trimmed[..cut])
}
/// Reads `obj[key]` as a list of strings.
///
/// Returns an empty vector when the key is missing or its value is not an
/// array. Array elements that are not strings, and empty strings, are dropped.
fn extract_string_array(
    obj: &serde_json::Map<String, serde_json::Value>,
    key: &str,
) -> Vec<String> {
    let items = match obj.get(key).and_then(|value| value.as_array()) {
        Some(items) => items,
        None => return Vec::new(),
    };

    let mut strings = Vec::new();
    for item in items {
        if let Some(text) = item.as_str() {
            if !text.is_empty() {
                strings.push(text.to_string());
            }
        }
    }
    strings
}
/// Converts a parsed JSON object into a [`PartialProfile`].
///
/// `identity` is kept only when it is a non-empty string other than the
/// literal `"null"`. Each list field is `None` unless `non_empty_array`
/// finds at least one usable string under the matching key.
fn parse_partial_profile_from_object(
    obj: &serde_json::Map<String, serde_json::Value>,
) -> PartialProfile {
    let identity = match obj.get("identity").and_then(|value| value.as_str()) {
        Some(text) if !text.is_empty() && text != "null" => Some(text.to_string()),
        _ => None,
    };

    PartialProfile {
        identity,
        themes: non_empty_array(obj, "themes"),
        projects: non_empty_array(obj, "projects"),
        stack: non_empty_array(obj, "stack"),
        decisions: non_empty_array(obj, "decisions"),
        interests: non_empty_array(obj, "interests"),
        skip_patterns: non_empty_array(obj, "skip_patterns"),
    }
}
/// Like `extract_string_array`, but yields `None` instead of an empty vector.
fn non_empty_array(
    obj: &serde_json::Map<String, serde_json::Value>,
    key: &str,
) -> Option<Vec<String>> {
    Some(extract_string_array(obj, key)).filter(|items| !items.is_empty())
}
/// Renders a profile as plain text for embedding in a prompt.
///
/// Emits one section per populated field, in a fixed order (identity, themes,
/// projects, stack, decisions, interests, skip patterns), joined by newlines.
/// List fields are comma-separated, except decisions, which are written as a
/// bulleted list. A profile with nothing populated yields
/// `"No profile information available."`.
fn format_profile_for_prompt(profile: &UserProfile) -> String {
    // Each slot is `None` when the corresponding field has nothing to show.
    let sections: Vec<String> = vec![
        profile
            .identity
            .as_ref()
            .map(|identity| format!("Identity: {}", identity)),
        (!profile.themes.is_empty())
            .then(|| format!("Recurring themes: {}", profile.themes.join(", "))),
        (!profile.projects.is_empty())
            .then(|| format!("Active projects: {}", profile.projects.join(", "))),
        (!profile.stack.is_empty())
            .then(|| format!("Technical stack: {}", profile.stack.join(", "))),
        (!profile.decisions.is_empty()).then(|| {
            let bullets: Vec<String> = profile
                .decisions
                .iter()
                .map(|decision| format!(" - {}", decision))
                .collect();
            format!("Key decisions:\n{}", bullets.join("\n"))
        }),
        (!profile.interests.is_empty())
            .then(|| format!("Interests: {}", profile.interests.join(", "))),
        (!profile.skip_patterns.is_empty()).then(|| {
            format!(
                "Skip patterns (generic Q&A): {}",
                profile.skip_patterns.join(", ")
            )
        }),
    ]
    .into_iter()
    .flatten()
    .collect();

    if sections.is_empty() {
        "No profile information available.".to_string()
    } else {
        sections.join("\n")
    }
}
// JavaScript-facing wrappers around the public functions of this file.
// Inputs arrive as JSON strings (or raw model output) and structured results
// are handed back as `JsValue`s; prompt builders return plain strings.
#[cfg(feature = "wasm")]
mod wasm_bindings {
    use super::*;
    use wasm_bindgen::prelude::*;

    // Serializes `value` into a `JsValue`, turning a serialization failure
    // into a `JsError` carrying the error's message.
    fn to_js<T: serde::Serialize>(value: &T) -> Result<JsValue, JsError> {
        match serde_wasm_bindgen::to_value(value) {
            Ok(js) => Ok(js),
            Err(err) => Err(JsError::new(&err.to_string())),
        }
    }

    #[wasm_bindgen(js_name = "chunksToSummaries")]
    pub fn wasm_chunks_to_summaries(chunks_json: &str) -> Result<JsValue, JsError> {
        let chunks = serde_json::from_str::<Vec<ConversationChunk>>(chunks_json)
            .map_err(|err| JsError::new(&format!("Invalid chunks JSON: {}", err)))?;
        to_js(&chunks_to_summaries(&chunks))
    }

    #[wasm_bindgen(js_name = "buildProfileBatchPrompt")]
    pub fn wasm_build_profile_batch_prompt(summaries_json: &str) -> Result<String, JsError> {
        let summaries = serde_json::from_str::<Vec<ChunkSummary>>(summaries_json)
            .map_err(|err| JsError::new(&format!("Invalid summaries JSON: {}", err)))?;
        Ok(build_profile_batch_prompt(&summaries))
    }

    #[wasm_bindgen(js_name = "parseProfileBatchResponse")]
    pub fn wasm_parse_profile_batch_response(llm_output: &str) -> Result<JsValue, JsError> {
        to_js(&parse_profile_batch_response(llm_output))
    }

    #[wasm_bindgen(js_name = "buildProfileMergePrompt")]
    pub fn wasm_build_profile_merge_prompt(partials_json: &str) -> Result<String, JsError> {
        let partials = serde_json::from_str::<Vec<PartialProfile>>(partials_json)
            .map_err(|err| JsError::new(&format!("Invalid partials JSON: {}", err)))?;
        Ok(build_profile_merge_prompt(&partials))
    }

    #[wasm_bindgen(js_name = "parseProfileResponse")]
    pub fn wasm_parse_profile_response(llm_output: &str) -> Result<JsValue, JsError> {
        to_js(&parse_profile_response(llm_output))
    }

    #[wasm_bindgen(js_name = "buildTriagePrompt")]
    pub fn wasm_build_triage_prompt(
        profile_json: &str,
        summaries_json: &str,
    ) -> Result<String, JsError> {
        // The profile is validated before the summaries, so its error wins
        // when both inputs are malformed.
        let profile = serde_json::from_str::<UserProfile>(profile_json)
            .map_err(|err| JsError::new(&format!("Invalid profile JSON: {}", err)))?;
        let summaries = serde_json::from_str::<Vec<ChunkSummary>>(summaries_json)
            .map_err(|err| JsError::new(&format!("Invalid summaries JSON: {}", err)))?;
        Ok(build_triage_prompt(&profile, &summaries))
    }

    #[wasm_bindgen(js_name = "parseTriageResponse")]
    pub fn wasm_parse_triage_response(llm_output: &str) -> Result<JsValue, JsError> {
        to_js(&parse_triage_response(llm_output))
    }

    #[wasm_bindgen(js_name = "enrichExtractionPrompt")]
    pub fn wasm_enrich_extraction_prompt(
        profile_json: &str,
        base_prompt: &str,
    ) -> Result<String, JsError> {
        let profile = serde_json::from_str::<UserProfile>(profile_json)
            .map_err(|err| JsError::new(&format!("Invalid profile JSON: {}", err)))?;
        Ok(enrich_extraction_prompt(&profile, base_prompt))
    }
}
// Python wrapper for `chunks_to_summaries`: takes the chunks as a JSON string
// and returns the summaries as a JSON string. Malformed input raises
// `ValueError`; a serialization failure raises `RuntimeError`.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "chunks_to_summaries")]
fn py_chunks_to_summaries(chunks_json: &str) -> pyo3::PyResult<String> {
    let chunks = match serde_json::from_str::<Vec<ConversationChunk>>(chunks_json) {
        Ok(parsed) => parsed,
        Err(err) => {
            return Err(pyo3::exceptions::PyValueError::new_err(format!(
                "Invalid chunks JSON: {}",
                err
            )))
        }
    };
    match serde_json::to_string(&chunks_to_summaries(&chunks)) {
        Ok(json) => Ok(json),
        Err(err) => Err(pyo3::exceptions::PyRuntimeError::new_err(err.to_string())),
    }
}
// Python wrapper for `build_profile_batch_prompt`: takes the summaries as a
// JSON string and returns the prompt text. Malformed input raises `ValueError`.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "build_profile_batch_prompt")]
fn py_build_profile_batch_prompt(summaries_json: &str) -> pyo3::PyResult<String> {
    match serde_json::from_str::<Vec<ChunkSummary>>(summaries_json) {
        Ok(summaries) => Ok(build_profile_batch_prompt(&summaries)),
        Err(err) => Err(pyo3::exceptions::PyValueError::new_err(format!(
            "Invalid summaries JSON: {}",
            err
        ))),
    }
}
// Python wrapper for `parse_profile_batch_response`: returns the parsed
// partial profile as a JSON string. A serialization failure raises
// `RuntimeError`.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "parse_profile_batch_response")]
fn py_parse_profile_batch_response(llm_output: &str) -> pyo3::PyResult<String> {
    let partial = parse_profile_batch_response(llm_output);
    match serde_json::to_string(&partial) {
        Ok(json) => Ok(json),
        Err(err) => Err(pyo3::exceptions::PyRuntimeError::new_err(err.to_string())),
    }
}
// Python wrapper for `build_profile_merge_prompt`: takes the partial profiles
// as a JSON string and returns the prompt text. Malformed input raises
// `ValueError`.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "build_profile_merge_prompt")]
fn py_build_profile_merge_prompt(partials_json: &str) -> pyo3::PyResult<String> {
    match serde_json::from_str::<Vec<PartialProfile>>(partials_json) {
        Ok(partials) => Ok(build_profile_merge_prompt(&partials)),
        Err(err) => Err(pyo3::exceptions::PyValueError::new_err(format!(
            "Invalid partials JSON: {}",
            err
        ))),
    }
}
// Python wrapper for `parse_profile_response`: returns the parsed profile as
// a JSON string. A serialization failure raises `RuntimeError`.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "parse_profile_response")]
fn py_parse_profile_response(llm_output: &str) -> pyo3::PyResult<String> {
    let merged = parse_profile_response(llm_output);
    match serde_json::to_string(&merged) {
        Ok(json) => Ok(json),
        Err(err) => Err(pyo3::exceptions::PyRuntimeError::new_err(err.to_string())),
    }
}
// Python wrapper for `build_triage_prompt`: takes the profile and the
// summaries as JSON strings and returns the prompt text. Malformed input
// raises `ValueError`; the profile is checked before the summaries.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "build_triage_prompt")]
fn py_build_triage_prompt(profile_json: &str, summaries_json: &str) -> pyo3::PyResult<String> {
    let profile = match serde_json::from_str::<UserProfile>(profile_json) {
        Ok(parsed) => parsed,
        Err(err) => {
            return Err(pyo3::exceptions::PyValueError::new_err(format!(
                "Invalid profile JSON: {}",
                err
            )))
        }
    };
    match serde_json::from_str::<Vec<ChunkSummary>>(summaries_json) {
        Ok(summaries) => Ok(build_triage_prompt(&profile, &summaries)),
        Err(err) => Err(pyo3::exceptions::PyValueError::new_err(format!(
            "Invalid summaries JSON: {}",
            err
        ))),
    }
}
// Python wrapper for `parse_triage_response`: returns the parsed decisions as
// a JSON string. A serialization failure raises `RuntimeError`.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "parse_triage_response")]
fn py_parse_triage_response(llm_output: &str) -> pyo3::PyResult<String> {
    let verdicts = parse_triage_response(llm_output);
    match serde_json::to_string(&verdicts) {
        Ok(json) => Ok(json),
        Err(err) => Err(pyo3::exceptions::PyRuntimeError::new_err(err.to_string())),
    }
}
// Python wrapper for `enrich_extraction_prompt`: takes the profile as a JSON
// string plus the base prompt and returns the enriched prompt text. Malformed
// profile JSON raises `ValueError`.
#[cfg(feature = "python")]
#[pyo3::prelude::pyfunction]
#[pyo3(name = "enrich_extraction_prompt")]
fn py_enrich_extraction_prompt(profile_json: &str, base_prompt: &str) -> pyo3::PyResult<String> {
    match serde_json::from_str::<UserProfile>(profile_json) {
        Ok(profile) => Ok(enrich_extraction_prompt(&profile, base_prompt)),
        Err(err) => Err(pyo3::exceptions::PyValueError::new_err(format!(
            "Invalid profile JSON: {}",
            err
        ))),
    }
}
/// Adds every `py_*` wrapper defined in this file to the Python module `m`.
///
/// Registration stops at the first failure and that error is returned.
#[cfg(feature = "python")]
pub fn register_python_functions(
m: &pyo3::prelude::Bound<'_, pyo3::prelude::PyModule>,
) -> pyo3::PyResult<()> {
use pyo3::prelude::*;
// One `add_function` call per wrapper, in the order the wrappers are defined.
m.add_function(pyo3::wrap_pyfunction!(py_chunks_to_summaries, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(py_build_profile_batch_prompt, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(py_parse_profile_batch_response, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(py_build_profile_merge_prompt, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(py_parse_profile_response, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(py_build_triage_prompt, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(py_parse_triage_response, m)?)?;
m.add_function(pyo3::wrap_pyfunction!(py_enrich_extraction_prompt, m)?)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
fn make_chunks() -> Vec<ConversationChunk> {
vec![
ConversationChunk {
index: 0,
title: Some("Kubernetes setup".to_string()),
messages: vec![
ChunkMessage {
role: "user".to_string(),
content: "How do I set up a Kubernetes cluster on GCP?".to_string(),
},
ChunkMessage {
role: "assistant".to_string(),
content: "Here's how to set up GKE...".to_string(),
},
ChunkMessage {
role: "user".to_string(),
content: "Thanks, the nodepool autoscaling config worked perfectly.".to_string(),
},
],
timestamp: Some("2025-01-15T10:00:00Z".to_string()),
},
ConversationChunk {
index: 1,
title: Some("Pasta recipe".to_string()),
messages: vec![
ChunkMessage {
role: "user".to_string(),
content: "How do I make pasta carbonara?".to_string(),
},
ChunkMessage {
role: "assistant".to_string(),
content: "Classic carbonara uses eggs, pecorino, guanciale...".to_string(),
},
],
timestamp: None,
},
ConversationChunk {
index: 2,
title: None,
messages: vec![
ChunkMessage {
role: "user".to_string(),
content: "I'm switching from pandas to polars for our 50GB dataset pipeline".to_string(),
},
ChunkMessage {
role: "assistant".to_string(),
content: "Good choice. Polars handles large datasets much better...".to_string(),
},
ChunkMessage {
role: "user".to_string(),
content: "The benchmark shows 3x speedup. Deploying to production next week.".to_string(),
},
],
timestamp: Some("2025-02-01T14:30:00Z".to_string()),
},
]
}
fn make_profile() -> UserProfile {
UserProfile {
identity: Some("Maria, senior data engineer at DataCorp, Berlin".to_string()),
themes: vec![
"data pipeline optimization".to_string(),
"cloud infrastructure".to_string(),
"team tooling".to_string(),
],
projects: vec![
"recommendation engine v2".to_string(),
"polars migration".to_string(),
],
stack: vec![
"Python".to_string(),
"Polars".to_string(),
"GCP".to_string(),
"Kubernetes".to_string(),
],
decisions: vec![
"chose Polars over Pandas for 3x performance on 50GB datasets".to_string(),
"chose GCP over AWS for existing Terraform expertise".to_string(),
],
interests: vec!["dark mode".to_string(), "JetBrains IDEs".to_string()],
skip_patterns: vec![
"recipe lookups".to_string(),
"weather queries".to_string(),
"translation requests".to_string(),
],
}
}
#[test]
fn test_chunks_to_summaries_basic() {
let chunks = make_chunks();
let summaries = chunks_to_summaries(&chunks);
assert_eq!(summaries.len(), 3);
assert_eq!(summaries[0].index, 0);
assert_eq!(summaries[0].title, "Kubernetes setup");
assert_eq!(
summaries[0].first_message,
"How do I set up a Kubernetes cluster on GCP?"
);
assert_eq!(
summaries[0].last_message,
"Thanks, the nodepool autoscaling config worked perfectly."
);
assert_eq!(summaries[0].message_count, 3);
assert_eq!(
summaries[0].timestamp,
Some("2025-01-15T10:00:00Z".to_string())
);
assert_eq!(summaries[1].index, 1);
assert_eq!(summaries[1].title, "Pasta recipe");
assert!(summaries[1].timestamp.is_none());
assert_eq!(summaries[2].index, 2);
assert_eq!(summaries[2].title, "Untitled");
}
#[test]
fn test_chunks_to_summaries_empty() {
let summaries = chunks_to_summaries(&[]);
assert!(summaries.is_empty());
}
#[test]
fn test_chunks_to_summaries_no_user_message() {
let chunks = vec![ConversationChunk {
index: 0,
title: Some("System only".to_string()),
messages: vec![ChunkMessage {
role: "assistant".to_string(),
content: "Hello, how can I help?".to_string(),
}],
timestamp: None,
}];
let summaries = chunks_to_summaries(&chunks);
assert_eq!(summaries[0].first_message, ""); assert_eq!(summaries[0].last_message, "Hello, how can I help?");
}
#[test]
fn test_chunks_to_summaries_truncation() {
let long_msg = "x".repeat(500);
let chunks = vec![ConversationChunk {
index: 0,
title: Some("Long".to_string()),
messages: vec![ChunkMessage {
role: "user".to_string(),
content: long_msg,
}],
timestamp: None,
}];
let summaries = chunks_to_summaries(&chunks);
assert_eq!(summaries[0].first_message.len(), 303); assert!(summaries[0].first_message.ends_with("..."));
}
#[test]
fn test_build_profile_batch_prompt_structure() {
let chunks = make_chunks();
let summaries = chunks_to_summaries(&chunks);
let prompt = build_profile_batch_prompt(&summaries);
assert!(prompt.contains("Analyze these conversation excerpts"));
assert!(prompt.contains("Kubernetes setup"));
assert!(prompt.contains("Pasta recipe"));
assert!(prompt.contains("\"identity\""));
assert!(prompt.contains("\"themes\""));
assert!(prompt.contains("\"projects\""));
assert!(prompt.contains("\"stack\""));
assert!(prompt.contains("\"decisions\""));
assert!(prompt.contains("\"interests\""));
assert!(prompt.contains("\"skip_patterns\""));
assert!(prompt.contains("2025-01-15T10:00:00Z"));
}
#[test]
fn test_build_profile_batch_prompt_empty() {
let prompt = build_profile_batch_prompt(&[]);
assert!(prompt.contains("Analyze these conversation excerpts"));
assert!(prompt.contains("[CONVERSATIONS]"));
}
#[test]
fn test_parse_profile_batch_response_valid_json() {
let input = r#"{
"identity": "Maria, data engineer at DataCorp",
"themes": ["data pipelines", "cloud migration"],
"projects": ["polars migration"],
"stack": ["Python", "Polars", "GCP"],
"decisions": ["chose Polars over Pandas for performance"],
"interests": ["dark mode"],
"skip_patterns": ["recipe lookups"]
}"#;
let profile = parse_profile_batch_response(input);
assert_eq!(
profile.identity,
Some("Maria, data engineer at DataCorp".to_string())
);
assert_eq!(profile.themes, Some(vec!["data pipelines".to_string(), "cloud migration".to_string()]));
assert_eq!(profile.projects, Some(vec!["polars migration".to_string()]));
assert_eq!(profile.stack, Some(vec!["Python".to_string(), "Polars".to_string(), "GCP".to_string()]));
assert_eq!(profile.decisions, Some(vec!["chose Polars over Pandas for performance".to_string()]));
assert_eq!(profile.interests, Some(vec!["dark mode".to_string()]));
assert_eq!(profile.skip_patterns, Some(vec!["recipe lookups".to_string()]));
}
#[test]
fn test_parse_profile_batch_response_with_code_fences() {
let input = r#"```json
{
"identity": "Maria, data engineer",
"themes": ["ML pipelines"],
"projects": [],
"stack": ["Python"],
"decisions": [],
"interests": [],
"skip_patterns": []
}
```"#;
let profile = parse_profile_batch_response(input);
assert_eq!(
profile.identity,
Some("Maria, data engineer".to_string())
);
assert_eq!(profile.themes, Some(vec!["ML pipelines".to_string()]));
assert!(profile.projects.is_none());
}
#[test]
fn test_parse_profile_batch_response_partial_fields() {
let input = r#"{"identity": "John", "stack": ["Rust", "TypeScript"]}"#;
let profile = parse_profile_batch_response(input);
assert_eq!(profile.identity, Some("John".to_string()));
assert_eq!(
profile.stack,
Some(vec!["Rust".to_string(), "TypeScript".to_string()])
);
assert!(profile.themes.is_none());
assert!(profile.projects.is_none());
}
#[test]
fn test_parse_profile_batch_response_null_identity() {
let input = r#"{"identity": null, "themes": ["testing"]}"#;
let profile = parse_profile_batch_response(input);
assert!(profile.identity.is_none());
assert_eq!(profile.themes, Some(vec!["testing".to_string()]));
}
#[test]
fn test_parse_profile_batch_response_invalid_json() {
let profile = parse_profile_batch_response("not json at all");
assert!(profile.identity.is_none());
assert!(profile.themes.is_none());
}
#[test]
fn test_parse_profile_batch_response_empty() {
let profile = parse_profile_batch_response("");
assert!(profile.identity.is_none());
}
#[test]
fn test_parse_profile_batch_response_json_with_surrounding_text() {
let input = r#"Here is the profile:
{"identity": "Bob the Builder", "themes": ["construction"]}
Hope this helps!"#;
let profile = parse_profile_batch_response(input);
assert_eq!(
profile.identity,
Some("Bob the Builder".to_string())
);
assert_eq!(profile.themes, Some(vec!["construction".to_string()]));
}
#[test]
fn test_build_profile_merge_prompt_structure() {
let partials = vec![
PartialProfile {
identity: Some("Maria, data engineer".to_string()),
themes: Some(vec!["data pipelines".to_string()]),
..Default::default()
},
PartialProfile {
identity: Some("Maria, senior data engineer at DataCorp".to_string()),
stack: Some(vec!["Python".to_string(), "GCP".to_string()]),
..Default::default()
},
];
let prompt = build_profile_merge_prompt(&partials);
assert!(prompt.contains("merging multiple partial user profiles"));
assert!(prompt.contains("Batch 1"));
assert!(prompt.contains("Batch 2"));
assert!(prompt.contains("Maria, data engineer"));
assert!(prompt.contains("Maria, senior data engineer at DataCorp"));
assert!(prompt.contains("\"identity\""));
}
#[test]
fn test_build_profile_merge_prompt_single_partial() {
let partials = vec![PartialProfile {
identity: Some("Alice".to_string()),
..Default::default()
}];
let prompt = build_profile_merge_prompt(&partials);
assert!(prompt.contains("Batch 1"));
assert!(prompt.contains("Alice"));
}
#[test]
fn test_parse_profile_response_valid() {
let input = r#"{
"identity": "Maria, senior data engineer at DataCorp, Berlin",
"themes": ["data pipeline optimization", "cloud infrastructure"],
"projects": ["recommendation engine v2", "polars migration"],
"stack": ["Python", "Polars", "GCP", "Kubernetes"],
"decisions": ["chose Polars over Pandas for 3x performance"],
"interests": ["dark mode", "JetBrains IDEs"],
"skip_patterns": ["recipe lookups", "weather queries"]
}"#;
let profile = parse_profile_response(input);
assert_eq!(
profile.identity,
Some("Maria, senior data engineer at DataCorp, Berlin".to_string())
);
assert_eq!(profile.themes.len(), 2);
assert_eq!(profile.projects.len(), 2);
assert_eq!(profile.stack.len(), 4);
assert_eq!(profile.decisions.len(), 1);
assert_eq!(profile.interests.len(), 2);
assert_eq!(profile.skip_patterns.len(), 2);
}
#[test]
fn test_parse_profile_response_missing_fields() {
let input = r#"{"identity": "Bob", "themes": ["testing"]}"#;
let profile = parse_profile_response(input);
assert_eq!(profile.identity, Some("Bob".to_string()));
assert_eq!(profile.themes, vec!["testing".to_string()]);
assert!(profile.projects.is_empty());
assert!(profile.stack.is_empty());
}
#[test]
fn test_parse_profile_response_invalid_json() {
let profile = parse_profile_response("garbage");
assert!(profile.identity.is_none());
assert!(profile.themes.is_empty());
}
#[test]
fn test_parse_profile_response_string_null_identity() {
let input = r#"{"identity": "null", "themes": ["a"]}"#;
let profile = parse_profile_response(input);
assert!(profile.identity.is_none()); }
#[test]
fn test_parse_profile_response_with_fences() {
let input = "```json\n{\"identity\": \"Alice\", \"themes\": [\"coding\"]}\n```";
let profile = parse_profile_response(input);
assert_eq!(profile.identity, Some("Alice".to_string()));
assert_eq!(profile.themes, vec!["coding".to_string()]);
}
#[test]
fn test_build_triage_prompt_structure() {
let profile = make_profile();
let chunks = make_chunks();
let summaries = chunks_to_summaries(&chunks);
let prompt = build_triage_prompt(&profile, &summaries);
assert!(prompt.contains("classifying conversations"));
assert!(prompt.contains("[USER PROFILE]"));
assert!(prompt.contains("Maria, senior data engineer at DataCorp, Berlin"));
assert!(prompt.contains("data pipeline optimization"));
assert!(prompt.contains("[CONVERSATIONS]"));
assert!(prompt.contains("Kubernetes setup"));
assert!(prompt.contains("Pasta recipe"));
assert!(prompt.contains("EXTRACT"));
assert!(prompt.contains("SKIP"));
}
#[test]
fn test_build_triage_prompt_empty_profile() {
let profile = UserProfile::default();
let summaries = vec![ChunkSummary {
index: 0,
title: "Test".to_string(),
first_message: "Hello".to_string(),
last_message: "Bye".to_string(),
message_count: 2,
timestamp: None,
}];
let prompt = build_triage_prompt(&profile, &summaries);
assert!(prompt.contains("No profile information available"));
}
#[test]
fn test_parse_triage_response_valid_json() {
let input = r#"[
{"index": 0, "decision": "EXTRACT", "reason": "discusses their K8s setup"},
{"index": 1, "decision": "SKIP", "reason": "generic recipe lookup"},
{"index": 2, "decision": "EXTRACT", "reason": "polars migration decision"}
]"#;
let decisions = parse_triage_response(input);
assert_eq!(decisions.len(), 3);
assert_eq!(decisions[0].chunk_index, 0);
assert_eq!(decisions[0].decision, TriageDecision::Extract);
assert_eq!(decisions[0].reason, "discusses their K8s setup");
assert_eq!(decisions[1].chunk_index, 1);
assert_eq!(decisions[1].decision, TriageDecision::Skip);
assert_eq!(decisions[2].chunk_index, 2);
assert_eq!(decisions[2].decision, TriageDecision::Extract);
}
#[test]
fn test_parse_triage_response_with_code_fences() {
let input = "```json\n[{\"index\": 0, \"decision\": \"SKIP\", \"reason\": \"trivial\"}]\n```";
let decisions = parse_triage_response(input);
assert_eq!(decisions.len(), 1);
assert_eq!(decisions[0].decision, TriageDecision::Skip);
}
/// Decision strings are matched regardless of letter case: "extract", "skip",
/// "Skip" and "EXTRACT" each parse to the corresponding variant.
#[test]
fn test_parse_triage_response_case_insensitive() {
    let raw = r#"[
{"index": 0, "decision": "extract", "reason": "test"},
{"index": 1, "decision": "skip", "reason": "test"},
{"index": 2, "decision": "Skip", "reason": "test"},
{"index": 3, "decision": "EXTRACT", "reason": "test"}
]"#;
    let parsed = parse_triage_response(raw);

    let expected = [
        TriageDecision::Extract,
        TriageDecision::Skip,
        TriageDecision::Skip,
        TriageDecision::Extract,
    ];
    assert_eq!(parsed.len(), expected.len());
    for (got, want) in parsed.iter().zip(expected.iter()) {
        assert_eq!(&got.decision, want);
    }
}
/// An unrecognized decision string ("UNKNOWN") must fall back to
/// `TriageDecision::Extract` rather than dropping the entry.
#[test]
fn test_parse_triage_response_defaults_to_extract() {
    let input = r#"[{"index": 0, "decision": "UNKNOWN", "reason": "unclear"}]"#;
    let decisions = parse_triage_response(input);
    // Assert the length first (as the sibling tests do) so a dropped entry
    // fails as a readable assertion instead of an out-of-bounds index panic.
    assert_eq!(
        decisions.len(),
        1,
        "entry with an unknown decision should be kept"
    );
    assert_eq!(decisions[0].decision, TriageDecision::Extract);
}
/// Entries lacking an index are dropped; a missing decision is read as
/// `Extract` and a missing reason as the empty string.
#[test]
fn test_parse_triage_response_missing_fields() {
    let raw = r#"[
{"decision": "SKIP", "reason": "no index"},
{"index": 1},
{"index": 2, "decision": "EXTRACT"}
]"#;
    let parsed = parse_triage_response(raw);
    assert_eq!(parsed.len(), 2);

    // The index-less first entry is gone, so the survivors are indices 1 and 2.
    let (index_only, without_reason) = (&parsed[0], &parsed[1]);
    assert_eq!(index_only.chunk_index, 1);
    assert_eq!(index_only.decision, TriageDecision::Extract);
    assert_eq!(without_reason.chunk_index, 2);
    assert_eq!(without_reason.reason, "");
}
/// The index may be supplied under the `chunk_index` key instead of `index`.
#[test]
fn test_parse_triage_response_chunk_index_field() {
    let raw = r#"[{"chunk_index": 5, "decision": "SKIP", "reason": "test"}]"#;
    let parsed = parse_triage_response(raw);

    // One list comparison covers both the entry count and its index.
    let indices: Vec<usize> = parsed.iter().map(|d| d.chunk_index).collect();
    assert_eq!(indices, vec![5]);
}
/// Input that is not JSON at all produces an empty decision list.
#[test]
fn test_parse_triage_response_invalid_json() {
    let garbage = "not json";
    let parsed = parse_triage_response(garbage);
    assert!(parsed.is_empty());
}
/// An empty JSON array produces an empty decision list.
#[test]
fn test_parse_triage_response_empty_array() {
    let empty_array = "[]";
    let parsed = parse_triage_response(empty_array);
    assert!(parsed.is_empty());
}
/// A JSON array surrounded by free-form text is still found and parsed.
#[test]
fn test_parse_triage_response_json_in_text() {
    let chatty = r#"Here are my decisions:
[{"index": 0, "decision": "EXTRACT", "reason": "important"}]
That's it."#;
    let parsed = parse_triage_response(chatty);

    // Comparing the whole list checks both the count and the decision.
    let kinds: Vec<&TriageDecision> = parsed.iter().map(|d| &d.decision).collect();
    assert_eq!(kinds, vec![&TriageDecision::Extract]);
}
/// The enriched prompt must mention a sample from each section of the fixture
/// profile and must still contain the base prompt.
#[test]
fn test_enrich_extraction_prompt_injects_profile() {
    let profile = make_profile();
    let base = "Extract facts from this conversation.";
    let enriched = enrich_extraction_prompt(&profile, base);

    let expected_fragments = [
        "Maria, senior data engineer at DataCorp, Berlin",
        "data pipeline optimization",
        "recommendation engine v2",
        "Python",
        "chose Polars over Pandas",
        "dark mode",
        "recipe lookups",
        base,
    ];
    for &fragment in expected_fragments.iter() {
        assert!(
            enriched.contains(fragment),
            "enriched prompt is missing {:?}",
            fragment
        );
    }
}
/// With a default (empty) profile the enriched prompt carries the
/// "No profile information available" placeholder alongside the base prompt.
#[test]
fn test_enrich_extraction_prompt_empty_profile() {
    let empty_profile = UserProfile::default();
    let base = "Extract facts.";
    let enriched = enrich_extraction_prompt(&empty_profile, base);

    for &fragment in ["No profile information available", base].iter() {
        assert!(
            enriched.contains(fragment),
            "enriched prompt is missing {:?}",
            fragment
        );
    }
}
/// The base prompt must appear verbatim at the very end of the enriched prompt.
#[test]
fn test_enrich_extraction_prompt_preserves_base() {
    let base_prompt = "Return a JSON array of facts with type and importance.";
    let fixture = make_profile();

    let enriched = enrich_extraction_prompt(&fixture, base_prompt);

    assert!(enriched.ends_with(base_prompt));
}
/// A fenced block tagged `json` is reduced to its inner content.
#[test]
fn test_strip_code_fences_json() {
    let fenced = "```json\n{}\n```";
    let stripped = strip_code_fences(fenced);
    assert_eq!(stripped, "{}");
}
/// A fenced block without a language tag is reduced to its inner content.
#[test]
fn test_strip_code_fences_bare() {
    let fenced = "```\n[]\n```";
    let stripped = strip_code_fences(fenced);
    assert_eq!(stripped, "[]");
}
/// Text without any code fences is returned as-is.
#[test]
fn test_strip_code_fences_no_fences() {
    let unfenced = "plain text";
    let stripped = strip_code_fences(unfenced);
    assert_eq!(stripped, "plain text");
}
/// A message shorter than the limit is returned unchanged.
#[test]
fn test_truncate_message_short() {
    let shortened = truncate_message("hello", 10);
    assert_eq!(shortened, "hello");
}
/// A message whose length equals the limit is returned unchanged.
#[test]
fn test_truncate_message_exact() {
    let shortened = truncate_message("hello", 5);
    assert_eq!(shortened, "hello");
}
/// A message longer than the limit is cut at the limit and suffixed with "...".
#[test]
fn test_truncate_message_long() {
    assert_eq!(truncate_message("hello world", 5), "hello...");
}
/// Leading and trailing whitespace is removed from the returned message.
#[test]
fn test_truncate_message_trims_whitespace() {
    let padded = " hello ";
    let shortened = truncate_message(padded, 10);
    assert_eq!(shortened, "hello");
}
/// `Display` renders each variant as its upper-case name.
#[test]
fn test_triage_decision_display() {
    let rendered_extract = TriageDecision::Extract.to_string();
    let rendered_skip = TriageDecision::Skip.to_string();

    assert_eq!(rendered_extract, "EXTRACT");
    assert_eq!(rendered_skip, "SKIP");
}
/// A `ChunkDecision` survives a JSON round trip with every field intact, and
/// the decision is serialized as the upper-case string `"EXTRACT"`.
#[test]
fn test_triage_decision_serde_roundtrip() {
    let decision = ChunkDecision {
        chunk_index: 3,
        decision: TriageDecision::Extract,
        reason: "important project context".to_string(),
    };
    let json = serde_json::to_string(&decision).unwrap();
    assert!(json.contains("\"EXTRACT\""));

    let deserialized: ChunkDecision = serde_json::from_str(&json).unwrap();
    assert_eq!(deserialized.chunk_index, decision.chunk_index);
    assert_eq!(deserialized.decision, decision.decision);
    // The reason text must round-trip too; checking only the other fields
    // would let a dropped reason go unnoticed.
    assert_eq!(deserialized.reason, decision.reason);
}
/// A `PartialProfile` with a mix of populated and `None` fields survives a
/// JSON round trip: populated fields keep their values and `None` stays `None`.
#[test]
fn test_partial_profile_serde_roundtrip() {
    let profile = PartialProfile {
        identity: Some("Test User".to_string()),
        themes: Some(vec!["testing".to_string()]),
        projects: None,
        stack: Some(vec!["Rust".to_string()]),
        decisions: None,
        interests: None,
        skip_patterns: None,
    };
    let json = serde_json::to_string(&profile).unwrap();
    let deserialized: PartialProfile = serde_json::from_str(&json).unwrap();

    // Populated fields keep their contents.
    assert_eq!(deserialized.identity, Some("Test User".to_string()));
    assert_eq!(deserialized.themes, Some(vec!["testing".to_string()]));
    assert_eq!(deserialized.stack, Some(vec!["Rust".to_string()]));

    // Every absent field must come back absent, not as an empty list.
    assert!(deserialized.projects.is_none());
    assert!(deserialized.decisions.is_none());
    assert!(deserialized.interests.is_none());
    assert!(deserialized.skip_patterns.is_none());
}
/// The fixture `UserProfile` survives a JSON round trip with all seven fields
/// unchanged.
#[test]
fn test_user_profile_serde_roundtrip() {
    let profile = make_profile();
    let json = serde_json::to_string(&profile).unwrap();
    let deserialized: UserProfile = serde_json::from_str(&json).unwrap();

    // `UserProfile` does not derive `PartialEq`, so compare field by field.
    assert_eq!(deserialized.identity, profile.identity);
    assert_eq!(deserialized.themes, profile.themes);
    assert_eq!(deserialized.projects, profile.projects);
    assert_eq!(deserialized.stack, profile.stack);
    assert_eq!(deserialized.decisions, profile.decisions);
    assert_eq!(deserialized.interests, profile.interests);
    assert_eq!(deserialized.skip_patterns, profile.skip_patterns);
}
/// Formatting the fixture profile produces a labelled entry for every section
/// (identity, themes, projects, stack, decisions, interests, skip patterns).
#[test]
fn test_format_profile_for_prompt_full() {
    let fixture = make_profile();
    let rendered = format_profile_for_prompt(&fixture);

    let expected_fragments = [
        "Identity: Maria",
        "Recurring themes: data pipeline optimization",
        "Active projects: recommendation engine v2",
        "Technical stack: Python",
        "Key decisions:",
        " - chose Polars over Pandas",
        "Interests: dark mode",
        "Skip patterns (generic Q&A): recipe lookups",
    ];
    for &fragment in expected_fragments.iter() {
        assert!(
            rendered.contains(fragment),
            "formatted profile is missing {:?}",
            fragment
        );
    }
}
/// A default (empty) profile formats to exactly the placeholder sentence.
#[test]
fn test_format_profile_for_prompt_empty() {
    let rendered = format_profile_for_prompt(&UserProfile::default());
    assert_eq!(rendered, "No profile information available.");
}
/// A realistic LLM reply (prose, then a fenced JSON object, then more prose)
/// is parsed into a profile with the expected identity and list sizes.
#[test]
fn test_full_pipeline_profile_batch_response() {
    let reply = r#"Based on the conversations, here's the user profile:
```json
{
"identity": "Sarah Chen, ML engineer at TechStartup, San Francisco",
"themes": ["model training", "deployment pipelines", "GPU optimization"],
"projects": ["recommendation-v3", "model-serving-infra"],
"stack": ["Python", "PyTorch", "CUDA", "Docker", "AWS SageMaker"],
"decisions": ["switched from TensorFlow to PyTorch for better debugging", "chose spot instances for training to save 60% cost"],
"interests": ["vim keybindings", "mechanical keyboards"],
"skip_patterns": ["asking about weather", "general Python syntax questions"]
}
```
I hope this profile is helpful!"#;
    let parsed = parse_profile_batch_response(reply);

    assert_eq!(
        parsed.identity.as_deref(),
        Some("Sarah Chen, ML engineer at TechStartup, San Francisco")
    );
    // Each list must be present and hold the number of items given in the reply.
    assert_eq!(parsed.themes.as_ref().map(|items| items.len()), Some(3));
    assert_eq!(parsed.projects.as_ref().map(|items| items.len()), Some(2));
    assert_eq!(parsed.stack.as_ref().map(|items| items.len()), Some(5));
    assert_eq!(parsed.decisions.as_ref().map(|items| items.len()), Some(2));
}
/// A realistic LLM reply (a sentence of prose followed by a JSON array) is
/// parsed into three decisions: two `Extract` and one `Skip`.
#[test]
fn test_full_pipeline_triage_response() {
    let reply = r#"Looking at the user's profile and conversations, here are my classifications:
[
{"index": 0, "decision": "EXTRACT", "reason": "Discusses Kubernetes cluster setup on GCP, directly related to their cloud infrastructure work"},
{"index": 1, "decision": "SKIP", "reason": "Generic recipe lookup with no personal value"},
{"index": 2, "decision": "EXTRACT", "reason": "Contains key decision to switch from pandas to polars with benchmark data and deployment timeline"}
]"#;
    let parsed = parse_triage_response(reply);
    assert_eq!(parsed.len(), 3);

    // Counts how many parsed decisions carry the given variant.
    let tally = |wanted: TriageDecision| {
        parsed
            .iter()
            .filter(|entry| entry.decision == wanted)
            .count()
    };
    assert_eq!(tally(TriageDecision::Extract), 2);
    assert_eq!(tally(TriageDecision::Skip), 1);
}
/// An empty-string identity in the response is treated as absent (`None`).
#[test]
fn test_parse_profile_batch_response_empty_string_identity() {
    let raw = r#"{"identity": "", "themes": ["test"]}"#;
    let parsed = parse_profile_batch_response(raw);
    assert_eq!(parsed.identity, None);
}
/// A chunk with no messages still yields a summary, with empty first/last
/// message text and a message count of zero.
#[test]
fn test_chunks_to_summaries_empty_messages() {
    let chunks = vec![ConversationChunk {
        index: 0,
        title: Some("Empty".to_string()),
        messages: vec![],
        timestamp: None,
    }];
    let summaries = chunks_to_summaries(&chunks);
    // Check the length first so a missing summary fails as a readable
    // assertion instead of an out-of-bounds index panic.
    assert_eq!(summaries.len(), 1, "expected one summary per input chunk");

    let summary = &summaries[0];
    assert_eq!(summary.first_message, "");
    assert_eq!(summary.last_message, "");
    assert_eq!(summary.message_count, 0);
}
}