use std::collections::BTreeSet;
use serde_json::{json, Map, Value};
use super::regex::{named_email_regex, sanitize_entity_name, sanitize_fact_text};
use super::types::{ExtractionAccumulator, MemoryIngestionConfig, ParsedIngestion};
use crate::memory::types::NamespaceDocumentInput;
pub(super) fn extract_people_from_header(
value: &str,
accumulator: &mut ExtractionAccumulator,
) -> Vec<String> {
let mut people = Vec::new();
for captures in named_email_regex().captures_iter(value) {
let name = sanitize_fact_text(
captures
.name("name")
.map(|value| value.as_str())
.unwrap_or(""),
);
if name.is_empty() {
continue;
}
let canonical = sanitize_entity_name(&name);
let _ = accumulator.add_entity(&canonical, "PERSON", 0.95);
accumulator.remember_person_aliases(&canonical);
people.push(canonical);
}
people
}
pub(super) fn detect_primary_subject(text: &str) -> Option<String> {
if text.contains("OpenHuman") {
return Some("OPENHUMAN".to_string());
}
None
}
#[allow(dead_code)]
pub(super) fn enrich_document_metadata(
input: &NamespaceDocumentInput,
parsed: &ParsedIngestion,
config: &MemoryIngestionConfig,
) -> (NamespaceDocumentInput, Vec<String>) {
let mut metadata = match input.metadata.clone() {
Value::Object(map) => map,
_ => Map::new(),
};
for (key, value) in parsed.metadata.as_object().cloned().unwrap_or_default() {
metadata.insert(key, value);
}
metadata.insert(
"ingestion".to_string(),
json!({
"backend": "tinycortex_rust_heuristic",
"model_name": config.model_name,
"extraction_mode": config.extraction_mode.as_str(),
"entity_count": parsed.entities.len(),
"relation_count": parsed.relations.len(),
"preference_count": parsed.preference_count,
"decision_count": parsed.decision_count,
"chunk_count": parsed.chunk_count,
}),
);
if parsed.preference_count > 0 || parsed.decision_count > 0 {
metadata.insert("kind".to_string(), json!("profile"));
}
let mut tags = input.tags.iter().cloned().collect::<BTreeSet<_>>();
tags.extend(parsed.tags.iter().cloned());
let tags = tags.into_iter().collect::<Vec<_>>();
(
NamespaceDocumentInput {
namespace: input.namespace.clone(),
key: input.key.clone(),
title: input.title.clone(),
content: input.content.clone(),
source_type: input.source_type.clone(),
priority: input.priority.clone(),
tags: tags.clone(),
metadata: Value::Object(metadata),
category: input.category.clone(),
session_id: input.session_id.clone(),
document_id: input.document_id.clone(),
taint: input.taint,
},
tags,
)
}