tinycortex 0.1.1

Rust core for the TinyCortex memory system
Documentation
use std::collections::HashMap;

use serde_json::json;

use super::super::alias::{build_alias_map, resolve_alias, reverse_aliases};
use super::super::chunking::{build_units, find_chunk_index, split_sentences};
use super::super::header::{
    detect_primary_subject, enrich_document_metadata, extract_people_from_header,
};
use super::super::types::{
    ExtractionAccumulator, ExtractionMode, MemoryIngestionConfig, ParsedIngestion, RawEntity,
};
use crate::memory::types::{MemoryTaint, NamespaceDocumentInput};

fn sample_input() -> NamespaceDocumentInput {
    NamespaceDocumentInput {
        namespace: "global".into(),
        key: "doc-1".into(),
        title: "OpenHuman roadmap".into(),
        content: "Alice owns roadmap".into(),
        source_type: "manual".into(),
        priority: "normal".into(),
        tags: vec!["existing".into()],
        metadata: json!({"seed": true}),
        category: "core".into(),
        session_id: Some("session-1".into()),
        document_id: Some("doc-id-1".into()),
        taint: MemoryTaint::Internal,
    }
}

#[test]
fn split_sentences_breaks_on_punctuation_and_merges_tiny_fragments() {
    let parts = split_sentences("Hello world. Ok.\nNext line?");
    assert_eq!(parts.len(), 2);
    assert_eq!(parts[0], "Hello world Ok");
    assert_eq!(parts[1], "Next line?");
}

#[test]
fn build_units_respects_extraction_mode() {
    let chunks = vec!["One. Two.".to_string(), "Three".to_string()];
    let sentence_units = build_units(&chunks, ExtractionMode::Sentence);
    let chunk_units = build_units(&chunks, ExtractionMode::Chunk);

    assert_eq!(sentence_units.len(), 2);
    assert_eq!(sentence_units[0].chunk_index, 0);
    assert_eq!(sentence_units[0].text, "One Two");
    assert_eq!(sentence_units[1].chunk_index, 1);
    assert_eq!(sentence_units[1].text, "Three");

    assert_eq!(chunk_units.len(), 2);
    assert_eq!(chunk_units[0].text, "One. Two");
    assert_eq!(chunk_units[1].text, "Three");
}

#[test]
fn find_chunk_index_prefers_hint_then_wraps() {
    let chunks = vec![
        "alpha content".to_string(),
        "beta needle".to_string(),
        "gamma trailing".to_string(),
    ];
    assert_eq!(find_chunk_index(&chunks, "needle", 1), 1);
    assert_eq!(find_chunk_index(&chunks, "alpha", 2), 0);
    assert_eq!(find_chunk_index(&chunks, "missing", 2), 2);
}

#[test]
fn alias_map_builds_reverse_lookup() {
    let mut entities = HashMap::new();
    entities.insert(
        "ALICE".into(),
        RawEntity {
            name: "ALICE".into(),
            entity_type: "PERSON".into(),
            confidence: 0.8,
        },
    );
    entities.insert(
        "ALICE SMITH".into(),
        RawEntity {
            name: "ALICE SMITH".into(),
            entity_type: "PERSON".into(),
            confidence: 0.9,
        },
    );
    let aliases = build_alias_map(&entities);
    assert_eq!(
        aliases.get("ALICE").map(String::as_str),
        Some("ALICE SMITH")
    );
    assert_eq!(resolve_alias("ALICE", &aliases), "ALICE SMITH");

    let reverse = reverse_aliases(&aliases);
    assert_eq!(reverse.get("ALICE SMITH"), Some(&vec!["ALICE".to_string()]));
}

#[test]
fn enrich_document_metadata_merges_tags_and_ingestion_details() {
    let input = sample_input();
    let parsed = ParsedIngestion {
        tags: vec!["decision".into(), "existing".into()],
        metadata: json!({"kind": "profile", "extra": 1}),
        entities: vec![],
        relations: vec![],
        chunk_count: 3,
        preference_count: 1,
        decision_count: 2,
    };
    let config = MemoryIngestionConfig::default();
    let (enriched, tags) = enrich_document_metadata(&input, &parsed, &config);

    assert_eq!(tags, vec!["decision".to_string(), "existing".to_string()]);
    assert_eq!(enriched.tags, tags);
    assert_eq!(enriched.metadata["seed"], json!(true));
    assert_eq!(enriched.metadata["extra"], json!(1));
    assert_eq!(
        enriched.metadata["ingestion"]["model_name"],
        config.model_name
    );
    assert_eq!(enriched.metadata["ingestion"]["chunk_count"], json!(3));
}

#[test]
fn extract_people_from_header_collects_named_people() {
    let mut acc = ExtractionAccumulator::default();
    let people = extract_people_from_header(
        "Alice Smith <alice@example.com>, Bob Jones <bob@example.com>",
        &mut acc,
    );
    assert_eq!(
        people,
        vec!["ALICE SMITH".to_string(), "BOB JONES".to_string()]
    );
    assert!(acc.entities.contains_key("ALICE SMITH"));
    assert!(acc.entities.contains_key("BOB JONES"));
}

#[test]
fn detect_primary_subject_only_matches_openhuman() {
    assert_eq!(
        detect_primary_subject("OpenHuman desktop roadmap"),
        Some("OPENHUMAN".to_string())
    );
    assert_eq!(detect_primary_subject("General roadmap"), None);
}