normcore 0.1.1

Rust implementation baseline for NormCore normative admissibility evaluator
Documentation
use crate::json::JsonValue;
use crate::json::parse_json;
use crate::models::Ground;
use crate::models::ToolResultSpeechAct;
use crate::normative::models::KnowledgeNode;
use crate::normative::models::Scope;
use crate::normative::models::Source;
use crate::normative::models::Status;
use std::collections::BTreeMap;
use std::collections::HashSet;

pub struct KnowledgeStateBuilder;

impl KnowledgeStateBuilder {
    pub fn build(&self, tool_results: &[ToolResultSpeechAct]) -> Vec<KnowledgeNode> {
        let (nodes, _) = self.build_with_references(tool_results);
        nodes
    }

    pub fn build_with_references(
        &self,
        tool_results: &[ToolResultSpeechAct],
    ) -> (Vec<KnowledgeNode>, BTreeMap<String, Vec<String>>) {
        let mut nodes = Vec::new();
        let mut refs: BTreeMap<String, Vec<String>> = BTreeMap::new();
        for result in tool_results {
            let maybe = self.tool_result_to_knowledge(result);
            let produced = match maybe {
                None => continue,
                Some(v) => v,
            };
            let ids: Vec<String> = produced
                .iter()
                .map(|n| n.semantic_id.clone().unwrap_or_else(|| n.id.clone()))
                .collect();
            if let Some(call_id) = &result.tool_call_id
                && !ids.is_empty()
            {
                refs.insert(call_id.clone(), ids);
            }
            nodes.extend(produced);
        }
        (nodes, refs)
    }

    pub fn materialize_external_grounds(
        &self,
        knowledge_nodes: &[KnowledgeNode],
        grounds: &[Ground],
    ) -> Vec<KnowledgeNode> {
        if grounds.is_empty() {
            return knowledge_nodes.to_vec();
        }
        let existing_ids: HashSet<String> = knowledge_nodes.iter().map(|n| n.id.clone()).collect();
        let existing_semantic_ids: HashSet<String> = knowledge_nodes
            .iter()
            .filter_map(|n| n.semantic_id.clone())
            .collect();

        let mut expanded = knowledge_nodes.to_vec();
        for ground in grounds {
            if existing_ids.contains(&ground.ground_id)
                || existing_semantic_ids.contains(&ground.ground_id)
            {
                continue;
            }
            let node = KnowledgeNode::new(
                ground.ground_id.clone(),
                Source::Observed,
                Status::Confirmed,
                1.0,
                Scope::Factual,
                "strong".to_string(),
                Some(ground.ground_id.clone()),
            )
            .expect("known-valid node");
            expanded.push(node);
        }
        expanded
    }

    pub fn tool_result_to_knowledge(
        &self,
        tool_result: &ToolResultSpeechAct,
    ) -> Option<Vec<KnowledgeNode>> {
        let tool_name = if tool_result.tool_name.is_empty() {
            "unknown"
        } else {
            &tool_result.tool_name
        };
        if self.is_non_epistemic_tool(tool_name) {
            return None;
        }

        let extracted = self.extract_semantic_id(tool_result);
        if let Some(SemanticExtract::Many(ids)) = extracted.clone() {
            let mut out = Vec::new();
            for (idx, sid) in ids.into_iter().enumerate() {
                let stable = stable_id_fragment(&format!("{tool_name}:{sid}"));
                out.push(
                    KnowledgeNode::new(
                        format!("tool_{tool_name}_item{idx}_{stable}"),
                        Source::Observed,
                        Status::Confirmed,
                        1.0,
                        Scope::Factual,
                        "strong".to_string(),
                        Some(sid),
                    )
                    .expect("known-valid node"),
                );
            }
            return Some(out);
        }

        let semantic_id = match extracted {
            Some(SemanticExtract::One(v)) => Some(v),
            _ => None,
        };
        let stable = stable_id_fragment(&format!(
            "{}:{}:{}",
            tool_name,
            tool_result.result_text,
            tool_result.tool_call_id.clone().unwrap_or_default()
        ));
        Some(vec![
            KnowledgeNode::new(
                format!("tool_{tool_name}_{stable}"),
                Source::Observed,
                Status::Confirmed,
                1.0,
                Scope::Factual,
                "strong".to_string(),
                semantic_id,
            )
            .expect("known-valid node"),
        ])
    }

    fn is_non_epistemic_tool(&self, tool_name: &str) -> bool {
        let name = tool_name.to_lowercase();
        if name == "get_user_cognitive_context" {
            return true;
        }
        if name.contains("personalization") || name.contains("personal_context") {
            return true;
        }
        if name.contains("memory")
            && [
                "save",
                "note",
                "notes",
                "load",
                "consolidat",
                "distill",
                "state",
            ]
            .iter()
            .any(|k| name.contains(k))
        {
            return true;
        }
        if name.contains("profile")
            && ["save", "set", "update", "load", "consolidat"]
                .iter()
                .any(|k| name.contains(k))
        {
            return true;
        }
        [
            "remember",
            "preference",
            "preferences",
            "setting",
            "settings",
        ]
        .iter()
        .any(|k| name.contains(k))
    }

    fn extract_semantic_id(&self, tool_result: &ToolResultSpeechAct) -> Option<SemanticExtract> {
        if tool_result.result_text.trim().is_empty() {
            return None;
        }
        let Ok(data) = parse_json(&tool_result.result_text) else {
            return None;
        };

        match data {
            JsonValue::Array(items) => {
                let mut ids = Vec::new();
                for item in items {
                    if let JsonValue::Object(map) = item
                        && let Some(id) = extract_entity_id(&map)
                    {
                        ids.push(id);
                    }
                }
                if ids.is_empty() {
                    None
                } else {
                    Some(SemanticExtract::Many(ids))
                }
            }
            JsonValue::Object(map) => extract_entity_id(&map).map(SemanticExtract::One),
            _ => None,
        }
    }
}

/// Entity ids extracted from the JSON text of a tool result.
#[derive(Clone)]
enum SemanticExtract {
    /// A single id, produced when the JSON text is an object.
    One(String),
    /// One id per matching element, produced when the JSON text is an array.
    Many(Vec<String>),
}

fn extract_entity_id(map: &BTreeMap<String, JsonValue>) -> Option<String> {
    for (field, value) in map {
        if let Some(prefix) = field.strip_suffix("_key")
            && let Some(v) = value.as_str()
        {
            return Some(format!("{prefix}_{v}"));
        }
    }
    for (field, value) in map {
        if let Some(prefix) = field.strip_suffix("_id")
            && let Some(v) = value.as_str()
        {
            return Some(format!("{prefix}_{v}"));
        }
    }
    None
}

/// Returns a deterministic 10-character lowercase hex fragment for `value`.
///
/// The bytes of `value` are folded into a 64-bit accumulator (xor the byte,
/// then wrapping-multiply by a fixed prime); the result is rendered as 16
/// zero-padded hex digits and the first 10 are kept.
fn stable_id_fragment(value: &str) -> String {
    // NOTE(review): this seed is not the canonical FNV-1a 64-bit offset basis
    // (14695981039346656037); it looks like that value with the last digit
    // dropped. It is kept as is because changing it would change every
    // generated id.
    const SEED: u64 = 1469598103934665603;
    const PRIME: u64 = 1099511628211;
    const FRAGMENT_LEN: usize = 10;

    let digest = value
        .bytes()
        .fold(SEED, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));
    let mut hex = format!("{digest:016x}");
    hex.truncate(FRAGMENT_LEN);
    hex
}

#[cfg(test)]
mod tests {
    use super::KnowledgeStateBuilder;
    use super::stable_id_fragment;
    use crate::json::JsonValue;
    use crate::models::CreatorType;
    use crate::models::EvidenceType;
    use crate::models::Ground;
    use crate::models::LinkRole;
    use crate::models::ToolResultSpeechAct;
    use crate::normative::models::KnowledgeNode;
    use crate::normative::models::Scope;
    use crate::normative::models::Source;
    use crate::normative::models::Status;
    use std::collections::BTreeMap;

    /// Observed, confirmed node whose semantic id is `sem_{id}`.
    fn observed_node(id: &str, scope: Scope, strength: &str) -> KnowledgeNode {
        let semantic_id = Some(format!("sem_{id}"));
        KnowledgeNode::new(
            id.to_string(),
            Source::Observed,
            Status::Confirmed,
            1.0,
            scope,
            strength.to_string(),
            semantic_id,
        )
        .expect("must create node")
    }

    /// Tool result with no arguments.
    fn tool_result(name: &str, call_id: Option<&str>, body: &str) -> ToolResultSpeechAct {
        ToolResultSpeechAct {
            tool_name: name.to_string(),
            tool_call_id: call_id.map(String::from),
            arguments: BTreeMap::new(),
            result_text: body.to_string(),
        }
    }

    /// Supporting observation ground created by the upstream pipeline.
    fn upstream_ground(citation_key: &str, ground_id: &str) -> Ground {
        Ground {
            citation_key: citation_key.to_string(),
            ground_id: ground_id.to_string(),
            role: LinkRole::Supports,
            creator: CreatorType::UpstreamPipeline,
            evidence_type: EvidenceType::Observation,
            evidence_content: None,
            signature: None,
        }
    }

    #[test]
    fn knowledge_builder_extracts_semantic_id() {
        let result = tool_result("get_issue", None, r#"{"issue_id":"123"}"#);
        let produced = KnowledgeStateBuilder
            .tool_result_to_knowledge(&result)
            .expect("must produce node");
        assert_eq!(produced[0].semantic_id, Some("issue_123".to_string()));
    }

    #[test]
    fn materialize_external_grounds_injects_missing() {
        let existing = vec![observed_node("tool_weather", Scope::Factual, "strong")];
        let grounds = vec![upstream_ground(
            "file_hist",
            "archive_nyc_weather_2025-02-07",
        )];

        let expanded = KnowledgeStateBuilder.materialize_external_grounds(&existing, &grounds);
        let injected = expanded
            .iter()
            .any(|n| n.id == "archive_nyc_weather_2025-02-07");
        assert!(injected);
    }

    #[test]
    fn non_epistemic_tool_is_ignored() {
        let result = ToolResultSpeechAct {
            arguments: BTreeMap::from([("k".to_string(), JsonValue::String("v".to_string()))]),
            ..tool_result("save_memory", Some("call_1"), r#"{"ok":true}"#)
        };
        assert!(
            KnowledgeStateBuilder
                .tool_result_to_knowledge(&result)
                .is_none()
        );
    }

    #[test]
    fn build_returns_nodes_for_epistemic_tools() {
        let inputs = vec![tool_result(
            "get_issue",
            Some("call_1"),
            r#"{"issue_id":"123"}"#,
        )];
        let built = KnowledgeStateBuilder.build(&inputs);
        assert_eq!(built.len(), 1);
        assert_eq!(built[0].semantic_id.as_deref(), Some("issue_123"));
    }

    #[test]
    fn materialize_external_grounds_deduplicates_by_semantic_id() {
        let existing = vec![observed_node("tool_weather", Scope::Factual, "strong")];
        // The ground id equals the semantic id of the existing node.
        let grounds = vec![upstream_ground("sem_tool_weather", "sem_tool_weather")];
        let expanded = KnowledgeStateBuilder.materialize_external_grounds(&existing, &grounds);
        assert_eq!(expanded.len(), 1);
    }

    #[test]
    fn extract_semantic_id_from_array_emits_many_nodes() {
        let result = tool_result(
            "list_issues",
            Some("call_list"),
            r#"[{"issue_id":"1"},{"issue_id":"2"}]"#,
        );
        let produced = KnowledgeStateBuilder
            .tool_result_to_knowledge(&result)
            .expect("must produce nodes");
        let semantic_ids: Vec<Option<&str>> =
            produced.iter().map(|n| n.semantic_id.as_deref()).collect();
        assert_eq!(semantic_ids, vec![Some("issue_1"), Some("issue_2")]);
    }

    #[test]
    fn stable_id_fragment_is_deterministic_and_non_empty() {
        let first = stable_id_fragment("hello");
        let repeat = stable_id_fragment("hello");
        let other = stable_id_fragment("world");
        assert_eq!(first, repeat);
        assert_ne!(first, other);
        // A length of 10 also establishes that the fragment is non-empty.
        assert_eq!(first.len(), 10);
    }
}