normcore 0.1.1

Rust implementation baseline for NormCore normative admissibility evaluator
Documentation
use crate::json::JsonValue;
use crate::models::CreatorType;
use crate::models::EvidenceType;
use crate::models::Ground;
use crate::models::LinkRole;
use crate::models::LinkSet;
use crate::models::Provenance;
use crate::models::StatementGroundLink;
use std::collections::BTreeMap;
use std::collections::HashSet;

pub fn parse_grounds(payload: &[JsonValue]) -> Vec<Ground> {
    let mut grounds = Vec::new();
    for item in payload {
        let Some(obj) = item.as_object() else {
            continue;
        };
        let Some(citation_key) = obj.get("citation_key").and_then(JsonValue::as_str) else {
            continue;
        };
        let Some(ground_id) = obj.get("ground_id").and_then(JsonValue::as_str) else {
            continue;
        };
        grounds.push(Ground {
            citation_key: citation_key.to_string(),
            ground_id: ground_id.to_string(),
            role: LinkRole::Supports,
            creator: CreatorType::UpstreamPipeline,
            evidence_type: EvidenceType::Observation,
            evidence_content: obj
                .get("evidence_content")
                .and_then(JsonValue::as_str)
                .map(ToString::to_string),
            signature: obj
                .get("signature")
                .and_then(JsonValue::as_str)
                .map(ToString::to_string),
        });
    }
    grounds
}

pub fn extract_citation_keys(text: &str) -> Vec<String> {
    if text.is_empty() {
        return Vec::new();
    }
    let bytes = text.as_bytes();
    let mut i = 0;
    let mut keys = Vec::new();
    let mut seen = HashSet::new();

    while i + 2 < bytes.len() {
        if bytes[i] == b'[' && bytes[i + 1] == b'@' {
            let start = i + 2;
            let mut end = start;
            while end < bytes.len() && bytes[end] != b']' {
                end += 1;
            }
            if end < bytes.len() && end > start {
                let candidate = &text[start..end];
                if is_valid_citation_key(candidate) && !seen.contains(candidate) {
                    seen.insert(candidate.to_string());
                    keys.push(candidate.to_string());
                }
            }
            i = end;
        }
        i += 1;
    }

    keys
}

fn is_valid_citation_key(key: &str) -> bool {
    let mut chars = key.chars();
    let Some(first) = chars.next() else {
        return false;
    };
    if !first.is_ascii_alphabetic() {
        return false;
    }
    chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
}

pub fn build_links_from_grounds(text: &str, grounds: &[Ground], statement_id: &str) -> LinkSet {
    let mut by_key: BTreeMap<String, Vec<&Ground>> = BTreeMap::new();
    for ground in grounds {
        by_key
            .entry(ground.citation_key.clone())
            .or_default()
            .push(ground);
    }

    let mut links = Vec::new();
    for key in extract_citation_keys(text) {
        if let Some(list) = by_key.get(&key) {
            for ground in list {
                links.push(StatementGroundLink {
                    statement_id: statement_id.to_string(),
                    ground_id: ground.ground_id.clone(),
                    role: ground.role.clone(),
                    provenance: Provenance {
                        creator: ground.creator.clone(),
                        evidence_type: ground.evidence_type.clone(),
                        evidence_content: Some(
                            ground
                                .evidence_content
                                .clone()
                                .unwrap_or_else(|| format!("citation_key={key}")),
                        ),
                        signature: ground.signature.clone(),
                    },
                });
            }
        }
    }

    LinkSet { links }
}

pub fn grounds_from_tool_call_refs(
    tool_call_refs: &std::collections::BTreeMap<String, Vec<String>>,
) -> Vec<Ground> {
    let mut out = Vec::new();
    for (citation_key, ground_ids) in tool_call_refs {
        for ground_id in ground_ids {
            out.push(Ground {
                citation_key: citation_key.clone(),
                ground_id: ground_id.clone(),
                role: LinkRole::Supports,
                creator: CreatorType::ToolObserver,
                evidence_type: EvidenceType::Observation,
                evidence_content: Some(format!("tool_call_id={citation_key}")),
                signature: None,
            });
        }
    }
    out
}

pub fn parse_openai_citations(citations: &[JsonValue]) -> Vec<JsonValue> {
    let mut out = Vec::new();
    for item in citations {
        let Some(obj) = item.as_object() else {
            continue;
        };
        let Some(kind) = obj.get("type").and_then(JsonValue::as_str) else {
            continue;
        };
        match kind {
            "file_citation" | "container_file_citation" | "file_path" => {
                if obj.get("file_id").and_then(JsonValue::as_str).is_some() {
                    out.push(item.clone());
                }
            }
            "url_citation" => {
                if obj.get("url").and_then(JsonValue::as_str).is_some() {
                    out.push(item.clone());
                }
            }
            _ => {}
        }
    }
    out
}

pub fn grounds_from_openai_citations(citations: &[JsonValue]) -> Vec<Ground> {
    let mut grounds = Vec::new();
    for citation in citations {
        let Some(ground_id) = extract_ground_id(citation) else {
            continue;
        };
        grounds.push(Ground {
            citation_key: ground_id.clone(),
            ground_id,
            role: LinkRole::Supports,
            creator: CreatorType::UpstreamPipeline,
            evidence_type: EvidenceType::Observation,
            evidence_content: Some("openai_citation".to_string()),
            signature: None,
        });
    }
    grounds
}

pub fn link_set_from_openai_citations(citations: &[JsonValue], statement_id: &str) -> LinkSet {
    let mut links = Vec::new();
    for (idx, citation) in citations.iter().enumerate() {
        let Some(ground_id) = extract_ground_id(citation) else {
            continue;
        };
        links.push(StatementGroundLink {
            statement_id: statement_id.to_string(),
            ground_id,
            role: LinkRole::Supports,
            provenance: Provenance {
                creator: CreatorType::UpstreamPipeline,
                evidence_type: EvidenceType::Observation,
                evidence_content: Some(format!("openai_citation[{idx}]")),
                signature: None,
            },
        });
    }
    LinkSet { links }
}

fn extract_ground_id(citation: &JsonValue) -> Option<String> {
    let obj = citation.as_object()?;
    let kind = obj.get("type")?.as_str()?;
    match kind {
        "file_citation" | "container_file_citation" | "file_path" => obj
            .get("file_id")
            .and_then(JsonValue::as_str)
            .map(ToString::to_string),
        "url_citation" => obj
            .get("url")
            .and_then(JsonValue::as_str)
            .map(ToString::to_string),
        _ => None,
    }
}

pub fn coerce_grounds_input(
    grounds_payload: Option<&[JsonValue]>,
    legacy_openai_citations: Option<&[JsonValue]>,
    _legacy_links: Option<&JsonValue>,
) -> Vec<Ground> {
    let mut normalized = Vec::new();

    if let Some(payload) = grounds_payload {
        let explicit = parse_grounds(payload);
        if !explicit.is_empty() {
            normalized.extend(explicit);
        } else {
            let typed = parse_openai_citations(payload);
            normalized.extend(grounds_from_openai_citations(&typed));
        }
    }

    if let Some(citations) = legacy_openai_citations {
        let typed = parse_openai_citations(citations);
        normalized.extend(grounds_from_openai_citations(&typed));
    }

    normalized
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::json::parse_json;

    #[test]
    fn extract_citation_keys_preserves_order() {
        let text = "First [@toolCall1], again [@toolCall1], then [@DocX].";
        assert_eq!(extract_citation_keys(text), vec!["toolCall1", "DocX"]);
    }

    #[test]
    fn build_links_only_for_cited_keys() {
        let grounds = vec![
            Ground {
                citation_key: "toolCall1".to_string(),
                ground_id: "issue_AGENT-8".to_string(),
                role: LinkRole::Supports,
                creator: CreatorType::UpstreamPipeline,
                evidence_type: EvidenceType::Observation,
                evidence_content: None,
                signature: None,
            },
            Ground {
                citation_key: "DocX".to_string(),
                ground_id: "file_123".to_string(),
                role: LinkRole::Supports,
                creator: CreatorType::UpstreamPipeline,
                evidence_type: EvidenceType::Observation,
                evidence_content: None,
                signature: None,
            },
        ];

        let links = build_links_from_grounds(
            "Need action [@toolCall1], nothing else.",
            &grounds,
            "final_response",
        );
        assert_eq!(links.links.len(), 1);
        assert_eq!(links.links[0].ground_id, "issue_AGENT-8");
    }

    #[test]
    fn parse_openai_citations_validates_payload() {
        let value =
            parse_json(r#"[{"type":"file_citation","file_id":"file_1","filename":"a","index":0}]"#)
                .expect("json parses");
        let JsonValue::Array(arr) = value else {
            panic!("array expected")
        };
        let out = parse_openai_citations(&arr);
        assert_eq!(out.len(), 1);
    }

    #[test]
    fn parse_grounds_reads_explicit_payload() {
        let value = parse_json(
            r#"[{"citation_key":"docA","ground_id":"file_1","evidence_content":"ctx"}]"#,
        )
        .expect("json parses");
        let JsonValue::Array(arr) = value else {
            panic!("array expected")
        };
        let grounds = parse_grounds(&arr);
        assert_eq!(grounds.len(), 1);
        assert_eq!(grounds[0].citation_key, "docA");
        assert_eq!(grounds[0].ground_id, "file_1");
    }

    #[test]
    fn extract_citation_keys_ignores_invalid_or_unclosed_patterns() {
        let text = "bad [@1bad] good [@Doc_1] unclosed [@NoEnd and tail [@Also-Good].";
        assert_eq!(extract_citation_keys(text), vec!["Doc_1"]);
    }

    #[test]
    fn parse_openai_citations_accepts_url_citation() {
        let value =
            parse_json(r#"[{"type":"url_citation","url":"https://example.com","index":0}]"#)
                .expect("json parses");
        let JsonValue::Array(arr) = value else {
            panic!("array expected")
        };
        let out = parse_openai_citations(&arr);
        assert_eq!(out.len(), 1);
        let grounds = grounds_from_openai_citations(&out);
        assert_eq!(grounds.len(), 1);
        assert_eq!(grounds[0].ground_id, "https://example.com");
    }
}