mig-assembly 0.1.60

MIG-guided EDIFACT tree assembly — parse RawSegments into typed MIG trees
Documentation
//! PID detection from EDIFACT segments.
//!
//! Determines the Pruefidentifikator (PID) from a list of parsed EDIFACT
//! segments. Most BDEW message types embed the PID directly in RFF+Z13;
//! APERAK/CONTRL-class messages don't and need message-type-specific
//! BGM-code lookup tables instead.

use crate::tokenize::OwnedSegment;
use crate::AssemblyError;

/// Determine the Pruefidentifikator (PID) of a message from its parsed segments.
///
/// Two strategies are tried, in this order:
///   1. Direct reference: the first `RFF` segment whose first element reads
///      `Z13:<pid>` with a non-empty `<pid>` yields that value verbatim.
///   2. Dispatch on the message type taken from the first `UNH` segment
///      (element 1, component 0):
///      - `APERAK`: BGM document-code lookup (313 → 92001, 312 → 92002).
///      - `UTILMD`: BGM+STS combination, or BGM alone when there is no STS.
///
/// # Errors
///
/// Returns [`AssemblyError::PidDetectionFailed`] when no usable RFF+Z13 exists
/// and the message type is absent, unsupported, or has no matching table entry.
///
/// On success the PID is returned as a string (e.g., "55001").
pub fn detect_pid(segments: &[OwnedSegment]) -> Result<String, AssemblyError> {
    // Strategy 1: an RFF+Z13:<pid_number> reference carries the PID itself,
    // so the first non-empty one short-circuits everything else.
    let explicit_pid = segments
        .iter()
        .filter(|seg| seg.is("RFF") && seg.get_component(0, 0) == "Z13")
        .map(|seg| seg.get_component(0, 1))
        .find(|pid| !pid.is_empty());
    if let Some(pid) = explicit_pid {
        return Ok(pid.to_string());
    }

    // Strategy 2: the lookup table depends on the message class. Running
    // UTILMD's BGM table against an APERAK would silently yield a wrong PID,
    // so the UNH message type decides which resolver is used.
    let message_type = match segments.iter().find(|seg| seg.is("UNH")) {
        Some(unh) => unh.get_component(1, 0).to_string(),
        None => String::new(),
    };

    match message_type.as_str() {
        "UTILMD" => resolve_utilmd_pid_fallback(segments),
        "APERAK" => resolve_aperak_pid(segments),
        // No resolution is defined for other message classes lacking
        // RFF+Z13; report failure instead of guessing.
        _ => Err(AssemblyError::PidDetectionFailed),
    }
}

/// Fallback PID resolution for UTILMD messages that carry no RFF+Z13.
///
/// Uses element 0 of the first `BGM` segment as the document code. When an
/// `STS` segment is present, its element 1 / component 0 is combined with the
/// document code; otherwise the document code alone is looked up.
///
/// Fails with `AssemblyError::PidDetectionFailed` when there is no `BGM`
/// segment or the selected lookup has no matching entry.
fn resolve_utilmd_pid_fallback(segments: &[OwnedSegment]) -> Result<String, AssemblyError> {
    // Without a BGM there is nothing to look up, regardless of STS.
    let document_code = match segments.iter().find(|seg| seg.is("BGM")) {
        Some(bgm_seg) => bgm_seg.get_element(0),
        None => return Err(AssemblyError::PidDetectionFailed),
    };

    match segments.iter().find(|seg| seg.is("STS")) {
        Some(sts_seg) => resolve_utilmd_pid(document_code, sts_seg.get_component(1, 0)),
        None => resolve_utilmd_pid_from_bgm(document_code),
    }
}

/// Map an APERAK message to its PID using the BGM document code.
///
/// Element 0 of the first `BGM` segment selects the PID:
///   - `313` (Anwendungssystemfehlermeldung) → 92001 "Fehlermeldung"
///   - `312` (Anerkennungsmeldung)           → 92002 "Anerkennungsmeldung"
///
/// The BGM value is treated as authoritative: fixture filenames in the
/// public corpus sometimes name one PID while the BGM encodes the other.
///
/// Fails with `AssemblyError::PidDetectionFailed` when no `BGM` segment
/// exists or its document code is neither `313` nor `312`.
fn resolve_aperak_pid(segments: &[OwnedSegment]) -> Result<String, AssemblyError> {
    segments
        .iter()
        .find(|seg| seg.is("BGM"))
        .and_then(|bgm| match bgm.get_element(0) {
            "313" => Some("92001"),
            "312" => Some("92002"),
            _ => None,
        })
        .map(String::from)
        .ok_or(AssemblyError::PidDetectionFailed)
}

/// Look up a UTILMD PID from the BGM document code and the STS transaction reason.
///
/// Only the combinations listed in the table below are known; the table
/// covers the most common cases and can be extended as needed:
/// - `E01` with `Z33` / `Z34` / `Z35` -> 55001 / 55002 / 55003
/// - `E02` with `Z33` / `Z34`         -> 55101 / 55102
/// - `E03` with `Z33`                 -> 55201
///
/// Any other pair fails with `AssemblyError::PidDetectionFailed`.
fn resolve_utilmd_pid(doc_code: &str, reason: &str) -> Result<String, AssemblyError> {
    let pid = match (doc_code, reason) {
        ("E01", "Z33") => "55001",
        ("E01", "Z34") => "55002",
        ("E01", "Z35") => "55003",
        ("E02", "Z33") => "55101",
        ("E02", "Z34") => "55102",
        ("E03", "Z33") => "55201",
        // Unknown combination: refuse to guess.
        _ => return Err(AssemblyError::PidDetectionFailed),
    };
    Ok(pid.to_string())
}

/// Look up a UTILMD PID from the BGM document code alone (used when no STS
/// transaction reason is available).
///
/// Known codes: `E01` -> 55001, `E02` -> 55101, `E03` -> 55201. Any other
/// code fails with `AssemblyError::PidDetectionFailed`.
fn resolve_utilmd_pid_from_bgm(doc_code: &str) -> Result<String, AssemblyError> {
    // (document code, PID used when only the document code is known)
    const BGM_ONLY_PIDS: [(&str, &str); 3] =
        [("E01", "55001"), ("E02", "55101"), ("E03", "55201")];

    BGM_ONLY_PIDS
        .iter()
        .find(|&&(code, _)| code == doc_code)
        .map(|&(_, pid)| pid.to_string())
        .ok_or(AssemblyError::PidDetectionFailed)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `OwnedSegment` from string literals. The outer vector holds
    /// the elements, each inner vector the components of one element.
    fn make_segment(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
        OwnedSegment {
            id: id.to_string(),
            elements: elements
                .into_iter()
                .map(|e| e.into_iter().map(|c| c.to_string()).collect())
                .collect(),
            segment_number: 0,
        }
    }

    #[test]
    fn test_detect_pid_from_rff_z13() {
        let segments = vec![
            make_segment("UNH", vec![vec!["001"]]),
            make_segment("BGM", vec![vec!["E01"]]),
            make_segment("RFF", vec![vec!["Z13", "55001"]]),
            make_segment("UNT", vec![vec!["3", "001"]]),
        ];
        let pid = detect_pid(&segments).unwrap();
        assert_eq!(pid, "55001");
    }

    #[test]
    fn test_detect_pid_from_bgm_and_sts() {
        let segments = vec![
            make_segment("UNH", vec![vec!["001"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]]),
            make_segment("BGM", vec![vec!["E01"]]),
            make_segment("STS", vec![vec![""], vec!["Z33"]]),
            make_segment("UNT", vec![vec!["3", "001"]]),
        ];
        let pid = detect_pid(&segments).unwrap();
        assert_eq!(pid, "55001");
    }

    #[test]
    fn test_detect_pid_from_bgm_only() {
        let segments = vec![
            make_segment("UNH", vec![vec!["001"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]]),
            make_segment("BGM", vec![vec!["E03"]]),
            make_segment("UNT", vec![vec!["2", "001"]]),
        ];
        let pid = detect_pid(&segments).unwrap();
        assert_eq!(pid, "55201");
    }

    #[test]
    fn test_detect_pid_fails_no_bgm() {
        // UNH must name UTILMD here; otherwise detection already fails on the
        // message-type dispatch and the missing-BGM branch is never reached.
        let segments = vec![
            make_segment("UNH", vec![vec!["001"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]]),
            make_segment("UNT", vec![vec!["1", "001"]]),
        ];
        let result = detect_pid(&segments);
        assert!(matches!(result, Err(AssemblyError::PidDetectionFailed)));
    }

    #[test]
    fn test_detect_pid_fails_aperak_no_bgm() {
        // Same as above for the APERAK resolver: no BGM means no PID.
        let segments = vec![
            make_segment("UNH", vec![vec!["MSG"], vec!["APERAK", "D", "07B", "UN", "2.1i"]]),
            make_segment("UNT", vec![vec!["2"], vec!["MSG"]]),
        ];
        let result = detect_pid(&segments);
        assert!(matches!(result, Err(AssemblyError::PidDetectionFailed)));
    }

    #[test]
    fn test_detect_pid_fails_without_message_type() {
        // UNH carries no message-type element and there is no RFF+Z13, so
        // no resolver is selected — even though a known BGM code is present.
        let segments = vec![
            make_segment("UNH", vec![vec!["001"]]),
            make_segment("BGM", vec![vec!["E01"]]),
            make_segment("UNT", vec![vec!["2", "001"]]),
        ];
        let result = detect_pid(&segments);
        assert!(matches!(result, Err(AssemblyError::PidDetectionFailed)));
    }

    #[test]
    fn test_detect_pid_fails_for_unsupported_message_type_without_rff() {
        // Message classes other than APERAK/UTILMD have no fallback table.
        let segments = vec![
            make_segment("UNH", vec![vec!["001"], vec!["MSCONS", "D", "04B", "UN", "2.4c"]]),
            make_segment("BGM", vec![vec!["E01"]]),
            make_segment("UNT", vec![vec!["2", "001"]]),
        ];
        let result = detect_pid(&segments);
        assert!(matches!(result, Err(AssemblyError::PidDetectionFailed)));
    }

    #[test]
    fn test_detect_pid_prefers_rff_z13_over_bgm() {
        // If both RFF+Z13 and BGM are present, RFF+Z13 wins
        let segments = vec![
            make_segment("UNH", vec![vec!["001"]]),
            make_segment("BGM", vec![vec!["E01"]]),
            make_segment("STS", vec![vec![""], vec!["Z33"]]),
            make_segment("RFF", vec![vec!["Z13", "99999"]]),
            make_segment("UNT", vec![vec!["4", "001"]]),
        ];
        let pid = detect_pid(&segments).unwrap();
        assert_eq!(pid, "99999"); // RFF+Z13 takes priority
    }

    #[test]
    fn test_detect_pid_from_parsed_edifact() {
        // Test with actual parsed EDIFACT input
        let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001'RFF+Z13:55001'UNT+3+MSG001'UNZ+1+REF001'";
        let segments = crate::tokenize::parse_to_segments(input).unwrap();
        let pid = detect_pid(&segments).unwrap();
        assert_eq!(pid, "55001");
    }

    #[test]
    fn test_detect_aperak_fehlermeldung_from_bgm_313() {
        // APERAK has no RFF+Z13 — must be resolved via UNH message type
        // and BGM document code. BGM+313 = Anwendungssystemfehlermeldung → 92001.
        let segments = vec![
            make_segment("UNH", vec![vec!["MSG"], vec!["APERAK", "D", "07B", "UN", "2.1i"]]),
            make_segment("BGM", vec![vec!["313"], vec!["MSG-BGM"]]),
            make_segment("UNT", vec![vec!["3"], vec!["MSG"]]),
        ];
        let pid = detect_pid(&segments).unwrap();
        assert_eq!(pid, "92001");
    }

    #[test]
    fn test_detect_aperak_anerkennungsmeldung_from_bgm_312() {
        // BGM+312 = Anerkennungsmeldung → 92002
        let segments = vec![
            make_segment("UNH", vec![vec!["MSG"], vec!["APERAK", "D", "07B", "UN", "2.1i"]]),
            make_segment("BGM", vec![vec!["312"], vec!["MSG-BGM"]]),
            make_segment("UNT", vec![vec!["3"], vec!["MSG"]]),
        ];
        let pid = detect_pid(&segments).unwrap();
        assert_eq!(pid, "92002");
    }

    #[test]
    fn test_detect_aperak_unknown_bgm_code_fails() {
        let segments = vec![
            make_segment("UNH", vec![vec!["MSG"], vec!["APERAK", "D", "07B", "UN", "2.1i"]]),
            make_segment("BGM", vec![vec!["999"], vec!["MSG-BGM"]]),
            make_segment("UNT", vec![vec!["3"], vec!["MSG"]]),
        ];
        assert!(detect_pid(&segments).is_err());
    }

    #[test]
    fn test_aperak_detector_not_applied_to_non_aperak() {
        // BGM+312 on a UTILMD message must NOT be resolved to APERAK 92002 —
        // the dispatch happens AFTER message-type detection from UNH.
        let segments = vec![
            make_segment("UNH", vec![vec!["MSG"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]]),
            make_segment("BGM", vec![vec!["312"], vec!["MSG-BGM"]]),
            make_segment("UNT", vec![vec!["3"], vec!["MSG"]]),
        ];
        // Should NOT match APERAK table; UTILMD fallback has no entry for "312".
        // Any outcome other than an `Ok` PID in the 92xxx range is acceptable.
        let result = detect_pid(&segments);
        assert!(
            !matches!(&result, Ok(pid) if pid.starts_with("92")),
            "UTILMD BGM+312 must not resolve to APERAK PID, got {:?}",
            result
        );
    }
}