Skip to main content

redispatch_xml/
parse.rs

//! Two-phase XML parsing pipeline for Redispatch 2.0 documents.
//!
//! ## Pipeline
//!
//! 1. **Detect** — scan the opening bytes of the input to identify the root
//!    element name and, where present, the `xmlns` namespace.
//! 2. **Deserialize** — pass the full input to [`quick_xml::de::from_str`].
//! 3. **Validate namespace** — for document types that carry a `targetNamespace`,
//!    confirm the detected namespace matches the expected value.
//!
//! Note that [`parse`] performs the namespace check (step 3) on the namespace
//! found during detection, i.e. *before* deserialization (step 2); a mismatch
//! is therefore reported without deserializing the document.
//!
//! No libxml2 / XSD validation is performed at parse time. Use the
//! [`crate::validation`] module for post-parse semantic/structural checks.
13
14use crate::documents::{
15    self, AcknowledgementDocument, ActivationDocument, DocumentType, Kaskade, Kostenblatt,
16    NetworkConstraintDocument, PlannedResourceScheduleDocument, Stammdaten,
17    StatusRequestMarketDocument, UnavailabilityMarketDocument,
18};
19use crate::error::RedispatchXmlError;
20
21// ── Document sum type ─────────────────────────────────────────────────────────
22
23/// A parsed Redispatch 2.0 document (any of the nine supported types).
24#[derive(Debug, Clone, PartialEq)]
25pub enum Document {
26    Activation(Box<ActivationDocument>),
27    PlannedResourceSchedule(Box<PlannedResourceScheduleDocument>),
28    Acknowledgement(Box<AcknowledgementDocument>),
29    Stammdaten(Box<Stammdaten>),
30    StatusRequest(Box<StatusRequestMarketDocument>),
31    Unavailability(Box<UnavailabilityMarketDocument>),
32    Kaskade(Box<Kaskade>),
33    NetworkConstraint(Box<NetworkConstraintDocument>),
34    Kostenblatt(Box<Kostenblatt>),
35}
36
37impl Document {
38    /// Return the [`DocumentType`] variant for this document.
39    pub fn document_type(&self) -> DocumentType {
40        match self {
41            Self::Activation(_) => DocumentType::Activation,
42            Self::PlannedResourceSchedule(_) => DocumentType::PlannedResourceSchedule,
43            Self::Acknowledgement(_) => DocumentType::Acknowledgement,
44            Self::Stammdaten(_) => DocumentType::Stammdaten,
45            Self::StatusRequest(_) => DocumentType::StatusRequest,
46            Self::Unavailability(_) => DocumentType::Unavailability,
47            Self::Kaskade(_) => DocumentType::Kaskade,
48            Self::NetworkConstraint(_) => DocumentType::NetworkConstraint,
49            Self::Kostenblatt(_) => DocumentType::Kostenblatt,
50        }
51    }
52
53    /// Return the document's primary identifier (mRID or `DocumentIdentification`).
54    ///
55    /// This is the correlation key used by the process engine to route inbound
56    /// documents to the correct workflow instance.
57    pub fn mrid(&self) -> &str {
58        match self {
59            Self::Activation(d) => d.document_identification.v.as_str(),
60            Self::PlannedResourceSchedule(d) => d.document_identification.v.as_str(),
61            Self::Acknowledgement(d) => d.document_identification.v.as_str(),
62            Self::Stammdaten(d) => d.document_identification.as_str(),
63            Self::StatusRequest(d) => d.m_rid.as_str(),
64            Self::Unavailability(d) => d.m_rid.as_str(),
65            Self::Kaskade(d) => d.m_rid.as_str(),
66            Self::NetworkConstraint(d) => d.document_identification.v.as_str(),
67            Self::Kostenblatt(d) => d.document_identification.v.as_str(),
68        }
69    }
70
71    /// Return the 13-digit GLN / EIC of the document sender.
72    pub fn sender_id(&self) -> &str {
73        match self {
74            Self::Activation(d) => d.sender_identification.v.as_str(),
75            Self::PlannedResourceSchedule(d) => d.sender_identification.v.as_str(),
76            Self::Acknowledgement(d) => d.sender_identification.v.as_str(),
77            Self::Stammdaten(d) => d.sender.code.as_str(),
78            Self::StatusRequest(d) => d.sender_market_participant.m_rid.value.as_str(),
79            Self::Unavailability(d) => d.sender_market_participant.m_rid.value.as_str(),
80            Self::Kaskade(d) => d.sender_market_participant.m_rid.value.as_str(),
81            Self::NetworkConstraint(d) => d.sender_identification.v.as_str(),
82            Self::Kostenblatt(d) => d.sender_identification.v.as_str(),
83        }
84    }
85
86    /// Return the 13-digit GLN / EIC of the document receiver.
87    pub fn receiver_id(&self) -> &str {
88        match self {
89            Self::Activation(d) => d.receiver_identification.v.as_str(),
90            Self::PlannedResourceSchedule(d) => d.receiver_identification.v.as_str(),
91            Self::Acknowledgement(d) => d.receiver_identification.v.as_str(),
92            Self::Stammdaten(d) => d.empfaenger.code.as_str(),
93            Self::StatusRequest(d) => d.receiver_market_participant.m_rid.value.as_str(),
94            Self::Unavailability(d) => d.receiver_market_participant.m_rid.value.as_str(),
95            Self::Kaskade(d) => d.receiver_market_participant.m_rid.value.as_str(),
96            Self::NetworkConstraint(d) => d.receiver_identification.v.as_str(),
97            Self::Kostenblatt(d) => d.receiver_identification.v.as_str(),
98        }
99    }
100}
101
102// ── From<T> for Document ──────────────────────────────────────────────────────
103
104impl From<ActivationDocument> for Document {
105    fn from(d: ActivationDocument) -> Self {
106        Self::Activation(Box::new(d))
107    }
108}
109impl From<PlannedResourceScheduleDocument> for Document {
110    fn from(d: PlannedResourceScheduleDocument) -> Self {
111        Self::PlannedResourceSchedule(Box::new(d))
112    }
113}
114impl From<AcknowledgementDocument> for Document {
115    fn from(d: AcknowledgementDocument) -> Self {
116        Self::Acknowledgement(Box::new(d))
117    }
118}
119impl From<Stammdaten> for Document {
120    fn from(d: Stammdaten) -> Self {
121        Self::Stammdaten(Box::new(d))
122    }
123}
124impl From<StatusRequestMarketDocument> for Document {
125    fn from(d: StatusRequestMarketDocument) -> Self {
126        Self::StatusRequest(Box::new(d))
127    }
128}
129impl From<UnavailabilityMarketDocument> for Document {
130    fn from(d: UnavailabilityMarketDocument) -> Self {
131        Self::Unavailability(Box::new(d))
132    }
133}
134impl From<Kaskade> for Document {
135    fn from(d: Kaskade) -> Self {
136        Self::Kaskade(Box::new(d))
137    }
138}
139impl From<NetworkConstraintDocument> for Document {
140    fn from(d: NetworkConstraintDocument) -> Self {
141        Self::NetworkConstraint(Box::new(d))
142    }
143}
144impl From<documents::Kostenblatt> for Document {
145    fn from(d: documents::Kostenblatt) -> Self {
146        Self::Kostenblatt(Box::new(d))
147    }
148}
149
150// ── Detection ─────────────────────────────────────────────────────────────────
151
/// Scan the first 4 KiB of `xml` for the root element's start tag and return
/// `(root_element_local_name, Option<namespace>)`.
///
/// This is intentionally a lightweight scan — not a full XML parse — so that
/// detection stays cheap even for large documents.
///
/// Scanning rules:
///
/// - A leading UTF-8 BOM is ignored.
/// - Comments (`<!-- … -->`) and processing instructions (`<? … ?>`) are
///   skipped as a whole, so a `<` inside them is never mistaken for the root.
/// - Other `<!…` declarations (e.g. `DOCTYPE`) are skipped.
/// - The element name ends at the first whitespace, `>` or `/`; a namespace
///   prefix (`ns:Root`) is removed from the returned name.
/// - The namespace is resolved from the start tag's attributes: the
///   declaration bound to the root's own prefix (`xmlns:ns`) wins, then the
///   default `xmlns`, then the first `xmlns:*` declaration.
///
/// Returns `(String::new(), None)` when no start tag is found inside the
/// window (including when a comment or processing instruction is not closed
/// within it).
fn detect_root(xml: &[u8]) -> (String, Option<String>) {
    /// Number of leading bytes inspected.
    const DETECT_WINDOW: usize = 4096;

    // Strip UTF-8 BOM (U+FEFF, encoded as EF BB BF) if present.
    let xml = xml.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(xml);

    // Lossy decoding is sufficient here: the window may end in the middle of
    // a multi-byte character and strict validation happens later in `parse`.
    let window = &xml[..xml.len().min(DETECT_WINDOW)];
    let text = String::from_utf8_lossy(window);

    let mut cursor: &str = &text;
    while let Some(lt) = cursor.find('<') {
        // '<' is a single ASCII byte, so `lt + 1` is always a char boundary.
        let after_lt = &cursor[lt + 1..];

        // Comment: resume after its terminator so markup inside is ignored.
        if let Some(comment) = after_lt.strip_prefix("!--") {
            match comment.find("-->") {
                Some(end) => {
                    cursor = &comment[end + 3..];
                    continue;
                }
                None => break,
            }
        }

        // XML declaration / processing instruction: resume after "?>".
        if let Some(instruction) = after_lt.strip_prefix('?') {
            match instruction.find("?>") {
                Some(end) => {
                    cursor = &instruction[end + 2..];
                    continue;
                }
                None => break,
            }
        }

        // Any other declaration (DOCTYPE, …): keep looking for the next '<'.
        if after_lt.starts_with('!') {
            cursor = after_lt;
            continue;
        }

        // First real start tag: the name runs up to whitespace, '>' or '/'.
        let name_end = after_lt
            .find(|c: char| c.is_whitespace() || c == '>' || c == '/')
            .unwrap_or(after_lt.len());
        let raw_name = &after_lt[..name_end];
        let (prefix, local_name) = match raw_name.rsplit_once(':') {
            Some((prefix, local)) => (Some(prefix), local),
            None => (None, raw_name),
        };

        // The attribute scanner stops at the first unquoted '>', so the rest
        // of the window can be handed over without pre-cutting the tag.
        let attrs = scan_attributes(&after_lt[name_end..]);
        return (local_name.to_string(), select_namespace(&attrs, prefix));
    }

    (String::new(), None)
}

/// Extract a namespace URI from a raw start-tag fragment (element name
/// followed by its attributes).
///
/// Returns the value of the default `xmlns` declaration if there is one,
/// otherwise the value of the first `xmlns:prefix` declaration, otherwise
/// `None`. Values may be enclosed in single or double quotes and whitespace
/// around `=` is tolerated. Entity references inside the value are not
/// expanded.
fn extract_default_namespace(tag: &str) -> Option<String> {
    select_namespace(&scan_attributes(tag), None)
}

/// Choose the namespace declaration that applies to an element carrying
/// `prefix`.
///
/// Order of preference: `xmlns:<prefix>` (when a prefix is given), the default
/// `xmlns`, and finally the first `xmlns:*` declaration.
fn select_namespace(attrs: &[(&str, &str)], prefix: Option<&str>) -> Option<String> {
    let bound_to_prefix = prefix.and_then(|p| {
        attrs
            .iter()
            .find(|(name, _)| name.strip_prefix("xmlns:") == Some(p))
    });

    bound_to_prefix
        .or_else(|| attrs.iter().find(|(name, _)| *name == "xmlns"))
        .or_else(|| attrs.iter().find(|(name, _)| name.starts_with("xmlns:")))
        .map(|(_, value)| (*value).to_owned())
}

/// Collect the `name="value"` / `name='value'` pairs of a start-tag fragment.
///
/// Tokens without a following `=` (the element name, a trailing `/`) are
/// skipped. Scanning stops at the first `>` outside a quoted value, at the end
/// of the input, or at the first malformed / unterminated attribute value.
fn scan_attributes(tag: &str) -> Vec<(&str, &str)> {
    let mut attrs = Vec::new();
    let mut rest = tag.trim_start();

    while !rest.is_empty() && !rest.starts_with('>') {
        let name_len = rest
            .find(|c: char| c.is_whitespace() || c == '=' || c == '>')
            .unwrap_or(rest.len());
        let (name, tail) = rest.split_at(name_len);
        let tail = tail.trim_start();

        let value_part = match tail.strip_prefix('=') {
            Some(after_eq) => after_eq.trim_start(),
            None => {
                // Bare token: `name_len` is non-zero here, so this advances.
                rest = tail;
                continue;
            }
        };

        let quote = match value_part.chars().next() {
            Some(q) if q == '"' || q == '\'' => q,
            _ => break,
        };
        // Both quote characters are one byte long.
        let body = &value_part[1..];
        match body.find(quote) {
            Some(end) => {
                attrs.push((name, &body[..end]));
                rest = body[end + 1..].trim_start();
            }
            None => break,
        }
    }

    attrs
}
222
223// ── Public API ────────────────────────────────────────────────────────────────
224
225/// Detect the document type of a Redispatch 2.0 XML message without fully
226/// deserializing it.
227///
228/// # Errors
229///
230/// Returns [`RedispatchXmlError::UnknownDocumentType`] if the root element is
231/// not a recognised Redispatch 2.0 document type.
232pub fn detect(xml: &[u8]) -> Result<DocumentType, RedispatchXmlError> {
233    let (root_name, _) = detect_root(xml);
234    DocumentType::from_root_element(&root_name)
235        .ok_or(RedispatchXmlError::UnknownDocumentType(root_name))
236}
237
238/// Deserialise a Redispatch 2.0 XML document into the appropriate [`Document`]
239/// variant.
240///
241/// The document type is detected automatically from the root element.
242///
243/// # Errors
244///
245/// - [`RedispatchXmlError::UnknownDocumentType`] — unrecognised root element.
246/// - [`RedispatchXmlError::Deserialize`] — XML deserialization failure.
247/// - [`RedispatchXmlError::NamespaceMismatch`] — wrong or missing namespace.
248pub fn parse(xml: &[u8]) -> Result<Document, RedispatchXmlError> {
249    let (root_name, detected_ns) = detect_root(xml);
250    let doc_type = DocumentType::from_root_element(&root_name)
251        .ok_or(RedispatchXmlError::UnknownDocumentType(root_name))?;
252
253    // Validate namespace where required.
254    if let Some(expected_ns) = doc_type.expected_namespace() {
255        match detected_ns.as_deref() {
256            Some(found) if found == expected_ns => {}
257            Some(found) => {
258                return Err(RedispatchXmlError::NamespaceMismatch {
259                    expected: expected_ns,
260                    found: found.to_string(),
261                });
262            }
263            None => {
264                return Err(RedispatchXmlError::NamespaceMismatch {
265                    expected: expected_ns,
266                    found: String::new(),
267                });
268            }
269        }
270    }
271
272    let text =
273        std::str::from_utf8(xml).map_err(|e| RedispatchXmlError::StructuralError(e.to_string()))?;
274
275    match doc_type {
276        DocumentType::Activation => {
277            let doc: ActivationDocument =
278                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
279            Ok(Document::Activation(Box::new(doc)))
280        }
281        DocumentType::PlannedResourceSchedule => {
282            let doc: PlannedResourceScheduleDocument =
283                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
284            Ok(Document::PlannedResourceSchedule(Box::new(doc)))
285        }
286        DocumentType::Acknowledgement => {
287            let doc: AcknowledgementDocument =
288                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
289            Ok(Document::Acknowledgement(Box::new(doc)))
290        }
291        DocumentType::Stammdaten => {
292            let doc: Stammdaten =
293                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
294            Ok(Document::Stammdaten(Box::new(doc)))
295        }
296        DocumentType::StatusRequest => {
297            let doc: StatusRequestMarketDocument =
298                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
299            Ok(Document::StatusRequest(Box::new(doc)))
300        }
301        DocumentType::Unavailability => {
302            let doc: UnavailabilityMarketDocument =
303                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
304            Ok(Document::Unavailability(Box::new(doc)))
305        }
306        DocumentType::Kaskade => {
307            let doc: Kaskade =
308                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
309            Ok(Document::Kaskade(Box::new(doc)))
310        }
311        DocumentType::NetworkConstraint => {
312            let doc: NetworkConstraintDocument =
313                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
314            Ok(Document::NetworkConstraint(Box::new(doc)))
315        }
316        DocumentType::Kostenblatt => {
317            let doc: documents::Kostenblatt =
318                quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)?;
319            Ok(Document::Kostenblatt(Box::new(doc)))
320        }
321    }
322}
323
324/// Deserialise a Redispatch 2.0 XML document into a specific type `T`.
325///
326/// Use this when the document type is known at compile time.
327///
328/// # Errors
329///
330/// Returns [`RedispatchXmlError::Deserialize`] on parse failure.
331pub fn parse_as<T>(xml: &[u8]) -> Result<T, RedispatchXmlError>
332where
333    T: serde::de::DeserializeOwned,
334{
335    let text =
336        std::str::from_utf8(xml).map_err(|e| RedispatchXmlError::StructuralError(e.to_string()))?;
337    quick_xml::de::from_str(text).map_err(RedispatchXmlError::Deserialize)
338}
339
340/// Parse a Redispatch 2.0 XML document **and** run structural + semantic
341/// validation in one step.
342///
343/// Equivalent to calling [`parse`] followed by [`crate::validate`], but more
344/// ergonomic when you always want validation.
345///
346/// # Errors
347///
348/// Returns the first [`RedispatchXmlError`] encountered during parsing.
349/// If parsing succeeds but validation finds errors, returns the first
350/// [`RedispatchXmlError::StructuralError`].
351pub fn parse_and_validate(xml: &[u8]) -> Result<Document, RedispatchXmlError> {
352    let doc = parse(xml)?;
353    let result = crate::validation::validate(&doc);
354    result
355        .into_result()
356        .map(|_| doc)
357        .map_err(|e| RedispatchXmlError::StructuralError(e.to_string()))
358}