1use crate::error::ParseError;
5use crate::parser::security::SecurityConfig;
6use ddex_core::models::graph::{
7 MessageHeader, MessageRecipient, MessageSender, MessageType, Release, ReleaseType,
8};
9use ddex_core::models::versions::ERNVersion;
10use ddex_core::models::{Identifier, IdentifierType, LocalizedString};
11use quick_xml::{events::Event, Reader};
12use std::collections::HashMap;
13use std::io::BufRead;
14
15#[derive(Debug, Clone)]
17pub struct MultiReleaseParser {
18 version: ERNVersion,
20 security_config: SecurityConfig,
22 detailed_parsing: bool,
24 max_releases: usize,
26 stats: MultiReleaseStats,
28}
29
/// Counters and timings gathered while scanning a document for releases.
///
/// Every field starts at zero (or a zero duration) via [`Default`].
#[derive(Clone, Debug, Default)]
pub struct MultiReleaseStats {
    /// Number of releases discovered by the most recent scan.
    pub total_releases_found: usize,
    /// Number of releases that were turned into `Release` values.
    pub releases_parsed: usize,
    /// Releases whose `IsMainRelease` attribute was `true`.
    pub main_releases: usize,
    /// Releases whose `IsMainRelease` attribute was absent or not `true`.
    pub secondary_releases: usize,
    /// Number of start tags and self-closing tags visited.
    pub elements_processed: usize,
    /// Position reported by the XML reader when the scan stopped.
    pub bytes_processed: usize,
    /// Wall-clock time spent in the most recent scan.
    pub parse_duration: std::time::Duration,
    /// Number of `ReleaseList` start tags seen while parsing.
    pub release_list_count: usize,
}
42
43#[derive(Debug, Clone)]
45pub struct MultiReleaseResult {
46 pub releases: Vec<Release>,
48 pub stats: MultiReleaseStats,
50 pub message_header: Option<MessageHeader>,
52 pub release_count: usize,
54 pub release_references: Vec<String>,
56}
57
58#[derive(Debug, Clone)]
60#[allow(dead_code)]
61struct ReleaseContext {
62 release: Release,
63 depth: usize,
64 current_element_path: Vec<String>,
65 attributes: HashMap<String, String>,
66 is_main_release: Option<bool>,
67 position: usize,
68}
69
70impl MultiReleaseParser {
71 pub fn new(version: ERNVersion) -> Self {
73 Self {
74 version,
75 security_config: SecurityConfig::default(),
76 detailed_parsing: true,
77 max_releases: 0,
78 stats: MultiReleaseStats::default(),
79 }
80 }
81
82 pub fn with_security_config(version: ERNVersion, security_config: SecurityConfig) -> Self {
84 Self {
85 version,
86 security_config,
87 detailed_parsing: true,
88 max_releases: 0,
89 stats: MultiReleaseStats::default(),
90 }
91 }
92
93 pub fn detailed_parsing(mut self, enabled: bool) -> Self {
95 self.detailed_parsing = enabled;
96 self
97 }
98
99 pub fn max_releases(mut self, max: usize) -> Self {
101 self.max_releases = max;
102 self
103 }
104
105 pub fn count_releases<R: BufRead>(&mut self, reader: R) -> Result<usize, ParseError> {
107 let start_time = std::time::Instant::now();
108 let mut xml_reader = Reader::from_reader(reader);
109 xml_reader.config_mut().trim_text(false); let mut buf = Vec::new();
112 let mut release_count = 0;
113 let mut depth = 0;
114 let mut elements_processed = 0;
115
116 loop {
117 match xml_reader.read_event_into(&mut buf) {
118 Ok(Event::Start(ref e)) => {
119 elements_processed += 1;
120 depth += 1;
121
122 if depth > self.security_config.max_element_depth {
124 return Err(ParseError::DepthLimitExceeded {
125 depth,
126 limit: self.security_config.max_element_depth,
127 });
128 }
129
130 let element_name = self.extract_element_name(e.name().as_ref())?;
131 if element_name == "Release" || element_name.ends_with(":Release") {
132 release_count += 1;
133
134 if self.max_releases > 0 && release_count >= self.max_releases {
136 break;
137 }
138 }
139 }
140 Ok(Event::End(_)) => {
141 depth = depth.saturating_sub(1);
142 }
143 Ok(Event::Empty(ref e)) => {
144 elements_processed += 1;
145 let element_name = self.extract_element_name(e.name().as_ref())?;
146 if element_name == "Release" || element_name.ends_with(":Release") {
147 release_count += 1;
148
149 if self.max_releases > 0 && release_count >= self.max_releases {
150 break;
151 }
152 }
153 }
154 Ok(Event::Eof) => break,
155 Err(e) => {
156 return Err(ParseError::XmlError(format!("XML parsing error: {}", e)));
157 }
158 _ => {} }
160 buf.clear();
161 }
162
163 self.stats.total_releases_found = release_count;
165 self.stats.elements_processed = elements_processed;
166 self.stats.bytes_processed = xml_reader.buffer_position() as usize;
167 self.stats.parse_duration = start_time.elapsed();
168
169 Ok(release_count)
170 }
171
172 pub fn parse_releases<R: BufRead>(
174 &mut self,
175 reader: R,
176 ) -> Result<MultiReleaseResult, ParseError> {
177 let start_time = std::time::Instant::now();
178 let mut xml_reader = Reader::from_reader(reader);
179 xml_reader.config_mut().trim_text(true);
180 xml_reader.config_mut().check_end_names = true;
181
182 let mut releases = Vec::new();
183 let mut buf = Vec::new();
184 let mut current_context: Option<ReleaseContext> = None;
185 let mut depth = 0;
186 let mut elements_processed = 0;
187 let mut release_references = Vec::new();
188 let mut message_header: Option<MessageHeader> = None;
189 let mut in_release_list = false;
190 let mut release_list_count = 0;
191
192 loop {
193 match xml_reader.read_event_into(&mut buf) {
194 Ok(Event::Start(ref e)) => {
195 elements_processed += 1;
196 depth += 1;
197
198 if depth > self.security_config.max_element_depth {
200 return Err(ParseError::DepthLimitExceeded {
201 depth,
202 limit: self.security_config.max_element_depth,
203 });
204 }
205
206 let element_name = self.extract_element_name(e.name().as_ref())?;
207
208 let mut attributes = HashMap::new();
210 for attr in e.attributes().flatten() {
211 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
212 let value = String::from_utf8_lossy(&attr.value).to_string();
213 attributes.insert(key, value);
214 }
215
216 match element_name.as_str() {
217 "ReleaseList" | "ern:ReleaseList" => {
218 in_release_list = true;
219 release_list_count += 1;
220 }
221 "Release" | "ern:Release" if in_release_list => {
222 let is_main = attributes
224 .get("IsMainRelease")
225 .or_else(|| attributes.get("isMainRelease"))
226 .map(|v| v.to_lowercase() == "true");
227
228 let release = self.create_default_release();
229 current_context = Some(ReleaseContext {
230 release,
231 depth,
232 current_element_path: vec![element_name.clone()],
233 attributes: attributes.clone(),
234 is_main_release: is_main,
235 position: xml_reader.buffer_position() as usize,
236 });
237
238 if is_main.unwrap_or(false) {
239 self.stats.main_releases += 1;
240 } else {
241 self.stats.secondary_releases += 1;
242 }
243 }
244 "MessageHeader" | "ern:MessageHeader" if message_header.is_none() => {
245 if self.detailed_parsing {
247 message_header =
248 Some(self.parse_message_header(&mut xml_reader, &mut buf)?);
249 }
250 }
251 _ => {
252 if let Some(ref mut context) = current_context {
254 context.current_element_path.push(element_name.clone());
255 self.process_release_element(
256 context,
257 &element_name,
258 &attributes,
259 &mut xml_reader,
260 &mut buf,
261 )?;
262 }
263 }
264 }
265 }
266 Ok(Event::End(ref e)) => {
267 depth = depth.saturating_sub(1);
268 let element_name = self.extract_element_name(e.name().as_ref())?;
269
270 match element_name.as_str() {
271 "ReleaseList" | "ern:ReleaseList" => {
272 in_release_list = false;
273 }
274 "Release" | "ern:Release" => {
275 if let Some(context) = current_context.take() {
276 if let Some(reference) =
278 self.extract_release_reference(&context.release)
279 {
280 release_references.push(reference);
281 }
282
283 releases.push(context.release);
284 self.stats.releases_parsed += 1;
285
286 if self.max_releases > 0 && releases.len() >= self.max_releases {
288 break;
289 }
290 }
291 }
292 _ => {
293 if let Some(ref mut context) = current_context {
294 context.current_element_path.pop();
295 }
296 }
297 }
298 }
299 Ok(Event::Empty(ref e)) => {
300 elements_processed += 1;
301 let element_name = self.extract_element_name(e.name().as_ref())?;
302
303 if (element_name == "Release" || element_name.ends_with(":Release"))
305 && in_release_list
306 {
307 let mut attributes = HashMap::new();
308 for attr in e.attributes().flatten() {
309 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
310 let value = String::from_utf8_lossy(&attr.value).to_string();
311 attributes.insert(key, value);
312 }
313
314 let is_main = attributes
315 .get("IsMainRelease")
316 .or_else(|| attributes.get("isMainRelease"))
317 .map(|v| v.to_lowercase() == "true");
318
319 let release = self.create_default_release();
320 releases.push(release);
321
322 if is_main.unwrap_or(false) {
323 self.stats.main_releases += 1;
324 } else {
325 self.stats.secondary_releases += 1;
326 }
327
328 self.stats.releases_parsed += 1;
329
330 if self.max_releases > 0 && releases.len() >= self.max_releases {
331 break;
332 }
333 }
334 }
335 Ok(Event::Text(ref e)) => {
336 if let Some(ref mut context) = current_context {
337 let current_pos = xml_reader.buffer_position() as usize;
339 let text = crate::utf8_utils::handle_text_node(e, current_pos)?
340 .trim()
341 .to_string();
342
343 if !text.is_empty() {
344 self.process_release_text_content(context, &text)?;
345 }
346 }
347 }
348 Ok(Event::Eof) => break,
349 Err(e) => {
350 return Err(ParseError::XmlError(format!("XML parsing error: {}", e)));
351 }
352 _ => {} }
354 buf.clear();
355 }
356
357 self.stats.total_releases_found = self.stats.releases_parsed;
359 self.stats.elements_processed = elements_processed;
360 self.stats.bytes_processed = xml_reader.buffer_position() as usize;
361 self.stats.parse_duration = start_time.elapsed();
362 self.stats.release_list_count = release_list_count;
363
364 Ok(MultiReleaseResult {
365 releases,
366 stats: self.stats.clone(),
367 message_header,
368 release_count: self.stats.releases_parsed,
369 release_references,
370 })
371 }
372
373 fn extract_element_name(&self, qname: &[u8]) -> Result<String, ParseError> {
375 let name_str = std::str::from_utf8(qname).map_err(|_| ParseError::IoError(
376 "Invalid UTF-8 in element name".to_string(),
377 ))?;
378 Ok(name_str.to_string())
379 }
380
381 fn create_default_release(&self) -> Release {
383 Release {
384 release_reference: format!("REL_{:?}_{}", self.version, chrono::Utc::now().timestamp()),
385 release_id: Vec::new(),
386 release_title: vec![LocalizedString::new("Untitled Release".to_string())],
387 release_subtitle: None,
388 release_type: None,
389 genre: Vec::new(),
390 release_resource_reference_list: Vec::new(),
391 display_artist: Vec::new(),
392 party_list: Vec::new(),
393 release_date: Vec::new(),
394 territory_code: Vec::new(),
395 excluded_territory_code: Vec::new(),
396 extensions: None,
397 attributes: None,
398 comments: None,
399 }
400 }
401
402 fn parse_message_header<R: BufRead>(
404 &self,
405 _reader: &mut Reader<R>,
406 _buf: &mut [u8],
407 ) -> Result<MessageHeader, ParseError> {
408 Ok(MessageHeader {
410 message_id: format!("MSG_{:?}", self.version),
411 message_type: MessageType::NewReleaseMessage,
412 message_created_date_time: chrono::Utc::now(),
413 message_sender: MessageSender {
414 party_id: Vec::new(),
415 party_name: Vec::new(),
416 trading_name: None,
417 extensions: None,
418 attributes: None,
419 comments: None,
420 },
421 message_recipient: MessageRecipient {
422 party_id: Vec::new(),
423 party_name: Vec::new(),
424 trading_name: None,
425 extensions: None,
426 attributes: None,
427 comments: None,
428 },
429 message_control_type: None,
430 message_thread_id: Some("MULTI_RELEASE_THREAD".to_string()),
431 extensions: None,
432 attributes: None,
433 comments: None,
434 })
435 }
436
437 fn process_release_element(
439 &self,
440 context: &mut ReleaseContext,
441 element_name: &str,
442 attributes: &HashMap<String, String>,
443 _reader: &mut Reader<impl BufRead>,
444 _buf: &mut [u8],
445 ) -> Result<(), ParseError> {
446 match element_name {
448 "ReleaseReference" | "ern:ReleaseReference" => {
449 }
451 "ReleaseId" | "ern:ReleaseId" => {
452 }
454 "ReferenceTitle" | "ern:ReferenceTitle" => {
455 }
457 "TitleText" | "ern:TitleText" => {
458 }
460 "ReleaseType" | "ern:ReleaseType" => {
461 }
463 _ => {
464 }
466 }
467
468 for (key, value) in attributes {
470 context
471 .attributes
472 .insert(format!("{}:{}", element_name, key), value.clone());
473 }
474
475 Ok(())
476 }
477
478 fn process_release_text_content(
480 &self,
481 context: &mut ReleaseContext,
482 text: &str,
483 ) -> Result<(), ParseError> {
484 let current_path = context.current_element_path.join("/");
485
486 if current_path.contains("ReleaseReference") {
487 context.release.release_reference = text.to_string();
488 } else if current_path.contains("ReleaseId") {
489 context.release.release_id.push(Identifier {
491 id_type: IdentifierType::Proprietary,
492 namespace: None,
493 value: text.to_string(),
494 });
495 } else if current_path.contains("TitleText") {
496 if !context.release.release_title.is_empty() {
498 context.release.release_title[0] = LocalizedString::new(text.to_string());
499 } else {
500 context
501 .release
502 .release_title
503 .push(LocalizedString::new(text.to_string()));
504 }
505 } else if current_path.contains("ReleaseType") {
506 context.release.release_type = Some(match text {
507 "Album" => ReleaseType::Album,
508 "Single" => ReleaseType::Single,
509 "EP" => ReleaseType::EP,
510 "Compilation" => ReleaseType::Compilation,
511 other => ReleaseType::Other(other.to_string()),
512 });
513 }
514
515 Ok(())
516 }
517
518 fn extract_release_reference(&self, release: &Release) -> Option<String> {
520 if !release.release_reference.is_empty() {
521 Some(release.release_reference.clone())
522 } else if !release.release_id.is_empty() {
523 Some(release.release_id[0].value.clone())
524 } else {
525 None
526 }
527 }
528}
529
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Parser configured for ERN 4.3, shared by every scenario below.
    fn v43_parser() -> MultiReleaseParser {
        MultiReleaseParser::new(ERNVersion::V4_3)
    }

    /// Counting mode finds every release and fills in the scan statistics.
    #[test]
    fn test_release_counting() {
        let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
    </ern:MessageHeader>
    <ern:ReleaseList>
        <ern:Release IsMainRelease="true">
            <ern:ReleaseReference>REL001</ern:ReleaseReference>
            <ern:ReferenceTitle>
                <ern:TitleText>Album One</ern:TitleText>
            </ern:ReferenceTitle>
        </ern:Release>
        <ern:Release IsMainRelease="false">
            <ern:ReleaseReference>REL002</ern:ReleaseReference>
            <ern:ReferenceTitle>
                <ern:TitleText>Album Two</ern:TitleText>
            </ern:ReferenceTitle>
        </ern:Release>
        <ern:Release>
            <ern:ReleaseReference>REL003</ern:ReleaseReference>
            <ern:ReferenceTitle>
                <ern:TitleText>Album Three</ern:TitleText>
            </ern:ReferenceTitle>
        </ern:Release>
    </ern:ReleaseList>
</ern:NewReleaseMessage>"#;

        let mut parser = v43_parser();
        let found = parser
            .count_releases(Cursor::new(document.as_bytes()))
            .expect("Should count releases");

        assert_eq!(found, 3);
        assert_eq!(parser.stats.total_releases_found, 3);
        assert!(parser.stats.elements_processed > 0);
        assert!(parser.stats.bytes_processed > 0);
    }

    /// Full parsing extracts reference, title and type of each release.
    #[test]
    fn test_multi_release_parsing() {
        let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
        <ern:MessageSender>
            <ern:PartyName>Test Label</ern:PartyName>
        </ern:MessageSender>
        <ern:MessageRecipient>
            <ern:PartyName>Test Recipient</ern:PartyName>
        </ern:MessageRecipient>
        <ern:MessageCreatedDateTime>2024-01-15T10:30:00Z</ern:MessageCreatedDateTime>
    </ern:MessageHeader>
    <ern:ReleaseList>
        <ern:Release IsMainRelease="true">
            <ern:ReleaseReference>MAIN_RELEASE_001</ern:ReleaseReference>
            <ern:ReleaseId Namespace="GRid">A1-123456789-1234567890-A</ern:ReleaseId>
            <ern:ReferenceTitle>
                <ern:TitleText>My Main Album</ern:TitleText>
            </ern:ReferenceTitle>
            <ern:ReleaseType>Album</ern:ReleaseType>
        </ern:Release>
        <ern:Release IsMainRelease="false">
            <ern:ReleaseReference>SECONDARY_RELEASE_002</ern:ReleaseReference>
            <ern:ReleaseId>REL_SEC_002</ern:ReleaseId>
            <ern:ReferenceTitle>
                <ern:TitleText>Bonus Tracks</ern:TitleText>
            </ern:ReferenceTitle>
            <ern:ReleaseType>EP</ern:ReleaseType>
        </ern:Release>
    </ern:ReleaseList>
</ern:NewReleaseMessage>"#;

        let mut parser = v43_parser().detailed_parsing(true);
        let outcome = parser
            .parse_releases(Cursor::new(document.as_bytes()))
            .expect("Should parse releases");

        assert_eq!(outcome.releases.len(), 2);
        assert_eq!(outcome.release_count, 2);
        assert_eq!(outcome.stats.main_releases, 1);
        assert_eq!(outcome.stats.secondary_releases, 1);

        // Releases come back in document order: main first, secondary second.
        let main = &outcome.releases[0];
        assert_eq!(main.release_reference, "MAIN_RELEASE_001");
        assert_eq!(main.release_title[0].text, "My Main Album");
        assert_eq!(main.release_type.as_ref().unwrap(), &ReleaseType::Album);

        let secondary = &outcome.releases[1];
        assert_eq!(secondary.release_reference, "SECONDARY_RELEASE_002");
        assert_eq!(secondary.release_title[0].text, "Bonus Tracks");
        assert_eq!(secondary.release_type.as_ref().unwrap(), &ReleaseType::EP);

        assert_eq!(outcome.release_references.len(), 2);
        for expected in ["MAIN_RELEASE_001", "SECONDARY_RELEASE_002"] {
            assert!(outcome.release_references.contains(&expected.to_string()));
        }

        println!("Multi-release parsing stats: {:#?}", outcome.stats);
    }

    /// Parsing stops as soon as the configured number of releases is reached.
    #[test]
    fn test_max_releases_limit() {
        let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:ReleaseList>
        <ern:Release><ern:ReleaseReference>REL001</ern:ReleaseReference></ern:Release>
        <ern:Release><ern:ReleaseReference>REL002</ern:ReleaseReference></ern:Release>
        <ern:Release><ern:ReleaseReference>REL003</ern:ReleaseReference></ern:Release>
        <ern:Release><ern:ReleaseReference>REL004</ern:ReleaseReference></ern:Release>
        <ern:Release><ern:ReleaseReference>REL005</ern:ReleaseReference></ern:Release>
    </ern:ReleaseList>
</ern:NewReleaseMessage>"#;

        let mut parser = v43_parser().max_releases(3);
        let outcome = parser
            .parse_releases(Cursor::new(document.as_bytes()))
            .expect("Should parse with limit");

        assert_eq!(outcome.releases.len(), 3);
        assert_eq!(outcome.release_count, 3);
        assert_eq!(outcome.stats.releases_parsed, 3);
    }

    /// Self-closing releases are parsed alongside regular ones.
    #[test]
    fn test_empty_and_self_closing_releases() {
        let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:ReleaseList>
        <ern:Release/>
        <ern:Release IsMainRelease="true"/>
        <ern:Release>
            <ern:ReleaseReference>REL003</ern:ReleaseReference>
        </ern:Release>
    </ern:ReleaseList>
</ern:NewReleaseMessage>"#;

        let mut parser = v43_parser();
        let outcome = parser
            .parse_releases(Cursor::new(document.as_bytes()))
            .expect("Should parse empty releases");

        assert_eq!(outcome.releases.len(), 3);
        assert_eq!(outcome.stats.main_releases, 1);
        assert_eq!(outcome.stats.secondary_releases, 2);
    }

    /// Counts 1000 generated releases, then parses the first 100 of them, and
    /// prints rough throughput figures.
    #[test]
    fn test_performance_with_many_releases() {
        let mut document = String::from(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:ReleaseList>"#,
        );

        // Only the very first generated release is flagged as the main one.
        for index in 0..1000 {
            document.push_str(&format!(
                r#"
        <ern:Release IsMainRelease="{}">
            <ern:ReleaseReference>REL{:06}</ern:ReleaseReference>
            <ern:ReferenceTitle>
                <ern:TitleText>Release {}</ern:TitleText>
            </ern:ReferenceTitle>
        </ern:Release>"#,
                index == 0,
                index,
                index
            ));
        }
        document.push_str("</ern:ReleaseList></ern:NewReleaseMessage>");

        let mut counter = v43_parser();
        let count_started = std::time::Instant::now();
        let count = counter
            .count_releases(Cursor::new(document.as_bytes()))
            .expect("Should count many releases");
        let count_duration = count_started.elapsed();

        assert_eq!(count, 1000);

        let mut limited = v43_parser().detailed_parsing(true).max_releases(100);
        let parse_started = std::time::Instant::now();
        let outcome = limited
            .parse_releases(Cursor::new(document.as_bytes()))
            .expect("Should parse many releases");
        let parse_duration = parse_started.elapsed();

        assert_eq!(outcome.releases.len(), 100);
        assert_eq!(outcome.stats.main_releases, 1);
        assert_eq!(outcome.stats.secondary_releases, 99);

        println!("Performance test results:");
        println!("  Count 1000 releases: {:?}", count_duration);
        println!("  Parse 100 releases: {:?}", parse_duration);
        println!(
            "  Count throughput: {:.0} releases/sec",
            1000.0 / count_duration.as_secs_f64()
        );
        println!(
            "  Parse throughput: {:.0} releases/sec",
            100.0 / parse_duration.as_secs_f64()
        );
    }
}