// ddex_parser/streaming/state.rs

// src/streaming/state.rs
//! State machine for the streaming DDEX parser.
3
use ddex_core::models::common::Copyright;
use ddex_core::models::{graph::*, versions::ERNVersion};
use ddex_core::models::{Identifier, LocalizedString};
use std::collections::HashMap;
8
9/// Parser state for streaming processing
10#[derive(Debug, Clone, Default)]
11pub enum ParserState {
12    /// Initial state - waiting for root element
13    #[default]
14    Initial,
15    /// Parsing message header
16    InHeader {
17        header: PartialMessageHeader,
18        depth: usize,
19    },
20    /// Parsing a release
21    InRelease {
22        release: PartialRelease,
23        depth: usize,
24    },
25    /// Parsing a resource
26    InResource {
27        resource: PartialResource,
28        depth: usize,
29    },
30    /// Parsing a party
31    InParty { party: PartialParty, depth: usize },
32    /// Parsing a deal
33    InDeal { deal: PartialDeal, depth: usize },
34    /// Skipping unknown element
35    Skipping {
36        start_depth: usize,
37        current_depth: usize,
38    },
39    /// Parsing complete
40    Complete,
41    /// Error state
42    Error(String),
43}
44
45/// Parsing context that tracks current state
46#[derive(Debug)]
47pub struct ParsingContext {
48    pub state: ParserState,
49    pub version: ERNVersion,
50    pub current_path: Vec<String>,
51    pub current_depth: usize,
52    pub namespace_stack: Vec<HashMap<String, String>>,
53    pub text_buffer: String,
54    pub attributes: HashMap<String, String>,
55}
56
57impl ParsingContext {
58    pub fn new(version: ERNVersion) -> Self {
59        Self {
60            state: ParserState::Initial,
61            version,
62            current_path: Vec::new(),
63            current_depth: 0,
64            namespace_stack: vec![HashMap::new()],
65            text_buffer: String::new(),
66            attributes: HashMap::new(),
67        }
68    }
69
70    pub fn push_element(&mut self, name: &str) {
71        self.current_path.push(name.to_string());
72        self.current_depth += 1;
73        // Push new namespace scope
74        let parent_scope = self.namespace_stack.last().unwrap().clone();
75        self.namespace_stack.push(parent_scope);
76    }
77
78    pub fn pop_element(&mut self) -> Option<String> {
79        self.current_depth = self.current_depth.saturating_sub(1);
80        self.namespace_stack.pop();
81        self.current_path.pop()
82    }
83
84    pub fn current_element_path(&self) -> String {
85        self.current_path.join("/")
86    }
87
88    pub fn is_at_path(&self, path: &[&str]) -> bool {
89        self.current_path.len() >= path.len()
90            && self.current_path[self.current_path.len() - path.len()..]
91                .iter()
92                .zip(path.iter())
93                .all(|(a, b)| a == b)
94    }
95
96    pub fn clear_text_buffer(&mut self) {
97        self.text_buffer.clear();
98    }
99
100    pub fn add_text(&mut self, text: &str) {
101        if !text.trim().is_empty() {
102            self.text_buffer.push_str(text);
103        }
104    }
105
106    pub fn take_text(&mut self) -> String {
107        std::mem::take(&mut self.text_buffer)
108    }
109}
110
111/// Partial release being built during streaming
112#[derive(Debug, Clone, Default)]
113pub struct PartialRelease {
114    pub release_reference: Option<String>,
115    pub release_id: Vec<Identifier>,
116    pub release_title: Vec<LocalizedString>,
117    pub display_artist: Vec<Artist>,
118    pub genre: Vec<Genre>,
119    pub release_date: Vec<ReleaseEvent>,
120    pub release_resource_reference_list: Vec<ReleaseResourceReference>,
121    pub deal_reference_list: Vec<String>,
122    pub completed_fields: usize,
123    pub memory_estimate: usize,
124}
125
126/// Partial resource being built during streaming
127#[derive(Debug, Clone, Default)]
128pub struct PartialResource {
129    pub resource_reference: Option<String>,
130    pub resource_type: Option<ResourceType>,
131    pub resource_id: Vec<Identifier>,
132    pub reference_title: Vec<LocalizedString>,
133    pub duration: Option<std::time::Duration>,
134    pub technical_details: Vec<TechnicalDetails>,
135    pub rights_controller: Vec<String>,
136    pub p_line: Vec<Copyright>,
137    pub c_line: Vec<Copyright>,
138    pub completed_fields: usize,
139    pub memory_estimate: usize,
140}
141
142/// Partial party being built during streaming
143#[derive(Debug, Clone, Default)]
144pub struct PartialParty {
145    pub party_reference: Option<String>,
146    pub party_name: Vec<LocalizedString>,
147    pub party_id: Vec<Identifier>,
148    pub role: Vec<String>,
149    pub completed_fields: usize,
150    pub memory_estimate: usize,
151}
152
153/// Partial deal being built during streaming
154#[derive(Debug, Clone, Default)]
155pub struct PartialDeal {
156    pub deal_reference: Option<String>,
157    pub deal_terms: Option<DealTerms>,
158    pub commercial_model_type: Vec<String>,
159    pub use_type: Vec<String>,
160    pub territory_code: Vec<String>,
161    pub completed_fields: usize,
162    pub memory_estimate: usize,
163}
164
165/// Partial message header being built during streaming
166#[derive(Debug, Clone, Default)]
167pub struct PartialMessageHeader {
168    pub sender: Option<MessageSender>,
169    pub recipient: Vec<MessageRecipient>,
170    pub message_created_date_time: Option<String>,
171    pub message_id: Option<Identifier>,
172    pub message_file_name: Option<String>,
173    pub completed_fields: usize,
174    pub memory_estimate: usize,
175}
176
177impl PartialRelease {
178    pub fn estimate_memory(&self) -> usize {
179        // Rough memory estimation
180        let mut size = std::mem::size_of::<PartialRelease>();
181        size += self.release_reference.as_ref().map_or(0, |s| s.len());
182        size += self.release_id.len() * std::mem::size_of::<Identifier>();
183        size += self.release_title.len() * std::mem::size_of::<LocalizedString>();
184        // Add estimates for other fields
185        size
186    }
187
188    pub fn is_complete(&self) -> bool {
189        self.release_reference.is_some() && !self.release_title.is_empty()
190    }
191
192    pub fn into_release(self) -> Release {
193        Release {
194            release_reference: self.release_reference.unwrap_or_default(),
195            release_id: self.release_id,
196            release_title: self.release_title,
197            release_subtitle: None,
198            release_type: None,
199            genre: self.genre,
200            release_resource_reference_list: self.release_resource_reference_list,
201            display_artist: self.display_artist,
202            party_list: vec![],
203            release_date: self.release_date,
204            territory_code: vec![],
205            excluded_territory_code: vec![],
206            attributes: None,
207            extensions: None,
208            comments: None,
209        }
210    }
211}
212
213impl PartialResource {
214    pub fn estimate_memory(&self) -> usize {
215        let mut size = std::mem::size_of::<PartialResource>();
216        size += self.resource_reference.as_ref().map_or(0, |s| s.len());
217        size += self
218            .resource_type
219            .as_ref()
220            .map_or(0, |_| std::mem::size_of::<ResourceType>());
221        size += self.resource_id.len() * std::mem::size_of::<Identifier>();
222        size += self.reference_title.len() * std::mem::size_of::<LocalizedString>();
223        size
224    }
225
226    pub fn is_complete(&self) -> bool {
227        self.resource_reference.is_some() && !self.reference_title.is_empty()
228    }
229
230    pub fn into_resource(self) -> Resource {
231        Resource {
232            resource_reference: self.resource_reference.unwrap_or_default(),
233            resource_type: self.resource_type.unwrap_or(ResourceType::SoundRecording),
234            resource_id: self.resource_id,
235            reference_title: self.reference_title,
236            duration: self.duration,
237            technical_details: self.technical_details,
238            rights_controller: self.rights_controller,
239            p_line: self.p_line,
240            c_line: self.c_line,
241            extensions: None,
242        }
243    }
244}