Skip to main content

prelude_xml_parser/
lib.rs

1pub mod errors;
2pub mod native;
3
4use std::{collections::HashMap, fs::read_to_string, io::Cursor, path::Path};
5
6use rayon::prelude::*;
7
8use crate::errors::Error;
9use crate::native::{
10    common::{Category, Comment, Entry, Field, LockState, Reason, State, Value},
11    site_native::{Site, SiteNative},
12    subject_native::{Form, Patient, SubjectNative},
13    user_native::{User, UserNative},
14};
15use quick_xml::events::{BytesStart, Event};
16use quick_xml::Reader;
17
18/// Parses a Prelude native XML file into a `Native` struct.
19///
20/// # Example
21///
22/// ```
23/// use std::path::Path;
24///
25/// use prelude_xml_parser::parse_site_native_file;
26///
27/// let file_path = Path::new("tests/assets/site_native.xml");
28/// let native = parse_site_native_file(&file_path).unwrap();
29///
30/// assert!(native.sites.len() >= 1, "Vector length is less than 1");
31/// ```
32pub fn parse_site_native_file(xml_path: &Path) -> Result<SiteNative, Error> {
33    check_valid_xml_file(xml_path)?;
34
35    let xml_file = read_to_string(xml_path)?;
36    let native = parse_site_native_string(&xml_file)?;
37
38    Ok(native)
39}
40
41/// Parse a string of Prelude native site XML into a `SiteNative` struct.
42///
43/// # Example
44///
45/// ```
46/// use chrono::{DateTime, Utc};
47/// use prelude_xml_parser::parse_site_native_string;
48/// use prelude_xml_parser::native::site_native::*;
49///
50/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
51/// <export_from_vision_EDC date="01-Jun-2024 18:17 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="2">
52///
53///   <site name="Some Site" uniqueId="1681574834910" numberOfPatients="4" countOfRandomizedPatients="0" whenCreated="2023-04-15 12:08:19 -0400" creator="Paul Sanders" numberOfForms="1">
54///     <form name="demographic.form.name.site.demographics" lastModified="2023-04-15 12:08:19 -0400" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1681574834930" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Site Demographics" formIndex="1" formGroup="Demographic" formState="In-Work">
55///       <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-15 12:08:19 -0400" />
56///       <category name="Demographics" type="normal" highestIndex="0">
57///         <field name="address" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
58///         <field name="company" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
59///           <entry id="1">
60///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:08:19 -0400" xml:space="preserve">Some Company</value>
61///           </entry>
62///         </field>
63///         <field name="site_code_name" type="hidden" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
64///           <entry id="1">
65///             <value by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:08:19 -0400" xml:space="preserve">ABC-Some Site</value>
66///             <reason by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:08:19 -0400" xml:space="preserve">calculated value</reason>
67///           </entry>
68///           <entry id="2">
69///             <value by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:07:24 -0400" xml:space="preserve">Some Site</value>
70///             <reason by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:07:24 -0400" xml:space="preserve">calculated value</reason>
71///           </entry>
72///         </field>
73///       </category>
74///       <category name="Enrollment" type="normal" highestIndex="0">
75///         <field name="enrollment_closed_date" type="popUpCalendar" dataType="date" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
76///         <field name="enrollment_open" type="radio" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
77///           <entry id="1">
78///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:08:19 -0400" xml:space="preserve">Yes</value>
79///           </entry>
80///         </field>
81///         <field name="enrollment_open_date" type="popUpCalendar" dataType="date" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
82///       </category>
83///     </form>
84///   </site>
85///
86///   <site name="Artemis" uniqueId="1691420994591" numberOfPatients="0" countOfRandomizedPatients="0" whenCreated="2023-08-07 08:14:23 -0700" creator="Paul Sanders" numberOfForms="1">
87///     <form name="demographic.form.name.site.demographics" lastModified="2023-08-07 08:14:23 -0700" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1691420994611" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Site Demographics" formIndex="1" formGroup="Demographic" formState="In-Work">
88///       <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-08-07 08:14:23 -0700" />
89///       <category name="Demographics" type="normal" highestIndex="0">
90///         <field name="address" type="text" dataType="string" errorCode="valid" whenCreated="2023-08-07 10:09:54 -0500" keepHistory="true">
91///           <entry id="1">
92///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 08:14:21 -0700" xml:space="preserve">1111 Moon Drive</value>
93///           </entry>
94///           <comment id="1">
95///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 08:14:21 -0700" xml:space="preserve">Some comment</value>
96///           </comment>
97///         </field>
98///       </category>
99///     </form>
100///   </site>
101///
102/// </export_from_vision_EDC>
103/// "#;
104///
105/// let expected = SiteNative {
106///     sites: vec![
107///         Site {
108///             name: "Some Site".to_string(),
109///             unique_id: "1681574834910".to_string(),
110///             number_of_patients: 4,
111///             count_of_randomized_patients: 0,
112///             when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
113///                 .unwrap()
114///                 .with_timezone(&Utc)),
115///             creator: "Paul Sanders".to_string(),
116///             number_of_forms: 1,
117///             forms: Some(vec![Form {
118///                 name: "demographic.form.name.site.demographics".to_string(),
119///                 last_modified: Some(
120///                     DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
121///                         .unwrap()
122///                         .with_timezone(&Utc),
123///                 ),
124///                 who_last_modified_name: Some("Paul Sanders".to_string()),
125///                 who_last_modified_role: Some("Project Manager".to_string()),
126///                 when_created: 1681574834930,
127///                 has_errors: false,
128///                 has_warnings: false,
129///                 locked: false,
130///                 user: None,
131///                 date_time_changed: None,
132///                 form_title: "Site Demographics".to_string(),
133///                 form_index: 1,
134///                 form_group: Some("Demographic".to_string()),
135///                 form_state: "In-Work".to_string(),
136///                 lock_state: None,
137///                 states: Some(vec![State {
138///                     value: "form.state.in.work".to_string(),
139///                     signer: "Paul Sanders - Project Manager".to_string(),
140///                     signer_unique_id: "1681162687395".to_string(),
141///                     date_signed: Some(
142///                         DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
143///                             .unwrap()
144///                             .with_timezone(&Utc),
145///                     ),
146///                 }]),
147///                 categories: Some(vec![
148///                     Category {
149///                         name: "Demographics".to_string(),
150///                         category_type: "normal".to_string(),
151///                         highest_index: 0,
152///                         fields: Some(vec![
153///                             Field {
154///                                 name: "address".to_string(),
155///                                 field_type: "text".to_string(),
156///                                 data_type: Some("string".to_string()),
157///                                 error_code: "valid".to_string(),
158///                                 when_created: Some(DateTime::parse_from_rfc3339(
159///                                     "2023-04-15T16:07:14Z",
160///                                 )
161///                                 .unwrap()
162///                                 .with_timezone(&Utc)),
163///                                 keep_history: true,
164///                                 entries: None,
165///                                 comments: None,
166///                             },
167///                             Field {
168///                                 name: "company".to_string(),
169///                                 field_type: "text".to_string(),
170///                                 data_type: Some("string".to_string()),
171///                                 error_code: "valid".to_string(),
172///                                 when_created: Some(DateTime::parse_from_rfc3339(
173///                                     "2023-04-15T16:07:14Z",
174///                                 )
175///                                 .unwrap()
176///                                 .with_timezone(&Utc)),
177///                                 keep_history: true,
178///                                 entries: Some(vec![Entry {
179///                                     entry_id: "1".to_string(),
180///                                     reviewed_by: None,
181///                                     reviewed_by_unique_id: None,
182///                                     reviewed_by_when: None,
183///                                     value: Some(Value {
184///                                         by: "Paul Sanders".to_string(),
185///                                         by_unique_id: Some("1681162687395".to_string()),
186///                                         role: "Project Manager".to_string(),
187///                                         when: Some(DateTime::parse_from_rfc3339(
188///                                             "2023-04-15T16:08:19Z",
189///                                         )
190///                                         .unwrap()
191///                                         .with_timezone(&Utc)),
192///                                         value: "Some Company".to_string(),
193///                                     }),
194///                                     reason: None,
195///                                 }]),
196///                                 comments: None,
197///                             },
198///                             Field {
199///                                 name: "site_code_name".to_string(),
200///                                 field_type: "hidden".to_string(),
201///                                 data_type: Some("string".to_string()),
202///                                 error_code: "valid".to_string(),
203///                                 when_created: Some(DateTime::parse_from_rfc3339(
204///                                     "2023-04-15T16:07:14Z",
205///                                 )
206///                                 .unwrap()
207///                                 .with_timezone(&Utc)),
208///                                 keep_history: true,
209///                                 entries: Some(vec![
210///                                     Entry {
211///                                         entry_id: "1".to_string(),
212///                                         reviewed_by: None,
213///                                         reviewed_by_unique_id: None,
214///                                         reviewed_by_when: None,
215///                                         value: Some(Value {
216///                                             by: "set from calculation".to_string(),
217///                                             by_unique_id: None,
218///                                             role: "System".to_string(),
219///                                             when: Some(DateTime::parse_from_rfc3339(
220///                                                 "2023-04-15T16:08:19Z",
221///                                             )
222///                                             .unwrap()
223///                                             .with_timezone(&Utc)),
224///                                             value: "ABC-Some Site".to_string(),
225///                                         }),
226///                                         reason: Some(Reason {
227///                                             by: "set from calculation".to_string(),
228///                                             by_unique_id: None,
229///                                             role: "System".to_string(),
230///                                             when: Some(DateTime::parse_from_rfc3339(
231///                                                 "2023-04-15T16:08:19Z",
232///                                             )
233///                                             .unwrap()
234///                                             .with_timezone(&Utc)),
235///                                             value: "calculated value".to_string(),
236///                                         }),
237///                                     },
238///                                     Entry {
239///                                         entry_id: "2".to_string(),
240///                                         reviewed_by: None,
241///                                         reviewed_by_unique_id: None,
242///                                         reviewed_by_when: None,
243///                                         value: Some(Value {
244///                                             by: "set from calculation".to_string(),
245///                                             by_unique_id: None,
246///                                             role: "System".to_string(),
247///                                             when: Some(DateTime::parse_from_rfc3339(
248///                                                 "2023-04-15T16:07:24Z",
249///                                             )
250///                                             .unwrap()
251///                                             .with_timezone(&Utc)),
252///                                             value: "Some Site".to_string(),
253///                                         }),
254///                                         reason: Some(Reason {
255///                                             by: "set from calculation".to_string(),
256///                                             by_unique_id: None,
257///                                             role: "System".to_string(),
258///                                             when: Some(DateTime::parse_from_rfc3339(
259///                                                 "2023-04-15T16:07:24Z",
260///                                             )
261///                                             .unwrap()
262///                                             .with_timezone(&Utc)),
263///                                             value: "calculated value".to_string(),
264///                                         }),
265///                                     },
266///                                 ]),
267///                                 comments: None,
268///                             },
269///                         ]),
270///                     },
271///                     Category {
272///                         name: "Enrollment".to_string(),
273///                         category_type: "normal".to_string(),
274///                         highest_index: 0,
275///                         fields: Some(vec![
276///                             Field {
277///                                 name: "enrollment_closed_date".to_string(),
278///                                 field_type: "popUpCalendar".to_string(),
279///                                 data_type: Some("date".to_string()),
280///                                 error_code: "valid".to_string(),
281///                                 when_created: Some(DateTime::parse_from_rfc3339(
282///                                     "2023-04-15T16:07:14Z",
283///                                 )
284///                                 .unwrap()
285///                                 .with_timezone(&Utc)),
286///                                 keep_history: true,
287///                                 entries: None,
288///                                 comments: None,
289///                             },
290///                             Field {
291///                                 name: "enrollment_open".to_string(),
292///                                 field_type: "radio".to_string(),
293///                                 data_type: Some("string".to_string()),
294///                                 error_code: "valid".to_string(),
295///                                 when_created: Some(DateTime::parse_from_rfc3339(
296///                                     "2023-04-15T16:07:14Z",
297///                                 )
298///                                 .unwrap()
299///                                 .with_timezone(&Utc)),
300///                                 keep_history: true,
301///                                 entries: Some(vec![Entry {
302///                                     entry_id: "1".to_string(),
303///                                     reviewed_by: None,
304///                                     reviewed_by_unique_id: None,
305///                                     reviewed_by_when: None,
306///                                     value: Some(Value {
307///                                         by: "Paul Sanders".to_string(),
308///                                         by_unique_id: Some("1681162687395".to_string()),
309///                                         role: "Project Manager".to_string(),
310///                                         when: Some(DateTime::parse_from_rfc3339(
311///                                             "2023-04-15T16:08:19Z",
312///                                         )
313///                                         .unwrap()
314///                                         .with_timezone(&Utc)),
315///                                         value: "Yes".to_string(),
316///                                     }),
317///                                     reason: None,
318///                                 }]),
319///                                 comments: None,
320///                             },
321///                             Field {
322///                                 name: "enrollment_open_date".to_string(),
323///                                 field_type: "popUpCalendar".to_string(),
324///                                 data_type: Some("date".to_string()),
325///                                 error_code: "valid".to_string(),
326///                                 when_created: Some(DateTime::parse_from_rfc3339(
327///                                     "2023-04-15T16:07:14Z",
328///                                 )
329///                                 .unwrap()
330///                                 .with_timezone(&Utc)),
331///                                 keep_history: true,
332///                                 entries: None,
333///                                 comments: None,
334///                             },
335///                         ]),
336///                     },
337///                 ]),
338///             }]),
339///         },
340///         Site {
341///             name: "Artemis".to_string(),
342///             unique_id: "1691420994591".to_string(),
343///             number_of_patients: 0,
344///             count_of_randomized_patients: 0,
345///             when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
346///                 .unwrap()
347///                 .with_timezone(&Utc)),
348///             creator: "Paul Sanders".to_string(),
349///             number_of_forms: 1,
350///             forms: Some(vec![Form {
351///                 name: "demographic.form.name.site.demographics".to_string(),
352///                 last_modified: Some(
353///                     DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
354///                         .unwrap()
355///                         .with_timezone(&Utc),
356///                 ),
357///                 who_last_modified_name: Some("Paul Sanders".to_string()),
358///                 who_last_modified_role: Some("Project Manager".to_string()),
359///                 when_created: 1691420994611,
360///                 has_errors: false,
361///                 has_warnings: false,
362///                 locked: false,
363///                 user: None,
364///                 date_time_changed: None,
365///                 form_title: "Site Demographics".to_string(),
366///                 form_index: 1,
367///                 form_group: Some("Demographic".to_string()),
368///                 form_state: "In-Work".to_string(),
369///                 lock_state: None,
370///                 states: Some(vec![State {
371///                     value: "form.state.in.work".to_string(),
372///                     signer: "Paul Sanders - Project Manager".to_string(),
373///                     signer_unique_id: "1681162687395".to_string(),
374///                     date_signed: Some(
375///                         DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
376///                             .unwrap()
377///                             .with_timezone(&Utc),
378///                     ),
379///                 }]),
380///                 categories: Some(vec![Category {
381///                     name: "Demographics".to_string(),
382///                     category_type: "normal".to_string(),
383///                     highest_index: 0,
384///                     fields: Some(vec![Field {
385///                         name: "address".to_string(),
386///                         field_type: "text".to_string(),
387///                         data_type: Some("string".to_string()),
388///                         error_code: "valid".to_string(),
389///                         when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:09:54Z")
390///                             .unwrap()
391///                             .with_timezone(&Utc)),
392///                         keep_history: true,
393///                         entries: Some(vec![Entry {
394///                             entry_id: "1".to_string(),
395///                             reviewed_by: None,
396///                             reviewed_by_unique_id: None,
397///                             reviewed_by_when: None,
398///                             value: Some(Value {
399///                                 by: "Paul Sanders".to_string(),
400///                                 by_unique_id: Some("1681162687395".to_string()),
401///                                 role: "Project Manager".to_string(),
402///                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:21Z")
403///                                     .unwrap()
404///                                     .with_timezone(&Utc)),
405///                                 value: "1111 Moon Drive".to_string(),
406///                             }),
407///                             reason: None,
408///                         }]),
409///                         comments: Some(vec![Comment {
410///                             comment_id: "1".to_string(),
411///                             value: Some(Value {
412///                                 by: "Paul Sanders".to_string(),
413///                                 by_unique_id: Some("1681162687395".to_string()),
414///                                 role: "Project Manager".to_string(),
415///                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:21Z")
416///                                     .unwrap()
417///                                     .with_timezone(&Utc)),
418///                                 value: "Some comment".to_string(),
419///                             }),
420///                         }]),
421///                     }]),
422///                 }]),
423///             }]),
424///         },
425///     ],
426/// };
427/// let result = parse_site_native_string(xml).unwrap();
/// assert_eq!(result, expected);
/// ```
429pub fn parse_site_native_string(xml_str: &str) -> Result<SiteNative, Error> {
430    let chunks = extract_site_chunks(xml_str);
431    let sites = chunks
432        .into_par_iter()
433        .map(parse_site_xml)
434        .collect::<Result<Vec<_>, _>>()?;
435    Ok(SiteNative { sites })
436}
437
438/// Parses a Prelude native subject XML file into a `SubjectNative` struct.
439///
440/// # Example
441///
442/// ```
443/// use std::path::Path;
444///
445/// use prelude_xml_parser::parse_subject_native_file;
446///
447/// let file_path = Path::new("tests/assets/subject_native.xml");
448/// let native = parse_subject_native_file(&file_path).unwrap();
449///
450/// assert!(native.patients.len() >= 1, "Vector length is less than 1");
451/// ```
452pub fn parse_subject_native_file(xml_path: &Path) -> Result<SubjectNative, Error> {
453    check_valid_xml_file(xml_path)?;
454
455    let xml_str = read_to_string(xml_path)?;
456    let chunks = extract_patient_chunks(&xml_str);
457    let patients = chunks
458        .into_par_iter()
459        .map(parse_patient_xml)
460        .collect::<Result<Vec<_>, _>>()?;
461    Ok(SubjectNative { patients })
462}
463
464/// Parse a string of Prelude native subject XML into a `SubjectNative` struct.
465///
466/// # Example
467///
468/// ```
469/// use chrono::{DateTime, Utc};
470/// use prelude_xml_parser::parse_subject_native_string;
471/// use prelude_xml_parser::native::common::LockState;
472/// use prelude_xml_parser::native::subject_native::*;
473///
474/// let xml = r#"<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="4">
475///     <patient patientId="ABC-001" uniqueId="1681574905819" whenCreated="2023-04-15 12:09:02 -0400" creator="Paul Sanders" siteName="Some Site" siteUniqueId="1681574834910" lastLanguage="English" numberOfForms="6">
476///       <form name="day.0.form.name.demographics" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1681574905839" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Demographics" formIndex="1" formGroup="Day 0" formState="In-Work">
477///         <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-15 12:09:02 -0400"/>
478///         <category name="Demographics" type="normal" highestIndex="0">
479///           <field name="breed" type="combo-box" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
480///             <entry id="1">
481///               <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Labrador</value>
482///             </entry>
483///           </field>
484///         </category>
485///       </form>
486///     </patient>
487///     <patient patientId="DEF-002" uniqueId="1681574905820" whenCreated="2023-04-16 12:10:02 -0400" creator="Wade Watts" siteName="Another Site" siteUniqueId="1681574834911" lastLanguage="" numberOfForms="8">
488///       <form name="day.0.form.name.demographics" lastModified="2023-04-16 12:10:15 -0400" whoLastModifiedName="Barney Rubble" whoLastModifiedRole="Technician" whenCreated="1681574905838" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Demographics" formIndex="1" formGroup="Day 0" formState="In-Work">
489///         <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-16 12:10:02 -0400"/>
490///         <category name="Demographics" type="normal" highestIndex="0">
491///           <field name="breed" type="combo-box" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
492///             <entry id="1">
493///               <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Labrador</value>
494///             </entry>
495///           </field>
496///         </category>
497///       </form>
498///     </patient>
499/// </export_from_vision_EDC>
500/// "#;
501///
502/// let expected = SubjectNative {
503///     patients: vec![
504///         Patient {
505///             patient_id: "ABC-001".to_string(),
506///             unique_id: "1681574905819".to_string(),
507///             when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
508///                 .unwrap()
509///                 .with_timezone(&Utc)),
510///             creator: "Paul Sanders".to_string(),
511///             site_name: "Some Site".to_string(),
512///             site_unique_id: "1681574834910".to_string(),
513///             last_language: Some("English".to_string()),
514///             number_of_forms: 6,
515///             forms: Some(vec![Form {
516///                 name: "day.0.form.name.demographics".to_string(),
517///                 last_modified: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:15Z")
518///                     .unwrap()
519///                     .with_timezone(&Utc)),
520///                 who_last_modified_name: Some("Paul Sanders".to_string()),
521///                 who_last_modified_role: Some("Project Manager".to_string()),
522///                 when_created: 1681574905839,
523///                 has_errors: false,
524///                 has_warnings: false,
525///                 locked: false,
526///                 user: None,
527///                 date_time_changed: None,
528///                 form_title: "Demographics".to_string(),
529///                 form_index: 1,
530///                 form_group: Some("Day 0".to_string()),
531///                 form_state: "In-Work".to_string(),
532///                 lock_state: None,
533///                 states: Some(vec![State {
534///                     value: "form.state.in.work".to_string(),
535///                     signer: "Paul Sanders - Project Manager".to_string(),
536///                     signer_unique_id: "1681162687395".to_string(),
537///                     date_signed: Some(
538///                         DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
539///                             .unwrap()
540///                             .with_timezone(&Utc),
541///                     ),
542///                 }]),
543///                 categories: Some(vec![Category {
544///                     name: "Demographics".to_string(),
545///                     category_type: "normal".to_string(),
546///                     highest_index: 0,
547///                     fields: Some(vec![Field {
548///                         name: "breed".to_string(),
549///                         field_type: "combo-box".to_string(),
550///                         data_type: Some("string".to_string()),
551///                         error_code: "valid".to_string(),
552///                         when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:26Z")
553///                             .unwrap()
554///                             .with_timezone(&Utc)),
555///                         keep_history: true,
556///                         entries: Some(vec![Entry {
557///                             entry_id: "1".to_string(),
558///                             reviewed_by: None,
559///                             reviewed_by_unique_id: None,
560///                             reviewed_by_when: None,
561///                             value: Some(Value {
562///                                 by: "Paul Sanders".to_string(),
563///                                 by_unique_id: Some("1681162687395".to_string()),
564///                                 role: "Project Manager".to_string(),
565///                                 when: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
566///                                     .unwrap()
567///                                     .with_timezone(&Utc)),
568///                                 value: "Labrador".to_string(),
569///                             }),
570///                             reason: None,
571///                         }]),
572///                         comments: None,
573///                     }]),
574///                 }]),
575///             }]),
576///         },
577///         Patient {
578///             patient_id: "DEF-002".to_string(),
579///             unique_id: "1681574905820".to_string(),
580///             when_created: Some(DateTime::parse_from_rfc3339("2023-04-16T16:10:02Z")
581///                 .unwrap()
582///                 .with_timezone(&Utc)),
583///             creator: "Wade Watts".to_string(),
584///             site_name: "Another Site".to_string(),
585///             site_unique_id: "1681574834911".to_string(),
586///             last_language: None,
587///             number_of_forms: 8,
588///             forms: Some(vec![Form {
589///                 name: "day.0.form.name.demographics".to_string(),
590///                 last_modified: Some(DateTime::parse_from_rfc3339("2023-04-16T16:10:15Z")
591///                     .unwrap()
592///                     .with_timezone(&Utc)),
593///                 who_last_modified_name: Some("Barney Rubble".to_string()),
594///                 who_last_modified_role: Some("Technician".to_string()),
595///                 when_created: 1681574905838,
596///                 has_errors: false,
597///                 has_warnings: false,
598///                 locked: false,
599///                 user: None,
600///                 date_time_changed: None,
601///                 form_title: "Demographics".to_string(),
602///                 form_index: 1,
603///                 form_group: Some("Day 0".to_string()),
604///                 form_state: "In-Work".to_string(),
605///                 lock_state: None,
606///                 states: Some(vec![State {
607///                     value: "form.state.in.work".to_string(),
608///                     signer: "Paul Sanders - Project Manager".to_string(),
609///                     signer_unique_id: "1681162687395".to_string(),
610///                     date_signed: Some(
611///                         DateTime::parse_from_rfc3339("2023-04-16T16:10:02Z")
612///                             .unwrap()
613///                             .with_timezone(&Utc),
614///                     ),
615///                 }]),
616///                 categories: Some(vec![Category {
617///                     name: "Demographics".to_string(),
618///                     category_type: "normal".to_string(),
619///                     highest_index: 0,
620///                     fields: Some(vec![Field {
621///                         name: "breed".to_string(),
622///                         field_type: "combo-box".to_string(),
623///                         data_type: Some("string".to_string()),
624///                         error_code: "valid".to_string(),
625///                         when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:26Z")
626///                             .unwrap()
627///                             .with_timezone(&Utc)),
628///                         keep_history: true,
629///                         entries: Some(vec![Entry {
630///                             entry_id: "1".to_string(),
631///                             reviewed_by: None,
632///                             reviewed_by_unique_id: None,
633///                             reviewed_by_when: None,
634///                             value: Some(Value {
635///                                 by: "Paul Sanders".to_string(),
636///                                 by_unique_id: Some("1681162687395".to_string()),
637///                                 role: "Project Manager".to_string(),
638///                                 when: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
639///                                     .unwrap()
640///                                     .with_timezone(&Utc)),
641///                                 value: "Labrador".to_string(),
642///                             }),
643///                             reason: None,
644///                         }]),
645///                         comments: None,
646///                     }]),
647///                 }]),
648///             }]),
649///         },
650///     ],
651/// };
652/// let result = parse_subject_native_string(xml).unwrap();
653///
654/// assert_eq!(result, expected);
655/// ```
656pub fn parse_subject_native_string(xml_str: &str) -> Result<SubjectNative, Error> {
657    let chunks = extract_patient_chunks(xml_str);
658    let patients = chunks
659        .into_par_iter()
660        .map(parse_patient_xml)
661        .collect::<Result<Vec<_>, _>>()?;
662    Ok(SubjectNative { patients })
663}
664
665fn extract_attributes(e: &BytesStart) -> Result<HashMap<String, String>, Error> {
666    let mut attrs = HashMap::new();
667    for attr in e.attributes() {
668        let attr = attr.map_err(|e| {
669            Error::ParsingError(quick_xml::de::DeError::Custom(format!(
670                "Attribute error: {}",
671                e
672            )))
673        })?;
674        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
675        let value = String::from_utf8_lossy(&attr.value).to_string();
676        attrs.insert(key, value);
677    }
678    Ok(attrs)
679}
680
/// Splits `xml` into the slices that each hold one patient element.
///
/// A slice starts at an occurrence of `<patient ` (with the trailing space) and
/// ends just after the next `</patient>`. Scanning resumes after each slice and
/// stops as soon as either marker can no longer be found, so an opening tag
/// without a closing tag yields no slice.
fn extract_patient_chunks(xml: &str) -> Vec<&str> {
    const OPEN_TAG: &str = "<patient ";
    const CLOSE_TAG: &str = "</patient>";

    let mut found = Vec::new();
    let mut cursor = 0;

    while let Some(open_offset) = xml[cursor..].find(OPEN_TAG) {
        let begin = cursor + open_offset;
        let finish = match xml[begin..].find(CLOSE_TAG) {
            Some(close_offset) => begin + close_offset + CLOSE_TAG.len(),
            None => break,
        };
        found.push(&xml[begin..finish]);
        cursor = finish;
    }

    found
}
702
703fn parse_patient_xml(patient_xml: &str) -> Result<Patient, Error> {
704    let wrapped = format!("<r>{}</r>", patient_xml);
705    let mut xml_reader = Reader::from_reader(Cursor::new(wrapped.as_bytes()));
706    xml_reader.config_mut().trim_text(true);
707
708    let mut current_patient: Option<Patient> = None;
709    let mut current_forms: Vec<Form> = Vec::new();
710    let mut current_form: Option<Form> = None;
711    let mut current_states: Vec<State> = Vec::new();
712    let mut current_categories: Vec<Category> = Vec::new();
713    let mut current_category: Option<Category> = None;
714    let mut current_fields: Vec<Field> = Vec::new();
715    let mut current_field: Option<Field> = None;
716    let mut current_entries: Vec<Entry> = Vec::new();
717    let mut current_entry: Option<Entry> = None;
718    let mut current_comments: Vec<Comment> = Vec::new();
719    let mut current_comment: Option<Comment> = None;
720    let mut current_value: Option<Value> = None;
721    let mut current_reason: Option<Reason> = None;
722    let mut text_content = String::new();
723    let mut in_form = false;
724    let mut in_category = false;
725    let mut in_field = false;
726    let mut in_entry = false;
727    let mut in_comment = false;
728    let mut in_value = false;
729    let mut in_reason = false;
730    let mut buf = Vec::new();
731
732    loop {
733        match xml_reader.read_event_into(&mut buf) {
734            Err(e) => {
735                return Err(Error::ParsingError(quick_xml::de::DeError::Custom(
736                    format!("XML reading error: {}", e),
737                )))
738            }
739            Ok(Event::Eof) => break,
740
741            Ok(Event::Start(ref e)) => {
742                let name_bytes = e.local_name();
743                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
744                    match name {
745                        "patient" => {
746                            let attrs = extract_attributes(e)?;
747                            current_patient = Some(Patient::from_attributes(attrs)?);
748                            current_forms.clear();
749                        }
750                        "form" if current_patient.is_some() => {
751                            let attrs = extract_attributes(e)?;
752                            current_form = Some(Form::from_attributes(attrs)?);
753                            in_form = true;
754                            current_states.clear();
755                            current_categories.clear();
756                        }
757                        "category" if in_form => {
758                            let attrs = extract_attributes(e)?;
759                            current_category = Some(Category::from_attributes(attrs)?);
760                            in_category = true;
761                            current_fields.clear();
762                        }
763                        "field" if in_category => {
764                            let attrs = extract_attributes(e)?;
765                            current_field = Some(Field::from_attributes(attrs)?);
766                            in_field = true;
767                            current_entries.clear();
768                            current_comments.clear();
769                        }
770                        "entry" if in_field => {
771                            let attrs = extract_attributes(e)?;
772                            current_entry = Some(Entry::from_attributes(attrs)?);
773                            in_entry = true;
774                        }
775                        "comment" if in_field => {
776                            let attrs = extract_attributes(e)?;
777                            let comment_id = attrs.get("id").cloned().unwrap_or_default();
778                            current_comment = Some(Comment {
779                                comment_id,
780                                value: None,
781                            });
782                            in_comment = true;
783                        }
784                        "value" if in_entry || in_comment => {
785                            let attrs = extract_attributes(e)?;
786                            current_value = Some(Value::from_attributes(attrs)?);
787                            in_value = true;
788                            text_content.clear();
789                        }
790                        "reason" if in_entry => {
791                            let attrs = extract_attributes(e)?;
792                            current_reason = Some(Reason::from_attributes(attrs)?);
793                            in_reason = true;
794                            text_content.clear();
795                        }
796                        _ => {}
797                    }
798                }
799            }
800
801            Ok(Event::Text(e)) if (in_value || in_reason) => {
802                text_content.push_str(&String::from_utf8_lossy(&e));
803            }
804
805            Ok(Event::End(ref e)) => {
806                let name_bytes = e.local_name();
807                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
808                    match name {
809                        "patient" => {
810                            if let Some(mut patient) = current_patient.take() {
811                                if !current_forms.is_empty() {
812                                    patient.set_forms(std::mem::take(&mut current_forms));
813                                }
814                                current_patient = Some(patient);
815                            }
816                        }
817                        "form" if in_form => {
818                            if let Some(mut form) = current_form.take() {
819                                if !current_states.is_empty() {
820                                    form.states = Some(std::mem::take(&mut current_states));
821                                }
822                                if !current_categories.is_empty() {
823                                    form.categories = Some(std::mem::take(&mut current_categories));
824                                }
825                                current_forms.push(form);
826                            }
827                            in_form = false;
828                        }
829                        "category" if in_category => {
830                            if let Some(mut category) = current_category.take() {
831                                if !current_fields.is_empty() {
832                                    category.fields = Some(std::mem::take(&mut current_fields));
833                                }
834                                current_categories.push(category);
835                            }
836                            in_category = false;
837                        }
838                        "field" if in_field => {
839                            if let Some(mut field) = current_field.take() {
840                                if !current_entries.is_empty() {
841                                    field.entries = Some(std::mem::take(&mut current_entries));
842                                }
843                                if !current_comments.is_empty() {
844                                    field.comments = Some(std::mem::take(&mut current_comments));
845                                }
846                                current_fields.push(field);
847                            }
848                            in_field = false;
849                        }
850                        "entry" if in_entry => {
851                            if let Some(entry) = current_entry.take() {
852                                current_entries.push(entry);
853                            }
854                            in_entry = false;
855                        }
856                        "comment" if in_comment => {
857                            if let Some(comment) = current_comment.take() {
858                                current_comments.push(comment);
859                            }
860                            in_comment = false;
861                        }
862                        "value" if in_value => {
863                            if let Some(mut value) = current_value.take() {
864                                value.value = std::mem::take(&mut text_content);
865                                if let Some(ref mut entry) = current_entry {
866                                    entry.value = Some(value);
867                                } else if let Some(ref mut comment) = current_comment {
868                                    comment.value = Some(value);
869                                }
870                            }
871                            in_value = false;
872                        }
873                        "reason" if in_reason => {
874                            if let Some(mut reason) = current_reason.take() {
875                                reason.value = std::mem::take(&mut text_content);
876                                if let Some(ref mut entry) = current_entry {
877                                    entry.reason = Some(reason);
878                                }
879                            }
880                            in_reason = false;
881                        }
882                        _ => {}
883                    }
884                }
885            }
886
887            Ok(Event::Empty(ref e)) => {
888                let name_bytes = e.local_name();
889                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
890                    match name {
891                        "state" if in_form => {
892                            let attrs = extract_attributes(e)?;
893                            let state = State::from_attributes(attrs)?;
894                            current_states.push(state);
895                        }
896                        "lockState" if in_form => {
897                            let attrs = extract_attributes(e)?;
898                            let lock_state = LockState::from_attributes(attrs)?;
899                            if let Some(ref mut form) = current_form {
900                                form.lock_state = Some(lock_state);
901                            }
902                        }
903                        "value" if in_entry => {
904                            let attrs = extract_attributes(e)?;
905                            let value = Value::from_attributes(attrs)?;
906                            if let Some(ref mut entry) = current_entry {
907                                entry.value = Some(value);
908                            }
909                        }
910                        "reason" if in_entry => {
911                            let attrs = extract_attributes(e)?;
912                            let reason = Reason::from_attributes(attrs)?;
913                            if let Some(ref mut entry) = current_entry {
914                                entry.reason = Some(reason);
915                            }
916                        }
917                        _ => {}
918                    }
919                }
920            }
921
922            _ => {}
923        }
924
925        buf.clear();
926    }
927
928    current_patient.ok_or_else(|| {
929        Error::ParsingError(quick_xml::de::DeError::Custom(
930            "No patient found in chunk".to_string(),
931        ))
932    })
933}
934
/// Splits `xml` into the slices that each hold one site element.
///
/// A slice starts at an occurrence of `<site ` (with the trailing space) and
/// ends just after the next `</site>`. Scanning resumes after each slice and
/// stops as soon as either marker can no longer be found, so an opening tag
/// without a closing tag yields no slice.
fn extract_site_chunks(xml: &str) -> Vec<&str> {
    const OPEN_TAG: &str = "<site ";
    const CLOSE_TAG: &str = "</site>";

    let mut found = Vec::new();
    let mut cursor = 0;

    while let Some(open_offset) = xml[cursor..].find(OPEN_TAG) {
        let begin = cursor + open_offset;
        let finish = match xml[begin..].find(CLOSE_TAG) {
            Some(close_offset) => begin + close_offset + CLOSE_TAG.len(),
            None => break,
        };
        found.push(&xml[begin..finish]);
        cursor = finish;
    }

    found
}
956
957fn parse_site_xml(site_xml: &str) -> Result<Site, Error> {
958    let wrapped = format!("<r>{}</r>", site_xml);
959    let mut xml_reader = Reader::from_reader(Cursor::new(wrapped.as_bytes()));
960    xml_reader.config_mut().trim_text(true);
961
962    let mut current_site: Option<Site> = None;
963    let mut current_forms: Vec<Form> = Vec::new();
964    let mut current_form: Option<Form> = None;
965    let mut current_states: Vec<State> = Vec::new();
966    let mut current_categories: Vec<Category> = Vec::new();
967    let mut current_category: Option<Category> = None;
968    let mut current_fields: Vec<Field> = Vec::new();
969    let mut current_field: Option<Field> = None;
970    let mut current_entries: Vec<Entry> = Vec::new();
971    let mut current_entry: Option<Entry> = None;
972    let mut current_comments: Vec<Comment> = Vec::new();
973    let mut current_comment: Option<Comment> = None;
974    let mut current_value: Option<Value> = None;
975    let mut current_reason: Option<Reason> = None;
976    let mut text_content = String::new();
977    let mut in_form = false;
978    let mut in_category = false;
979    let mut in_field = false;
980    let mut in_entry = false;
981    let mut in_comment = false;
982    let mut in_value = false;
983    let mut in_reason = false;
984    let mut buf = Vec::new();
985
986    loop {
987        match xml_reader.read_event_into(&mut buf) {
988            Err(e) => {
989                return Err(Error::ParsingError(quick_xml::de::DeError::Custom(
990                    format!("XML reading error: {}", e),
991                )))
992            }
993            Ok(Event::Eof) => break,
994
995            Ok(Event::Start(ref e)) => {
996                let name_bytes = e.local_name();
997                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
998                    match name {
999                        "site" => {
1000                            let attrs = extract_attributes(e)?;
1001                            current_site = Some(Site::from_attributes(attrs)?);
1002                            current_forms.clear();
1003                        }
1004                        "form" if current_site.is_some() => {
1005                            let attrs = extract_attributes(e)?;
1006                            current_form = Some(Form::from_attributes(attrs)?);
1007                            in_form = true;
1008                            current_states.clear();
1009                            current_categories.clear();
1010                        }
1011                        "category" if in_form => {
1012                            let attrs = extract_attributes(e)?;
1013                            current_category = Some(Category::from_attributes(attrs)?);
1014                            in_category = true;
1015                            current_fields.clear();
1016                        }
1017                        "field" if in_category => {
1018                            let attrs = extract_attributes(e)?;
1019                            current_field = Some(Field::from_attributes(attrs)?);
1020                            in_field = true;
1021                            current_entries.clear();
1022                            current_comments.clear();
1023                        }
1024                        "entry" if in_field => {
1025                            let attrs = extract_attributes(e)?;
1026                            current_entry = Some(Entry::from_attributes(attrs)?);
1027                            in_entry = true;
1028                        }
1029                        "comment" if in_field => {
1030                            let attrs = extract_attributes(e)?;
1031                            let comment_id = attrs.get("id").cloned().unwrap_or_default();
1032                            current_comment = Some(Comment {
1033                                comment_id,
1034                                value: None,
1035                            });
1036                            in_comment = true;
1037                        }
1038                        "value" if in_entry || in_comment => {
1039                            let attrs = extract_attributes(e)?;
1040                            current_value = Some(Value::from_attributes(attrs)?);
1041                            in_value = true;
1042                            text_content.clear();
1043                        }
1044                        "reason" if in_entry => {
1045                            let attrs = extract_attributes(e)?;
1046                            current_reason = Some(Reason::from_attributes(attrs)?);
1047                            in_reason = true;
1048                            text_content.clear();
1049                        }
1050                        _ => {}
1051                    }
1052                }
1053            }
1054
1055            Ok(Event::Text(e)) if (in_value || in_reason) => {
1056                text_content.push_str(&String::from_utf8_lossy(&e));
1057            }
1058
1059            Ok(Event::End(ref e)) => {
1060                let name_bytes = e.local_name();
1061                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1062                    match name {
1063                        "site" => {
1064                            if let Some(mut site) = current_site.take() {
1065                                if !current_forms.is_empty() {
1066                                    site.set_forms(std::mem::take(&mut current_forms));
1067                                }
1068                                current_site = Some(site);
1069                            }
1070                        }
1071                        "form" if in_form => {
1072                            if let Some(mut form) = current_form.take() {
1073                                if !current_states.is_empty() {
1074                                    form.states = Some(std::mem::take(&mut current_states));
1075                                }
1076                                if !current_categories.is_empty() {
1077                                    form.categories = Some(std::mem::take(&mut current_categories));
1078                                }
1079                                current_forms.push(form);
1080                            }
1081                            in_form = false;
1082                        }
1083                        "category" if in_category => {
1084                            if let Some(mut category) = current_category.take() {
1085                                if !current_fields.is_empty() {
1086                                    category.fields = Some(std::mem::take(&mut current_fields));
1087                                }
1088                                current_categories.push(category);
1089                            }
1090                            in_category = false;
1091                        }
1092                        "field" if in_field => {
1093                            if let Some(mut field) = current_field.take() {
1094                                if !current_entries.is_empty() {
1095                                    field.entries = Some(std::mem::take(&mut current_entries));
1096                                }
1097                                if !current_comments.is_empty() {
1098                                    field.comments = Some(std::mem::take(&mut current_comments));
1099                                }
1100                                current_fields.push(field);
1101                            }
1102                            in_field = false;
1103                        }
1104                        "entry" if in_entry => {
1105                            if let Some(entry) = current_entry.take() {
1106                                current_entries.push(entry);
1107                            }
1108                            in_entry = false;
1109                        }
1110                        "comment" if in_comment => {
1111                            if let Some(comment) = current_comment.take() {
1112                                current_comments.push(comment);
1113                            }
1114                            in_comment = false;
1115                        }
1116                        "value" if in_value => {
1117                            if let Some(mut value) = current_value.take() {
1118                                value.value = std::mem::take(&mut text_content);
1119                                if let Some(ref mut entry) = current_entry {
1120                                    entry.value = Some(value);
1121                                } else if let Some(ref mut comment) = current_comment {
1122                                    comment.value = Some(value);
1123                                }
1124                            }
1125                            in_value = false;
1126                        }
1127                        "reason" if in_reason => {
1128                            if let Some(mut reason) = current_reason.take() {
1129                                reason.value = std::mem::take(&mut text_content);
1130                                if let Some(ref mut entry) = current_entry {
1131                                    entry.reason = Some(reason);
1132                                }
1133                            }
1134                            in_reason = false;
1135                        }
1136                        _ => {}
1137                    }
1138                }
1139            }
1140
1141            Ok(Event::Empty(ref e)) => {
1142                let name_bytes = e.local_name();
1143                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1144                    match name {
1145                        "state" if in_form => {
1146                            let attrs = extract_attributes(e)?;
1147                            let state = State::from_attributes(attrs)?;
1148                            current_states.push(state);
1149                        }
1150                        "lockState" if in_form => {
1151                            let attrs = extract_attributes(e)?;
1152                            let lock_state = LockState::from_attributes(attrs)?;
1153                            if let Some(ref mut form) = current_form {
1154                                form.lock_state = Some(lock_state);
1155                            }
1156                        }
1157                        "field" if in_category => {
1158                            let attrs = extract_attributes(e)?;
1159                            let field = Field::from_attributes(attrs)?;
1160                            current_fields.push(field);
1161                        }
1162                        "value" if in_entry => {
1163                            let attrs = extract_attributes(e)?;
1164                            let value = Value::from_attributes(attrs)?;
1165                            if let Some(ref mut entry) = current_entry {
1166                                entry.value = Some(value);
1167                            }
1168                        }
1169                        "reason" if in_entry => {
1170                            let attrs = extract_attributes(e)?;
1171                            let reason = Reason::from_attributes(attrs)?;
1172                            if let Some(ref mut entry) = current_entry {
1173                                entry.reason = Some(reason);
1174                            }
1175                        }
1176                        _ => {}
1177                    }
1178                }
1179            }
1180
1181            _ => {}
1182        }
1183
1184        buf.clear();
1185    }
1186
1187    current_site.ok_or_else(|| {
1188        Error::ParsingError(quick_xml::de::DeError::Custom(
1189            "No site found in chunk".to_string(),
1190        ))
1191    })
1192}
1193
1194/// Parses a Prelude native user XML file into a `UserNative` struct.
1195///
1196/// # Example
1197///
1198/// ```
1199/// use std::path::Path;
1200///
1201/// use prelude_xml_parser::parse_user_native_file;
1202///
1203/// let file_path = Path::new("tests/assets/user_native.xml");
1204/// let native = parse_user_native_file(&file_path).unwrap();
1205///
1206/// assert!(native.users.len() >= 1, "Vector length is less than 1");
1207/// ```
1208pub fn parse_user_native_file(xml_path: &Path) -> Result<UserNative, Error> {
1209    check_valid_xml_file(xml_path)?;
1210
1211    let xml_file = read_to_string(xml_path)?;
1212    let native = parse_user_native_string(&xml_file)?;
1213
1214    Ok(native)
1215}
1216
1217/// Parse a string of Prelude native user XML into a `UserNative` struct.
1218///
1219/// # Example
1220///
1221/// ```
1222/// use chrono::{DateTime, Utc};
1223/// use prelude_xml_parser::parse_user_native_string;
1224/// use prelude_xml_parser::native::user_native::*;
1225///
1226/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1227///   <export_from_vision_EDC date="02-Jun-2024 06:59 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="3">
1228///     <user uniqueId="1691421275437" lastLanguage="" creator="Paul Sanders(1681162687395)" numberOfForms="1">
1229///       <form name="form.name.demographics" lastModified="2023-08-07 10:15:41 -0500" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1691421341578" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="User Demographics" formIndex="1" formGroup="" formState="In-Work">
1230///         <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-08-07 10:15:41 -0500" />
1231///         <category name="demographics" type="normal" highestIndex="0">
1232///           <field name="address" type="text" dataType="string" errorCode="undefined" whenCreated="2024-01-12 14:14:09 -0600" keepHistory="true" />
1233///           <field name="email" type="text" dataType="string" errorCode="undefined" whenCreated="2023-08-07 10:15:41 -0500" keepHistory="true">
1234///             <entry id="1">
1235///               <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 10:15:41 -0500" xml:space="preserve">jazz@artemis.com</value>
1236///             </entry>
1237///           </field>
1238///         </category>
1239///         <category name="Administrative" type="normal" highestIndex="0">
1240///           <field name="study_assignment" type="text" dataType="" errorCode="undefined" whenCreated="2023-08-07 10:15:41 -0500" keepHistory="true">
1241///             <entry id="1">
1242///               <value by="set from calculation" byUniqueId="" role="System" when="2023-08-07 10:15:41 -0500" xml:space="preserve">On 07-Aug-2023 10:15 -0500, Paul Sanders assigned user from another study</value>
1243///               <reason by="set from calculation" byUniqueId="" role="System" when="2023-08-07 10:15:41 -0500" xml:space="preserve">calculated value</reason>
1244///             </entry>
1245///           </field>
1246///         </category>
1247///       </form>
1248///     </user>
1249///   </export_from_vision_EDC>
1250/// "#;
1251///
1252/// let expected = UserNative {
1253///     users: vec![User {
1254///         unique_id: "1691421275437".to_string(),
1255///         last_language: None,
1256///         creator: "Paul Sanders(1681162687395)".to_string(),
1257///         number_of_forms: 1,
1258///         forms: Some(vec![Form {
1259///             name: "form.name.demographics".to_string(),
1260///             last_modified: Some(
1261///                 DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1262///                     .unwrap()
1263///                     .with_timezone(&Utc),
1264///             ),
1265///             who_last_modified_name: Some("Paul Sanders".to_string()),
1266///             who_last_modified_role: Some("Project Manager".to_string()),
1267///             when_created: 1691421341578,
1268///             has_errors: false,
1269///             has_warnings: false,
1270///             locked: false,
1271///             user: None,
1272///             date_time_changed: None,
1273///             form_title: "User Demographics".to_string(),
1274///             form_index: 1,
1275///             form_group: None,
1276///             form_state: "In-Work".to_string(),
1277///             lock_state: None,
1278///             states: Some(vec![State {
1279///                 value: "form.state.in.work".to_string(),
1280///                 signer: "Paul Sanders - Project Manager".to_string(),
1281///                 signer_unique_id: "1681162687395".to_string(),
1282///                 date_signed: Some(
1283///                     DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1284///                         .unwrap()
1285///                         .with_timezone(&Utc),
1286///                 ),
1287///             }]),
1288///             categories: Some(vec![
1289///                         Category {
1290///                             name: "demographics".to_string(),
1291///                             category_type: "normal".to_string(),
1292///                             highest_index: 0,
1293///                             fields: Some(vec![
1294///                                 Field {
1295///                                     name: "address".to_string(),
1296///                                     field_type: "text".to_string(),
1297///                                     data_type: Some("string".to_string()),
1298///                                     error_code: "undefined".to_string(),
1299///                                     when_created: Some(DateTime::parse_from_rfc3339("2024-01-12T20:14:09Z")
1300///                                         .unwrap()
1301///                                         .with_timezone(&Utc)),
1302///                                     keep_history: true,
1303///                                     entries: None,
1304///                                     comments: None,
1305///                                 },
1306///                                 Field {
1307///                                     name: "email".to_string(),
1308///                                     field_type: "text".to_string(),
1309///                                     data_type: Some("string".to_string()),
1310///                                     error_code: "undefined".to_string(),
1311///                                     when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1312///                                         .unwrap()
1313///                                         .with_timezone(&Utc)),
1314///                                     keep_history: true,
1315///                                     entries: Some(vec![Entry {
1316///                                         entry_id: "1".to_string(),
1317///                                         reviewed_by: None,
1318///                                         reviewed_by_unique_id: None,
1319///                                         reviewed_by_when: None,
1320///                                         value: Some(Value {
1321///                                             by: "Paul Sanders".to_string(),
1322///                                             by_unique_id: Some("1681162687395".to_string()),
1323///                                             role: "Project Manager".to_string(),
1324///                                             when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1325///                                                 .unwrap()
1326///                                                 .with_timezone(&Utc)),
1327///                                             value: "jazz@artemis.com".to_string(),
1328///                                         }),
1329///                                         reason: None,
1330///                                     }]),
1331///                                     comments: None,
1332///                                 },
1333///                             ]),
1334///                         },
1335///                         Category {
1336///                             name: "Administrative".to_string(),
1337///                             category_type: "normal".to_string(),
1338///                             highest_index: 0,
1339///                             fields: Some(vec![
1340///                                 Field {
1341///                                     name: "study_assignment".to_string(),
1342///                                     field_type: "text".to_string(),
1343///                                     data_type: None,
1344///                                     error_code: "undefined".to_string(),
1345///                                     when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1346///                                         .unwrap()
1347///                                         .with_timezone(&Utc)),
1348///                                     keep_history: true,
1349///                                     entries: Some(vec![
1350///                                         Entry {
1351///                                             entry_id: "1".to_string(),
1352///                                             reviewed_by: None,
1353///                                             reviewed_by_unique_id: None,
1354///                                             reviewed_by_when: None,
1355///                                             value: Some(Value {
1356///                                                 by: "set from calculation".to_string(),
1357///                                                 by_unique_id: None,
1358///                                                 role: "System".to_string(),
1359///                                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1360///                                                     .unwrap()
1361///                                                     .with_timezone(&Utc)),
1362///                                                 value: "On 07-Aug-2023 10:15 -0500, Paul Sanders assigned user from another study".to_string(),
1363///                                             }),
1364///                                             reason: Some(Reason {
1365///                                                 by: "set from calculation".to_string(),
1366///                                                 by_unique_id: None,
1367///                                                 role: "System".to_string(),
1368///                                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1369///                                                     .unwrap()
1370///                                                     .with_timezone(&Utc)),
1371///                                                 value: "calculated value".to_string(),
1372///                                             }),
1373///                                         },
1374///                                     ]),
1375///                                     comments: None,
1376///                                 },
1377///                             ]),
1378///                         },
1379///             ]),
1380///         }]),
1381///     }],
1382/// };
1383///
1384/// let result = parse_user_native_string(xml).unwrap();
1385///
1386/// assert_eq!(result, expected);
1387/// ```
1388pub fn parse_user_native_string(xml_str: &str) -> Result<UserNative, Error> {
1389    let chunks = extract_user_chunks(xml_str);
1390    let users = chunks
1391        .into_par_iter()
1392        .map(parse_user_xml)
1393        .collect::<Result<Vec<_>, _>>()?;
1394    Ok(UserNative { users })
1395}
1396
/// Splits `xml` into borrowed `<user …>…</user>` slices, one per user element.
///
/// An opening tag is recognised as the literal `<user` followed by any XML whitespace
/// character (space, tab, CR or LF), so tags whose attributes start on a new line are
/// found as well, while longer element names that merely start with `user` are not
/// mistaken for a user element. Each chunk runs up to and including the first
/// `</user>` after its opening tag.
///
/// Scanning stops at the first opening tag that has no closing `</user>`; chunks found
/// before it are still returned. Input without any user element yields an empty vector.
fn extract_user_chunks(xml: &str) -> Vec<&str> {
    const OPEN_TAG_NAME: &str = "<user";
    const CLOSE_TAG: &str = "</user>";

    let mut chunks = Vec::new();
    let mut pos = 0;

    while let Some(rel) = xml[pos..].find(OPEN_TAG_NAME) {
        let start = pos + rel;
        // `OPEN_TAG_NAME` is ASCII, so this offset is always on a char boundary.
        let after_name = start + OPEN_TAG_NAME.len();

        // The element name must end here: XML permits any whitespace (not just a single
        // space) between the name and the first attribute.
        let name_ends_here = matches!(
            xml.as_bytes().get(after_name),
            Some(b' ' | b'\t' | b'\r' | b'\n')
        );
        if !name_ends_here {
            pos = after_name;
            continue;
        }

        let end = match xml[after_name..].find(CLOSE_TAG) {
            Some(rel_end) => after_name + rel_end + CLOSE_TAG.len(),
            None => break,
        };
        chunks.push(&xml[start..end]);
        pos = end;
    }

    chunks
}
1418
1419fn parse_user_xml(user_xml: &str) -> Result<User, Error> {
1420    let wrapped = format!("<r>{}</r>", user_xml);
1421    let mut xml_reader = Reader::from_reader(Cursor::new(wrapped.as_bytes()));
1422    xml_reader.config_mut().trim_text(true);
1423
1424    let mut current_user: Option<User> = None;
1425    let mut current_forms: Vec<Form> = Vec::new();
1426    let mut current_form: Option<Form> = None;
1427    let mut current_states: Vec<State> = Vec::new();
1428    let mut current_categories: Vec<Category> = Vec::new();
1429    let mut current_category: Option<Category> = None;
1430    let mut current_fields: Vec<Field> = Vec::new();
1431    let mut current_field: Option<Field> = None;
1432    let mut current_entries: Vec<Entry> = Vec::new();
1433    let mut current_entry: Option<Entry> = None;
1434    let mut current_comments: Vec<Comment> = Vec::new();
1435    let mut current_comment: Option<Comment> = None;
1436    let mut current_value: Option<Value> = None;
1437    let mut current_reason: Option<Reason> = None;
1438    let mut text_content = String::new();
1439    let mut in_form = false;
1440    let mut in_category = false;
1441    let mut in_field = false;
1442    let mut in_entry = false;
1443    let mut in_comment = false;
1444    let mut in_value = false;
1445    let mut in_reason = false;
1446    let mut buf = Vec::new();
1447
1448    loop {
1449        match xml_reader.read_event_into(&mut buf) {
1450            Err(e) => {
1451                return Err(Error::ParsingError(quick_xml::de::DeError::Custom(
1452                    format!("XML reading error: {}", e),
1453                )))
1454            }
1455            Ok(Event::Eof) => break,
1456
1457            Ok(Event::Start(ref e)) => {
1458                let name_bytes = e.local_name();
1459                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1460                    match name {
1461                        "user" => {
1462                            let attrs = extract_attributes(e)?;
1463                            current_user = Some(User::from_attributes(attrs)?);
1464                        }
1465                        "form" => {
1466                            let attrs = extract_attributes(e)?;
1467                            current_form = Some(Form::from_attributes(attrs)?);
1468                            in_form = true;
1469                        }
1470                        "category" if in_form => {
1471                            let attrs = extract_attributes(e)?;
1472                            current_category = Some(Category::from_attributes(attrs)?);
1473                            in_category = true;
1474                        }
1475                        "field" if in_category => {
1476                            let attrs = extract_attributes(e)?;
1477                            current_field = Some(Field::from_attributes(attrs)?);
1478                            in_field = true;
1479                        }
1480                        "entry" if in_field => {
1481                            let attrs = extract_attributes(e)?;
1482                            current_entry = Some(Entry::from_attributes(attrs)?);
1483                            in_entry = true;
1484                        }
1485                        "comment" if in_field => {
1486                            let attrs = extract_attributes(e)?;
1487                            let comment_id = attrs.get("id").cloned().unwrap_or_default();
1488                            current_comment = Some(Comment {
1489                                comment_id,
1490                                value: None,
1491                            });
1492                            in_comment = true;
1493                        }
1494                        "value" if in_entry || in_comment => {
1495                            let attrs = extract_attributes(e)?;
1496                            current_value = Some(Value::from_attributes(attrs)?);
1497                            in_value = true;
1498                            text_content.clear();
1499                        }
1500                        "reason" if in_entry => {
1501                            let attrs = extract_attributes(e)?;
1502                            current_reason = Some(Reason::from_attributes(attrs)?);
1503                            in_reason = true;
1504                            text_content.clear();
1505                        }
1506                        _ => {}
1507                    }
1508                }
1509            }
1510
1511            Ok(Event::Text(e)) if (in_value || in_reason) => {
1512                text_content.push_str(&String::from_utf8_lossy(&e));
1513            }
1514
1515            Ok(Event::End(ref e)) => {
1516                let name_bytes = e.local_name();
1517                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1518                    match name {
1519                        "user" => {
1520                            if let Some(mut user) = current_user.take() {
1521                                if !current_forms.is_empty() {
1522                                    user.set_forms(std::mem::take(&mut current_forms));
1523                                }
1524                                current_user = Some(user);
1525                            }
1526                        }
1527                        "form" if in_form => {
1528                            if let Some(mut form) = current_form.take() {
1529                                if !current_states.is_empty() {
1530                                    form.states = Some(std::mem::take(&mut current_states));
1531                                }
1532                                if !current_categories.is_empty() {
1533                                    form.categories = Some(std::mem::take(&mut current_categories));
1534                                }
1535                                current_forms.push(form);
1536                            }
1537                            in_form = false;
1538                        }
1539                        "category" if in_category => {
1540                            if let Some(mut category) = current_category.take() {
1541                                if !current_fields.is_empty() {
1542                                    category.fields = Some(std::mem::take(&mut current_fields));
1543                                }
1544                                current_categories.push(category);
1545                            }
1546                            in_category = false;
1547                        }
1548                        "field" if in_field => {
1549                            if let Some(mut field) = current_field.take() {
1550                                if !current_entries.is_empty() {
1551                                    field.entries = Some(std::mem::take(&mut current_entries));
1552                                }
1553                                if !current_comments.is_empty() {
1554                                    field.comments = Some(std::mem::take(&mut current_comments));
1555                                }
1556                                current_fields.push(field);
1557                            }
1558                            in_field = false;
1559                        }
1560                        "entry" if in_entry => {
1561                            if let Some(entry) = current_entry.take() {
1562                                current_entries.push(entry);
1563                            }
1564                            in_entry = false;
1565                        }
1566                        "comment" if in_comment => {
1567                            if let Some(comment) = current_comment.take() {
1568                                current_comments.push(comment);
1569                            }
1570                            in_comment = false;
1571                        }
1572                        "value" if in_value => {
1573                            if let Some(mut value) = current_value.take() {
1574                                value.value = std::mem::take(&mut text_content);
1575                                if let Some(ref mut entry) = current_entry {
1576                                    entry.value = Some(value);
1577                                } else if let Some(ref mut comment) = current_comment {
1578                                    comment.value = Some(value);
1579                                }
1580                            }
1581                            in_value = false;
1582                        }
1583                        "reason" if in_reason => {
1584                            if let Some(mut reason) = current_reason.take() {
1585                                reason.value = std::mem::take(&mut text_content);
1586                                if let Some(ref mut entry) = current_entry {
1587                                    entry.reason = Some(reason);
1588                                }
1589                            }
1590                            in_reason = false;
1591                        }
1592                        _ => {}
1593                    }
1594                }
1595            }
1596
1597            Ok(Event::Empty(ref e)) => {
1598                let name_bytes = e.local_name();
1599                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1600                    match name {
1601                        "state" if in_form => {
1602                            let attrs = extract_attributes(e)?;
1603                            let state = State::from_attributes(attrs)?;
1604                            current_states.push(state);
1605                        }
1606                        "field" if in_category => {
1607                            let attrs = extract_attributes(e)?;
1608                            let field = Field::from_attributes(attrs)?;
1609                            current_fields.push(field);
1610                        }
1611                        _ => {}
1612                    }
1613                }
1614            }
1615
1616            _ => {}
1617        }
1618        buf.clear();
1619    }
1620
1621    current_user.ok_or_else(|| {
1622        Error::ParsingError(quick_xml::de::DeError::Custom(
1623            "No user element found".to_string(),
1624        ))
1625    })
1626}
1627
1628fn check_valid_xml_file(xml_path: &Path) -> Result<(), Error> {
1629    if !xml_path.exists() {
1630        return Err(Error::FileNotFound(xml_path.to_path_buf()));
1631    }
1632
1633    if let Some(extension) = xml_path.extension() {
1634        if extension != "xml" {
1635            return Err(Error::InvalidFileType(xml_path.to_owned()));
1636        }
1637    } else {
1638        return Err(Error::Unknown);
1639    }
1640
1641    Ok(())
1642}
1643
1644#[cfg(test)]
1645mod tests {
1646    use super::*;
1647    use tempfile::{tempdir, Builder};
1648
1649    #[test]
1650    fn test_site_file_not_found_error() {
1651        let dir = tempdir().unwrap().path().to_path_buf();
1652        let result = parse_site_native_file(&dir);
1653        assert!(result.is_err());
1654        assert!(matches!(result, Err(Error::FileNotFound(_))));
1655    }
1656
1657    #[test]
1658    fn test_site_invalid_file_type_error() {
1659        let file = Builder::new()
1660            .prefix("test")
1661            .suffix(".csv")
1662            .tempfile()
1663            .unwrap();
1664        let result = parse_site_native_file(file.path());
1665
1666        assert!(result.is_err());
1667        assert!(matches!(result, Err(Error::InvalidFileType(_))));
1668    }
1669
1670    #[test]
1671    fn test_subject_file_not_found_error() {
1672        let dir = tempdir().unwrap().path().to_path_buf();
1673        let result = parse_subject_native_file(&dir);
1674        assert!(result.is_err());
1675        assert!(matches!(result, Err(Error::FileNotFound(_))));
1676    }
1677
1678    #[test]
1679    fn test_subject_invalid_file_type_error() {
1680        let file = Builder::new()
1681            .prefix("test")
1682            .suffix(".csv")
1683            .tempfile()
1684            .unwrap();
1685        let result = parse_subject_native_file(file.path());
1686
1687        assert!(result.is_err());
1688        assert!(matches!(result, Err(Error::InvalidFileType(_))));
1689    }
1690
1691    #[test]
1692    fn test_user_file_not_found_error() {
1693        let dir = tempdir().unwrap().path().to_path_buf();
1694        let result = parse_user_native_file(&dir);
1695        assert!(result.is_err());
1696        assert!(matches!(result, Err(Error::FileNotFound(_))));
1697    }
1698
1699    #[test]
1700    fn test_user_invalid_file_type_error() {
1701        let file = Builder::new()
1702            .prefix("test")
1703            .suffix(".csv")
1704            .tempfile()
1705            .unwrap();
1706        let result = parse_user_native_file(file.path());
1707
1708        assert!(result.is_err());
1709        assert!(matches!(result, Err(Error::InvalidFileType(_))));
1710    }
1711
1712    #[test]
1713    fn test_forms_parsing_regression() {
1714        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1715<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1716    <patient patientId="TEST-001" uniqueId="123456789" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="2">
1717        <form name="test.form.1" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form 1" formIndex="1" formGroup="Test Group" formState="In-Work">
1718            <state value="form.state.in.work" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:09:02 -0400"/>
1719            <category name="Test Category" type="normal" highestIndex="0">
1720                <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
1721                    <entry id="1">
1722                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Test Value</value>
1723                    </entry>
1724                </field>
1725            </category>
1726        </form>
1727        <form name="test.form.2" lastModified="2023-04-15 12:10:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456790" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form 2" formIndex="2" formGroup="Test Group" formState="Complete">
1728            <state value="form.state.complete" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:10:02 -0400"/>
1729        </form>
1730    </patient>
1731</export_from_vision_EDC>"#;
1732
1733        let result = parse_subject_native_string(xml).expect("Should parse successfully");
1734
1735        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1736
1737        let patient = &result.patients[0];
1738        assert_eq!(patient.patient_id, "TEST-001");
1739        assert_eq!(patient.number_of_forms, 2);
1740
1741        let forms = patient.forms.as_ref().expect("Patient should have forms");
1742        assert_eq!(forms.len(), 2, "Patient should have exactly 2 forms");
1743
1744        let form1 = &forms[0];
1745        assert_eq!(form1.name, "test.form.1");
1746        assert_eq!(form1.form_title, "Test Form 1");
1747        assert_eq!(form1.form_index, 1);
1748        assert_eq!(form1.form_state, "In-Work");
1749
1750        let states1 = form1.states.as_ref().expect("Form 1 should have states");
1751        assert_eq!(states1.len(), 1);
1752        assert_eq!(states1[0].value, "form.state.in.work");
1753
1754        let categories1 = form1
1755            .categories
1756            .as_ref()
1757            .expect("Form 1 should have categories");
1758        assert_eq!(categories1.len(), 1);
1759        assert_eq!(categories1[0].name, "Test Category");
1760
1761        let fields1 = categories1[0]
1762            .fields
1763            .as_ref()
1764            .expect("Category should have fields");
1765        assert_eq!(fields1.len(), 1);
1766        assert_eq!(fields1[0].name, "test_field");
1767
1768        let entries1 = fields1[0]
1769            .entries
1770            .as_ref()
1771            .expect("Field should have entries");
1772        assert_eq!(entries1.len(), 1);
1773        assert_eq!(entries1[0].entry_id, "1");
1774
1775        let value1 = entries1[0].value.as_ref().expect("Entry should have value");
1776        assert_eq!(value1.value, "Test Value");
1777        assert_eq!(value1.by, "Test User");
1778        assert_eq!(value1.role, "Tester");
1779
1780        let form2 = &forms[1];
1781        assert_eq!(form2.name, "test.form.2");
1782        assert_eq!(form2.form_title, "Test Form 2");
1783        assert_eq!(form2.form_index, 2);
1784        assert_eq!(form2.form_state, "Complete");
1785
1786        let states2 = form2.states.as_ref().expect("Form 2 should have states");
1787        assert_eq!(states2.len(), 1);
1788        assert_eq!(states2[0].value, "form.state.complete");
1789    }
1790
1791    #[test]
1792    fn test_comments_parsing_regression() {
1793        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1794<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1795    <patient patientId="TEST-002" uniqueId="123456790" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="1">
1796        <form name="test.form.with.comments" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form With Comments" formIndex="1" formGroup="Test Group" formState="In-Work">
1797            <category name="Test Category" type="normal" highestIndex="0">
1798                <field name="field_with_comments" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
1799                    <entry id="1">
1800                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Field Value</value>
1801                    </entry>
1802                    <comment id="1">
1803                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:05 -0400" xml:space="preserve">First comment</value>
1804                    </comment>
1805                    <comment id="2">
1806                        <value by="Another User" byUniqueId="222222222" role="Reviewer" when="2023-04-15 12:10:00 -0400" xml:space="preserve">Second comment</value>
1807                    </comment>
1808                </field>
1809                <field name="field_without_comments" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:30 -0400" keepHistory="true">
1810                    <entry id="1">
1811                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:10 -0400" xml:space="preserve">Another Value</value>
1812                    </entry>
1813                </field>
1814            </category>
1815        </form>
1816    </patient>
1817</export_from_vision_EDC>"#;
1818
1819        let result = parse_subject_native_string(xml).expect("Should parse successfully");
1820
1821        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1822
1823        let patient = &result.patients[0];
1824        let forms = patient.forms.as_ref().expect("Patient should have forms");
1825        let form = &forms[0];
1826        let categories = form
1827            .categories
1828            .as_ref()
1829            .expect("Form should have categories");
1830        let fields = categories[0]
1831            .fields
1832            .as_ref()
1833            .expect("Category should have fields");
1834        assert_eq!(fields.len(), 2, "Should have 2 fields");
1835
1836        let field_with_comments = &fields[0];
1837        assert_eq!(field_with_comments.name, "field_with_comments");
1838
1839        let comments = field_with_comments
1840            .comments
1841            .as_ref()
1842            .expect("Field should have comments");
1843        assert_eq!(comments.len(), 2, "Should have exactly 2 comments");
1844
1845        let comment1 = &comments[0];
1846        assert_eq!(comment1.comment_id, "1");
1847        let comment1_value = comment1
1848            .value
1849            .as_ref()
1850            .expect("Comment 1 should have value");
1851        assert_eq!(comment1_value.value, "First comment");
1852        assert_eq!(comment1_value.by, "Test User");
1853        assert_eq!(comment1_value.role, "Tester");
1854
1855        let comment2 = &comments[1];
1856        assert_eq!(comment2.comment_id, "2");
1857        let comment2_value = comment2
1858            .value
1859            .as_ref()
1860            .expect("Comment 2 should have value");
1861        assert_eq!(comment2_value.value, "Second comment");
1862        assert_eq!(comment2_value.by, "Another User");
1863        assert_eq!(comment2_value.role, "Reviewer");
1864
1865        let field_without_comments = &fields[1];
1866        assert_eq!(field_without_comments.name, "field_without_comments");
1867        assert!(
1868            field_without_comments.comments.is_none(),
1869            "Field without comments should have no comments"
1870        );
1871    }
1872
1873    #[test]
1874    fn test_empty_forms_handling() {
1875        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1876<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1877    <patient patientId="TEST-003" uniqueId="123456791" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="0">
1878    </patient>
1879</export_from_vision_EDC>"#;
1880
1881        let result = parse_subject_native_string(xml).expect("Should parse successfully");
1882
1883        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1884
1885        let patient = &result.patients[0];
1886        assert_eq!(patient.patient_id, "TEST-003");
1887        assert_eq!(patient.number_of_forms, 0);
1888        assert!(
1889            patient.forms.is_none(),
1890            "Patient with 0 forms should have None for forms"
1891        );
1892    }
1893
1894    #[test]
1895    fn test_large_patient_forms_regression() {
1896        let mut xml = String::from(
1897            r#"<?xml version="1.0" encoding="UTF-8"?>
1898<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1899    <patient patientId="LARGE-TEST" uniqueId="123456792" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="50">"#,
1900        );
1901
1902        for i in 1..=50 {
1903            xml.push_str(&format!(r#"
1904        <form name="test.form.{}" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="12345678{}" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form {}" formIndex="{}" formGroup="Test Group" formState="In-Work">
1905            <state value="form.state.in.work" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:09:02 -0400"/>
1906            <category name="Category {}" type="normal" highestIndex="0">
1907                <field name="field_{}" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
1908                    <entry id="1">
1909                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Value {}</value>
1910                    </entry>
1911                    <comment id="1">
1912                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:05 -0400" xml:space="preserve">Comment for form {}</value>
1913                    </comment>
1914                </field>
1915            </category>
1916        </form>"#, i, i, i, i, i, i, i, i));
1917        }
1918
1919        xml.push_str(
1920            r#"
1921    </patient>
1922</export_from_vision_EDC>"#,
1923        );
1924
1925        let result =
1926            parse_subject_native_string(&xml).expect("Should parse large patient successfully");
1927
1928        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1929
1930        let patient = &result.patients[0];
1931        assert_eq!(patient.patient_id, "LARGE-TEST");
1932        assert_eq!(patient.number_of_forms, 50);
1933
1934        let forms = patient.forms.as_ref().expect("Patient should have forms");
1935        assert_eq!(forms.len(), 50, "Patient should have exactly 50 forms");
1936
1937        for (i, form) in forms.iter().enumerate() {
1938            let form_num = i + 1;
1939            assert_eq!(form.name, format!("test.form.{}", form_num));
1940            assert_eq!(form.form_title, format!("Test Form {}", form_num));
1941            assert_eq!(form.form_index, form_num);
1942
1943            let categories = form
1944                .categories
1945                .as_ref()
1946                .expect("Form should have categories");
1947            assert_eq!(categories.len(), 1);
1948
1949            let fields = categories[0]
1950                .fields
1951                .as_ref()
1952                .expect("Category should have fields");
1953            assert_eq!(fields.len(), 1);
1954
1955            let entries = fields[0]
1956                .entries
1957                .as_ref()
1958                .expect("Field should have entries");
1959            assert_eq!(entries.len(), 1);
1960            assert_eq!(
1961                entries[0].value.as_ref().unwrap().value,
1962                format!("Value {}", form_num)
1963            );
1964
1965            let comments = fields[0]
1966                .comments
1967                .as_ref()
1968                .expect("Field should have comments");
1969            assert_eq!(comments.len(), 1);
1970            assert_eq!(
1971                comments[0].value.as_ref().unwrap().value,
1972                format!("Comment for form {}", form_num)
1973            );
1974        }
1975    }
1976
1977    #[test]
1978    fn test_malformed_datetime_handling() {
1979        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1980<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1981    <patient patientId="TEST-004" uniqueId="123456793" whenCreated="" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="1">
1982        <form name="test.form.malformed.dates" lastModified="" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form" formIndex="1" formGroup="Test Group" formState="In-Work">
1983            <category name="Test Category" type="normal" highestIndex="0">
1984                <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="" keepHistory="true">
1985                    <entry id="1">
1986                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Test Value</value>
1987                    </entry>
1988                </field>
1989            </category>
1990        </form>
1991    </patient>
1992</export_from_vision_EDC>"#;
1993
1994        let result =
1995            parse_subject_native_string(xml).expect("Should handle malformed datetimes gracefully");
1996
1997        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1998
1999        let patient = &result.patients[0];
2000        assert!(
2001            patient.when_created.is_none(),
2002            "Empty whenCreated should be None"
2003        );
2004
2005        let forms = patient.forms.as_ref().expect("Patient should have forms");
2006        let form = &forms[0];
2007        assert!(
2008            form.last_modified.is_none(),
2009            "Empty lastModified should be None"
2010        );
2011
2012        let categories = form
2013            .categories
2014            .as_ref()
2015            .expect("Form should have categories");
2016        let fields = categories[0]
2017            .fields
2018            .as_ref()
2019            .expect("Category should have fields");
2020        let field = &fields[0];
2021        assert!(
2022            field.when_created.is_none(),
2023            "Empty whenCreated in field should be None"
2024        );
2025    }
2026
2027    #[test]
2028    fn test_empty_datetime_in_value_and_reason() {
2029        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2030<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
2031  <patient patientId="TEST-001" uniqueId="123456" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="654321" lastLanguage="" numberOfForms="1">
2032    <form name="test.form" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Manager" whenCreated="1681574905839" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form" formIndex="1" formGroup="Test" formState="In-Work">
2033      <state value="form.state.in.work" signer="Test User - Manager" signerUniqueId="123456" dateSigned="2023-04-15 12:09:02 -0400" />
2034      <category name="Test Category" type="normal" highestIndex="0">
2035        <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
2036          <entry id="1">
2037            <value by="Test User" byUniqueId="123456" role="Manager" when="" xml:space="preserve">Test Value</value>
2038            <reason by="Test User" byUniqueId="123456" role="Manager" when="" xml:space="preserve">Test Reason</reason>
2039          </entry>
2040        </field>
2041      </category>
2042    </form>
2043  </patient>
2044</export_from_vision_EDC>"#;
2045
2046        let result = parse_subject_native_string(xml);
2047        assert!(result.is_ok(), "Should parse successfully: {:?}", result);
2048
2049        let native = result.unwrap();
2050        assert_eq!(native.patients.len(), 1, "Should have 1 patient");
2051
2052        let patient = &native.patients[0];
2053        let forms = patient.forms.as_ref().expect("Patient should have forms");
2054        let form = &forms[0];
2055        let categories = form
2056            .categories
2057            .as_ref()
2058            .expect("Form should have categories");
2059        let fields = categories[0]
2060            .fields
2061            .as_ref()
2062            .expect("Category should have fields");
2063        let field = &fields[0];
2064        let entries = field.entries.as_ref().expect("Field should have entries");
2065        let entry = &entries[0];
2066
2067        let value = entry.value.as_ref().expect("Entry should have value");
2068        assert!(
2069            value.when.is_none(),
2070            "Empty when attribute in value should be None"
2071        );
2072        assert_eq!(value.value, "Test Value");
2073
2074        let reason = entry.reason.as_ref().expect("Entry should have reason");
2075        assert!(
2076            reason.when.is_none(),
2077            "Empty when attribute in reason should be None"
2078        );
2079        assert_eq!(reason.value, "Test Reason");
2080    }
2081}