prelude_xml_parser/
lib.rs

1pub mod errors;
2pub mod native;
3
4use std::{
5    collections::HashMap,
6    fs::{read_to_string, File},
7    io::{BufReader, Cursor},
8    path::Path,
9};
10
11use crate::errors::Error;
12use crate::native::{
13    common::{Category, Comment, Entry, Field, LockState, Reason, State, Value},
14    site_native::SiteNative,
15    subject_native::{Form, Patient, SubjectNative},
16    user_native::UserNative,
17};
18use quick_xml::events::{BytesStart, Event};
19use quick_xml::Reader;
20
21/// Parses a Prelude native XML file into a `Native` stuct.
22///
23/// # Example
24///
25/// ```
26/// use std::path::Path;
27///
28/// use prelude_xml_parser::parse_site_native_file;
29///
30/// let file_path = Path::new("tests/assets/site_native.xml");
31/// let native = parse_site_native_file(&file_path).unwrap();
32///
33/// assert!(native.sites.len() >= 1, "Vector length is less than 1");
34/// ```
35pub fn parse_site_native_file(xml_path: &Path) -> Result<SiteNative, Error> {
36    check_valid_xml_file(xml_path)?;
37
38    let xml_file = read_to_string(xml_path)?;
39    let native = parse_site_native_string(&xml_file)?;
40
41    Ok(native)
42}
43
44/// Parse a string of Prelude native site XML into a `SiteNative` struct.
45///
46/// # Example
47///
48/// ```
49/// use chrono::{DateTime, Utc};
50/// use prelude_xml_parser::parse_site_native_string;
51/// use prelude_xml_parser::native::site_native::*;
52///
53/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
54/// <export_from_vision_EDC date="01-Jun-2024 18:17 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="2">
55///
56///   <site name="Some Site" uniqueId="1681574834910" numberOfPatients="4" countOfRandomizedPatients="0" whenCreated="2023-04-15 12:08:19 -0400" creator="Paul Sanders" numberOfForms="1">
57///     <form name="demographic.form.name.site.demographics" lastModified="2023-04-15 12:08:19 -0400" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1681574834930" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Site Demographics" formIndex="1" formGroup="Demographic" formState="In-Work">
58///       <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-15 12:08:19 -0400" />
59///       <category name="Demographics" type="normal" highestIndex="0">
60///         <field name="address" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
61///         <field name="company" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
62///           <entry id="1">
63///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:08:19 -0400" xml:space="preserve">Some Company</value>
64///           </entry>
65///         </field>
66///         <field name="site_code_name" type="hidden" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
67///           <entry id="1">
68///             <value by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:08:19 -0400" xml:space="preserve">ABC-Some Site</value>
69///             <reason by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:08:19 -0400" xml:space="preserve">calculated value</reason>
70///           </entry>
71///           <entry id="2">
72///             <value by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:07:24 -0400" xml:space="preserve">Some Site</value>
73///             <reason by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:07:24 -0400" xml:space="preserve">calculated value</reason>
74///           </entry>
75///         </field>
76///       </category>
77///       <category name="Enrollment" type="normal" highestIndex="0">
78///         <field name="enrollment_closed_date" type="popUpCalendar" dataType="date" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
79///         <field name="enrollment_open" type="radio" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
80///           <entry id="1">
81///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:08:19 -0400" xml:space="preserve">Yes</value>
82///           </entry>
83///         </field>
84///         <field name="enrollment_open_date" type="popUpCalendar" dataType="date" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
85///       </category>
86///     </form>
87///   </site>
88///
89///   <site name="Artemis" uniqueId="1691420994591" numberOfPatients="0" countOfRandomizedPatients="0" whenCreated="2023-08-07 08:14:23 -0700" creator="Paul Sanders" numberOfForms="1">
90///     <form name="demographic.form.name.site.demographics" lastModified="2023-08-07 08:14:23 -0700" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1691420994611" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Site Demographics" formIndex="1" formGroup="Demographic" formState="In-Work">
91///       <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-08-07 08:14:23 -0700" />
92///       <category name="Demographics" type="normal" highestIndex="0">
93///         <field name="address" type="text" dataType="string" errorCode="valid" whenCreated="2023-08-07 10:09:54 -0500" keepHistory="true">
94///           <entry id="1">
95///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 08:14:21 -0700" xml:space="preserve">1111 Moon Drive</value>
96///           </entry>
97///           <comment id="1">
98///             <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 08:14:21 -0700" xml:space="preserve">Some comment</value>
99///           </comment>
100///         </field>
101///       </category>
102///     </form>
103///   </site>
104///
105/// </export_from_vision_EDC>
106/// "#;
107///
108/// let expected = SiteNative {
109///     sites: vec![
110///         Site {
111///             name: "Some Site".to_string(),
112///             unique_id: "1681574834910".to_string(),
113///             number_of_patients: 4,
114///             count_of_randomized_patients: 0,
115///             when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
116///                 .unwrap()
117///                 .with_timezone(&Utc)),
118///             creator: "Paul Sanders".to_string(),
119///             number_of_forms: 1,
120///             forms: Some(vec![Form {
121///                 name: "demographic.form.name.site.demographics".to_string(),
122///                 last_modified: Some(
123///                     DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
124///                         .unwrap()
125///                         .with_timezone(&Utc),
126///                 ),
127///                 who_last_modified_name: Some("Paul Sanders".to_string()),
128///                 who_last_modified_role: Some("Project Manager".to_string()),
129///                 when_created: 1681574834930,
130///                 has_errors: false,
131///                 has_warnings: false,
132///                 locked: false,
133///                 user: None,
134///                 date_time_changed: None,
135///                 form_title: "Site Demographics".to_string(),
136///                 form_index: 1,
137///                 form_group: Some("Demographic".to_string()),
138///                 form_state: "In-Work".to_string(),
139///                 lock_state: None,
140///                 states: Some(vec![State {
141///                     value: "form.state.in.work".to_string(),
142///                     signer: "Paul Sanders - Project Manager".to_string(),
143///                     signer_unique_id: "1681162687395".to_string(),
144///                     date_signed: Some(
145///                         DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
146///                             .unwrap()
147///                             .with_timezone(&Utc),
148///                     ),
149///                 }]),
150///                 categories: Some(vec![
151///                     Category {
152///                         name: "Demographics".to_string(),
153///                         category_type: "normal".to_string(),
154///                         highest_index: 0,
155///                         fields: Some(vec![
156///                             Field {
157///                                 name: "address".to_string(),
158///                                 field_type: "text".to_string(),
159///                                 data_type: Some("string".to_string()),
160///                                 error_code: "valid".to_string(),
161///                                 when_created: Some(DateTime::parse_from_rfc3339(
162///                                     "2023-04-15T16:07:14Z",
163///                                 )
164///                                 .unwrap()
165///                                 .with_timezone(&Utc)),
166///                                 keep_history: true,
167///                                 entries: None,
168///                                 comments: None,
169///                             },
170///                             Field {
171///                                 name: "company".to_string(),
172///                                 field_type: "text".to_string(),
173///                                 data_type: Some("string".to_string()),
174///                                 error_code: "valid".to_string(),
175///                                 when_created: Some(DateTime::parse_from_rfc3339(
176///                                     "2023-04-15T16:07:14Z",
177///                                 )
178///                                 .unwrap()
179///                                 .with_timezone(&Utc)),
180///                                 keep_history: true,
181///                                 entries: Some(vec![Entry {
182///                                     entry_id: "1".to_string(),
183///                                     reviewed_by: None,
184///                                     reviewed_by_unique_id: None,
185///                                     reviewed_by_when: None,
186///                                     value: Some(Value {
187///                                         by: "Paul Sanders".to_string(),
188///                                         by_unique_id: Some("1681162687395".to_string()),
189///                                         role: "Project Manager".to_string(),
190///                                         when: Some(DateTime::parse_from_rfc3339(
191///                                             "2023-04-15T16:08:19Z",
192///                                         )
193///                                         .unwrap()
194///                                         .with_timezone(&Utc)),
195///                                         value: "Some Company".to_string(),
196///                                     }),
197///                                     reason: None,
198///                                 }]),
199///                                 comments: None,
200///                             },
201///                             Field {
202///                                 name: "site_code_name".to_string(),
203///                                 field_type: "hidden".to_string(),
204///                                 data_type: Some("string".to_string()),
205///                                 error_code: "valid".to_string(),
206///                                 when_created: Some(DateTime::parse_from_rfc3339(
207///                                     "2023-04-15T16:07:14Z",
208///                                 )
209///                                 .unwrap()
210///                                 .with_timezone(&Utc)),
211///                                 keep_history: true,
212///                                 entries: Some(vec![
213///                                     Entry {
214///                                         entry_id: "1".to_string(),
215///                                         reviewed_by: None,
216///                                         reviewed_by_unique_id: None,
217///                                         reviewed_by_when: None,
218///                                         value: Some(Value {
219///                                             by: "set from calculation".to_string(),
220///                                             by_unique_id: None,
221///                                             role: "System".to_string(),
222///                                             when: Some(DateTime::parse_from_rfc3339(
223///                                                 "2023-04-15T16:08:19Z",
224///                                             )
225///                                             .unwrap()
226///                                             .with_timezone(&Utc)),
227///                                             value: "ABC-Some Site".to_string(),
228///                                         }),
229///                                         reason: Some(Reason {
230///                                             by: "set from calculation".to_string(),
231///                                             by_unique_id: None,
232///                                             role: "System".to_string(),
233///                                             when: Some(DateTime::parse_from_rfc3339(
234///                                                 "2023-04-15T16:08:19Z",
235///                                             )
236///                                             .unwrap()
237///                                             .with_timezone(&Utc)),
238///                                             value: "calculated value".to_string(),
239///                                         }),
240///                                     },
241///                                     Entry {
242///                                         entry_id: "2".to_string(),
243///                                         reviewed_by: None,
244///                                         reviewed_by_unique_id: None,
245///                                         reviewed_by_when: None,
246///                                         value: Some(Value {
247///                                             by: "set from calculation".to_string(),
248///                                             by_unique_id: None,
249///                                             role: "System".to_string(),
250///                                             when: Some(DateTime::parse_from_rfc3339(
251///                                                 "2023-04-15T16:07:24Z",
252///                                             )
253///                                             .unwrap()
254///                                             .with_timezone(&Utc)),
255///                                             value: "Some Site".to_string(),
256///                                         }),
257///                                         reason: Some(Reason {
258///                                             by: "set from calculation".to_string(),
259///                                             by_unique_id: None,
260///                                             role: "System".to_string(),
261///                                             when: Some(DateTime::parse_from_rfc3339(
262///                                                 "2023-04-15T16:07:24Z",
263///                                             )
264///                                             .unwrap()
265///                                             .with_timezone(&Utc)),
266///                                             value: "calculated value".to_string(),
267///                                         }),
268///                                     },
269///                                 ]),
270///                                 comments: None,
271///                             },
272///                         ]),
273///                     },
274///                     Category {
275///                         name: "Enrollment".to_string(),
276///                         category_type: "normal".to_string(),
277///                         highest_index: 0,
278///                         fields: Some(vec![
279///                             Field {
280///                                 name: "enrollment_closed_date".to_string(),
281///                                 field_type: "popUpCalendar".to_string(),
282///                                 data_type: Some("date".to_string()),
283///                                 error_code: "valid".to_string(),
284///                                 when_created: Some(DateTime::parse_from_rfc3339(
285///                                     "2023-04-15T16:07:14Z",
286///                                 )
287///                                 .unwrap()
288///                                 .with_timezone(&Utc)),
289///                                 keep_history: true,
290///                                 entries: None,
291///                                 comments: None,
292///                             },
293///                             Field {
294///                                 name: "enrollment_open".to_string(),
295///                                 field_type: "radio".to_string(),
296///                                 data_type: Some("string".to_string()),
297///                                 error_code: "valid".to_string(),
298///                                 when_created: Some(DateTime::parse_from_rfc3339(
299///                                     "2023-04-15T16:07:14Z",
300///                                 )
301///                                 .unwrap()
302///                                 .with_timezone(&Utc)),
303///                                 keep_history: true,
304///                                 entries: Some(vec![Entry {
305///                                     entry_id: "1".to_string(),
306///                                     reviewed_by: None,
307///                                     reviewed_by_unique_id: None,
308///                                     reviewed_by_when: None,
309///                                     value: Some(Value {
310///                                         by: "Paul Sanders".to_string(),
311///                                         by_unique_id: Some("1681162687395".to_string()),
312///                                         role: "Project Manager".to_string(),
313///                                         when: Some(DateTime::parse_from_rfc3339(
314///                                             "2023-04-15T16:08:19Z",
315///                                         )
316///                                         .unwrap()
317///                                         .with_timezone(&Utc)),
318///                                         value: "Yes".to_string(),
319///                                     }),
320///                                     reason: None,
321///                                 }]),
322///                                 comments: None,
323///                             },
324///                             Field {
325///                                 name: "enrollment_open_date".to_string(),
326///                                 field_type: "popUpCalendar".to_string(),
327///                                 data_type: Some("date".to_string()),
328///                                 error_code: "valid".to_string(),
329///                                 when_created: Some(DateTime::parse_from_rfc3339(
330///                                     "2023-04-15T16:07:14Z",
331///                                 )
332///                                 .unwrap()
333///                                 .with_timezone(&Utc)),
334///                                 keep_history: true,
335///                                 entries: None,
336///                                 comments: None,
337///                             },
338///                         ]),
339///                     },
340///                 ]),
341///             }]),
342///         },
343///         Site {
344///             name: "Artemis".to_string(),
345///             unique_id: "1691420994591".to_string(),
346///             number_of_patients: 0,
347///             count_of_randomized_patients: 0,
348///             when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
349///                 .unwrap()
350///                 .with_timezone(&Utc)),
351///             creator: "Paul Sanders".to_string(),
352///             number_of_forms: 1,
353///             forms: Some(vec![Form {
354///                 name: "demographic.form.name.site.demographics".to_string(),
355///                 last_modified: Some(
356///                     DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
357///                         .unwrap()
358///                         .with_timezone(&Utc),
359///                 ),
360///                 who_last_modified_name: Some("Paul Sanders".to_string()),
361///                 who_last_modified_role: Some("Project Manager".to_string()),
362///                 when_created: 1691420994611,
363///                 has_errors: false,
364///                 has_warnings: false,
365///                 locked: false,
366///                 user: None,
367///                 date_time_changed: None,
368///                 form_title: "Site Demographics".to_string(),
369///                 form_index: 1,
370///                 form_group: Some("Demographic".to_string()),
371///                 form_state: "In-Work".to_string(),
372///                 lock_state: None,
373///                 states: Some(vec![State {
374///                     value: "form.state.in.work".to_string(),
375///                     signer: "Paul Sanders - Project Manager".to_string(),
376///                     signer_unique_id: "1681162687395".to_string(),
377///                     date_signed: Some(
378///                         DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
379///                             .unwrap()
380///                             .with_timezone(&Utc),
381///                     ),
382///                 }]),
383///                 categories: Some(vec![Category {
384///                     name: "Demographics".to_string(),
385///                     category_type: "normal".to_string(),
386///                     highest_index: 0,
387///                     fields: Some(vec![Field {
388///                         name: "address".to_string(),
389///                         field_type: "text".to_string(),
390///                         data_type: Some("string".to_string()),
391///                         error_code: "valid".to_string(),
392///                         when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:09:54Z")
393///                             .unwrap()
394///                             .with_timezone(&Utc)),
395///                         keep_history: true,
396///                         entries: Some(vec![Entry {
397///                             entry_id: "1".to_string(),
398///                             reviewed_by: None,
399///                             reviewed_by_unique_id: None,
400///                             reviewed_by_when: None,
401///                             value: Some(Value {
402///                                 by: "Paul Sanders".to_string(),
403///                                 by_unique_id: Some("1681162687395".to_string()),
404///                                 role: "Project Manager".to_string(),
405///                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:21Z")
406///                                     .unwrap()
407///                                     .with_timezone(&Utc)),
408///                                 value: "1111 Moon Drive".to_string(),
409///                             }),
410///                             reason: None,
411///                         }]),
412///                         comments: Some(vec![Comment {
413///                             comment_id: "1".to_string(),
414///                             value: Some(Value {
415///                                 by: "Paul Sanders".to_string(),
416///                                 by_unique_id: Some("1681162687395".to_string()),
417///                                 role: "Project Manager".to_string(),
418///                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:21Z")
419///                                     .unwrap()
420///                                     .with_timezone(&Utc)),
421///                                 value: "Some comment".to_string(),
422///                             }),
423///                         }]),
424///                     }]),
425///                 }]),
426///             }]),
427///         },
428///     ],
429/// };
430/// let result = parse_site_native_string(xml).unwrap();
/// assert_eq!(result, expected);
/// ```
432pub fn parse_site_native_string(xml_str: &str) -> Result<SiteNative, Error> {
433    let native: SiteNative = quick_xml::de::from_str(xml_str)?;
434
435    Ok(native)
436}
437
438/// Parses a Prelude native subject XML file into a `SubjectNative` stuct.
439///
440/// # Example
441///
442/// ```
443/// use std::path::Path;
444///
445/// use prelude_xml_parser::parse_subject_native_file;
446///
447/// let file_path = Path::new("tests/assets/subject_native.xml");
448/// let native = parse_subject_native_file(&file_path).unwrap();
449///
450/// assert!(native.patients.len() >= 1, "Vector length is less than 1");
451/// ```
452pub fn parse_subject_native_file(xml_path: &Path) -> Result<SubjectNative, Error> {
453    check_valid_xml_file(xml_path)?;
454
455    let file = File::open(xml_path)?;
456    let buf_reader = BufReader::new(file);
457    parse_subject_native_streaming(buf_reader)
458}
459
460/// Parse a string of Prelude native subject XML into a `SubjectNative` struct.
461///
462/// # Example
463///
464/// ```
465/// use chrono::{DateTime, Utc};
466/// use prelude_xml_parser::parse_subject_native_string;
467/// use prelude_xml_parser::native::common::LockState;
468/// use prelude_xml_parser::native::subject_native::*;
469///
470/// let xml = r#"<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="4">
471///     <patient patientId="ABC-001" uniqueId="1681574905819" whenCreated="2023-04-15 12:09:02 -0400" creator="Paul Sanders" siteName="Some Site" siteUniqueId="1681574834910" lastLanguage="English" numberOfForms="6">
472///       <form name="day.0.form.name.demographics" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1681574905839" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Demographics" formIndex="1" formGroup="Day 0" formState="In-Work">
473///         <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-15 12:09:02 -0400"/>
474///         <category name="Demographics" type="normal" highestIndex="0">
475///           <field name="breed" type="combo-box" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
476///             <entry id="1">
477///               <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Labrador</value>
478///             </entry>
479///           </field>
480///         </category>
481///       </form>
482///     </patient>
483///     <patient patientId="DEF-002" uniqueId="1681574905820" whenCreated="2023-04-16 12:10:02 -0400" creator="Wade Watts" siteName="Another Site" siteUniqueId="1681574834911" lastLanguage="" numberOfForms="8">
484///       <form name="day.0.form.name.demographics" lastModified="2023-04-16 12:10:15 -0400" whoLastModifiedName="Barney Rubble" whoLastModifiedRole="Technician" whenCreated="1681574905838" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Demographics" formIndex="1" formGroup="Day 0" formState="In-Work">
485///         <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-16 12:10:02 -0400"/>
486///         <category name="Demographics" type="normal" highestIndex="0">
487///           <field name="breed" type="combo-box" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
488///             <entry id="1">
489///               <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Labrador</value>
490///             </entry>
491///           </field>
492///         </category>
493///       </form>
494///     </patient>
495/// </export_from_vision_EDC>
496/// "#;
497///
498/// let expected = SubjectNative {
499///     patients: vec![
500///         Patient {
501///             patient_id: "ABC-001".to_string(),
502///             unique_id: "1681574905819".to_string(),
503///             when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
504///                 .unwrap()
505///                 .with_timezone(&Utc)),
506///             creator: "Paul Sanders".to_string(),
507///             site_name: "Some Site".to_string(),
508///             site_unique_id: "1681574834910".to_string(),
509///             last_language: Some("English".to_string()),
510///             number_of_forms: 6,
511///             forms: Some(vec![Form {
512///                 name: "day.0.form.name.demographics".to_string(),
513///                 last_modified: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:15Z")
514///                     .unwrap()
515///                     .with_timezone(&Utc)),
516///                 who_last_modified_name: Some("Paul Sanders".to_string()),
517///                 who_last_modified_role: Some("Project Manager".to_string()),
518///                 when_created: 1681574905839,
519///                 has_errors: false,
520///                 has_warnings: false,
521///                 locked: false,
522///                 user: None,
523///                 date_time_changed: None,
524///                 form_title: "Demographics".to_string(),
525///                 form_index: 1,
526///                 form_group: Some("Day 0".to_string()),
527///                 form_state: "In-Work".to_string(),
528///                 lock_state: None,
529///                 states: Some(vec![State {
530///                     value: "form.state.in.work".to_string(),
531///                     signer: "Paul Sanders - Project Manager".to_string(),
532///                     signer_unique_id: "1681162687395".to_string(),
533///                     date_signed: Some(
534///                         DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
535///                             .unwrap()
536///                             .with_timezone(&Utc),
537///                     ),
538///                 }]),
539///                 categories: Some(vec![Category {
540///                     name: "Demographics".to_string(),
541///                     category_type: "normal".to_string(),
542///                     highest_index: 0,
543///                     fields: Some(vec![Field {
544///                         name: "breed".to_string(),
545///                         field_type: "combo-box".to_string(),
546///                         data_type: Some("string".to_string()),
547///                         error_code: "valid".to_string(),
548///                         when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:26Z")
549///                             .unwrap()
550///                             .with_timezone(&Utc)),
551///                         keep_history: true,
552///                         entries: Some(vec![Entry {
553///                             entry_id: "1".to_string(),
554///                             reviewed_by: None,
555///                             reviewed_by_unique_id: None,
556///                             reviewed_by_when: None,
557///                             value: Some(Value {
558///                                 by: "Paul Sanders".to_string(),
559///                                 by_unique_id: Some("1681162687395".to_string()),
560///                                 role: "Project Manager".to_string(),
561///                                 when: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
562///                                     .unwrap()
563///                                     .with_timezone(&Utc)),
564///                                 value: "Labrador".to_string(),
565///                             }),
566///                             reason: None,
567///                         }]),
568///                         comments: None,
569///                     }]),
570///                 }]),
571///             }]),
572///         },
573///         Patient {
574///             patient_id: "DEF-002".to_string(),
575///             unique_id: "1681574905820".to_string(),
576///             when_created: Some(DateTime::parse_from_rfc3339("2023-04-16T16:10:02Z")
577///                 .unwrap()
578///                 .with_timezone(&Utc)),
579///             creator: "Wade Watts".to_string(),
580///             site_name: "Another Site".to_string(),
581///             site_unique_id: "1681574834911".to_string(),
582///             last_language: None,
583///             number_of_forms: 8,
584///             forms: Some(vec![Form {
585///                 name: "day.0.form.name.demographics".to_string(),
586///                 last_modified: Some(DateTime::parse_from_rfc3339("2023-04-16T16:10:15Z")
587///                     .unwrap()
588///                     .with_timezone(&Utc)),
589///                 who_last_modified_name: Some("Barney Rubble".to_string()),
590///                 who_last_modified_role: Some("Technician".to_string()),
591///                 when_created: 1681574905838,
592///                 has_errors: false,
593///                 has_warnings: false,
594///                 locked: false,
595///                 user: None,
596///                 date_time_changed: None,
597///                 form_title: "Demographics".to_string(),
598///                 form_index: 1,
599///                 form_group: Some("Day 0".to_string()),
600///                 form_state: "In-Work".to_string(),
601///                 lock_state: None,
602///                 states: Some(vec![State {
603///                     value: "form.state.in.work".to_string(),
604///                     signer: "Paul Sanders - Project Manager".to_string(),
605///                     signer_unique_id: "1681162687395".to_string(),
606///                     date_signed: Some(
607///                         DateTime::parse_from_rfc3339("2023-04-16T16:10:02Z")
608///                             .unwrap()
609///                             .with_timezone(&Utc),
610///                     ),
611///                 }]),
612///                 categories: Some(vec![Category {
613///                     name: "Demographics".to_string(),
614///                     category_type: "normal".to_string(),
615///                     highest_index: 0,
616///                     fields: Some(vec![Field {
617///                         name: "breed".to_string(),
618///                         field_type: "combo-box".to_string(),
619///                         data_type: Some("string".to_string()),
620///                         error_code: "valid".to_string(),
621///                         when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:26Z")
622///                             .unwrap()
623///                             .with_timezone(&Utc)),
624///                         keep_history: true,
625///                         entries: Some(vec![Entry {
626///                             entry_id: "1".to_string(),
627///                             reviewed_by: None,
628///                             reviewed_by_unique_id: None,
629///                             reviewed_by_when: None,
630///                             value: Some(Value {
631///                                 by: "Paul Sanders".to_string(),
632///                                 by_unique_id: Some("1681162687395".to_string()),
633///                                 role: "Project Manager".to_string(),
634///                                 when: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
635///                                     .unwrap()
636///                                     .with_timezone(&Utc)),
637///                                 value: "Labrador".to_string(),
638///                             }),
639///                             reason: None,
640///                         }]),
641///                         comments: None,
642///                     }]),
643///                 }]),
644///             }]),
645///         },
646///     ],
647/// };
648/// let result = parse_subject_native_string(xml).unwrap();
649///
650/// assert_eq!(result, expected);
651/// ```
652pub fn parse_subject_native_string(xml_str: &str) -> Result<SubjectNative, Error> {
653    parse_subject_native_streaming(Cursor::new(xml_str.as_bytes()))
654}
655
656fn parse_subject_native_streaming<R: std::io::BufRead>(reader: R) -> Result<SubjectNative, Error> {
657    let mut xml_reader = Reader::from_reader(reader);
658    xml_reader.config_mut().trim_text(true);
659
660    let mut patients = Vec::new();
661    let mut buf = Vec::new();
662    let mut text_content = String::new();
663
664    let mut current_patient: Option<Patient> = None;
665    let mut current_forms: Vec<Form> = Vec::new();
666    let mut current_form: Option<Form> = None;
667    let mut current_states: Vec<State> = Vec::new();
668    let mut current_categories: Vec<Category> = Vec::new();
669    let mut current_category: Option<Category> = None;
670    let mut current_fields: Vec<Field> = Vec::new();
671    let mut current_field: Option<Field> = None;
672    let mut current_entries: Vec<Entry> = Vec::new();
673    let mut current_entry: Option<Entry> = None;
674    let mut current_comments: Vec<Comment> = Vec::new();
675    let mut current_comment: Option<Comment> = None;
676    let mut current_value: Option<Value> = None;
677    let mut current_reason: Option<Reason> = None;
678
679    let mut in_patient = false;
680    let mut in_form = false;
681    let mut in_category = false;
682    let mut in_field = false;
683    let mut in_entry = false;
684    let mut in_comment = false;
685    let mut in_value = false;
686    let mut in_reason = false;
687
688    loop {
689        match xml_reader.read_event_into(&mut buf) {
690            Err(e) => {
691                return Err(Error::ParsingError(quick_xml::de::DeError::Custom(
692                    format!("XML reading error: {}", e),
693                )))
694            }
695            Ok(Event::Eof) => break,
696
697            Ok(Event::Start(ref e)) => {
698                let name_bytes = e.local_name();
699                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
700                    match name {
701                        "patient" => {
702                            let attrs = extract_attributes(e)?;
703                            current_patient = Some(Patient::from_attributes(attrs)?);
704                            in_patient = true;
705                            current_forms.clear();
706                        }
707                        "form" if in_patient => {
708                            let attrs = extract_attributes(e)?;
709                            current_form = Some(Form::from_attributes(attrs)?);
710                            in_form = true;
711                            current_states.clear();
712                            current_categories.clear();
713                        }
714                        "category" if in_form => {
715                            let attrs = extract_attributes(e)?;
716                            current_category = Some(Category::from_attributes(attrs)?);
717                            in_category = true;
718                            current_fields.clear();
719                        }
720                        "field" if in_category => {
721                            let attrs = extract_attributes(e)?;
722                            current_field = Some(Field::from_attributes(attrs)?);
723                            in_field = true;
724                            current_entries.clear();
725                            current_comments.clear();
726                        }
727                        "entry" if in_field => {
728                            let attrs = extract_attributes(e)?;
729                            current_entry = Some(Entry::from_attributes(attrs)?);
730                            in_entry = true;
731                        }
732                        "comment" if in_field => {
733                            let attrs = extract_attributes(e)?;
734                            let comment_id = attrs.get("id").cloned().unwrap_or_default();
735                            current_comment = Some(Comment {
736                                comment_id,
737                                value: None,
738                            });
739                            in_comment = true;
740                        }
741                        "value" if in_entry || in_comment => {
742                            let attrs = extract_attributes(e)?;
743                            current_value = Some(Value::from_attributes(attrs)?);
744                            in_value = true;
745                            text_content.clear();
746                        }
747                        "reason" if in_entry => {
748                            let attrs = extract_attributes(e)?;
749                            current_reason = Some(Reason::from_attributes(attrs)?);
750                            in_reason = true;
751                            text_content.clear();
752                        }
753                        _ => {}
754                    }
755                }
756            }
757
758            Ok(Event::Text(e)) => {
759                if in_value || in_reason {
760                    text_content.push_str(&String::from_utf8_lossy(&e));
761                }
762            }
763
764            Ok(Event::End(ref e)) => {
765                let name_bytes = e.local_name();
766                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
767                    match name {
768                        "patient" => {
769                            if let Some(mut patient) = current_patient.take() {
770                                if !current_forms.is_empty() {
771                                    patient.set_forms(current_forms.clone());
772                                }
773                                patients.push(patient);
774                            }
775                            in_patient = false;
776                            current_forms.clear();
777                        }
778                        "form" if in_form => {
779                            if let Some(mut form) = current_form.take() {
780                                if !current_states.is_empty() {
781                                    form.states = Some(current_states.clone());
782                                }
783                                if !current_categories.is_empty() {
784                                    form.categories = Some(current_categories.clone());
785                                }
786                                current_forms.push(form);
787                            }
788                            in_form = false;
789                            current_states.clear();
790                            current_categories.clear();
791                        }
792                        "category" if in_category => {
793                            if let Some(mut category) = current_category.take() {
794                                if !current_fields.is_empty() {
795                                    category.fields = Some(current_fields.clone());
796                                }
797                                current_categories.push(category);
798                            }
799                            in_category = false;
800                            current_fields.clear();
801                        }
802                        "field" if in_field => {
803                            if let Some(mut field) = current_field.take() {
804                                if !current_entries.is_empty() {
805                                    field.entries = Some(current_entries.clone());
806                                }
807                                if !current_comments.is_empty() {
808                                    field.comments = Some(current_comments.clone());
809                                }
810                                current_fields.push(field);
811                            }
812                            in_field = false;
813                            current_entries.clear();
814                            current_comments.clear();
815                        }
816                        "entry" if in_entry => {
817                            if let Some(entry) = current_entry.take() {
818                                current_entries.push(entry);
819                            }
820                            in_entry = false;
821                        }
822                        "comment" if in_comment => {
823                            if let Some(comment) = current_comment.take() {
824                                current_comments.push(comment);
825                            }
826                            in_comment = false;
827                        }
828                        "value" if in_value => {
829                            if let Some(mut value) = current_value.take() {
830                                value.value = text_content.clone();
831                                if let Some(ref mut entry) = current_entry {
832                                    entry.value = Some(value.clone());
833                                }
834                                if let Some(ref mut comment) = current_comment {
835                                    comment.value = Some(value);
836                                }
837                            }
838                            in_value = false;
839                            text_content.clear();
840                        }
841                        "reason" if in_reason => {
842                            if let Some(mut reason) = current_reason.take() {
843                                reason.value = text_content.clone();
844                                if let Some(ref mut entry) = current_entry {
845                                    entry.reason = Some(reason);
846                                }
847                            }
848                            in_reason = false;
849                            text_content.clear();
850                        }
851                        _ => {}
852                    }
853                }
854            }
855
856            Ok(Event::Empty(ref e)) => {
857                let name_bytes = e.local_name();
858                if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
859                    match name {
860                        "state" if in_form => {
861                            let attrs = extract_attributes(e)?;
862                            let state = State::from_attributes(attrs)?;
863                            current_states.push(state);
864                        }
865                        "lockState" if in_form => {
866                            let attrs = extract_attributes(e)?;
867                            let lock_state = LockState::from_attributes(attrs)?;
868                            if let Some(ref mut form) = current_form {
869                                form.lock_state = Some(lock_state);
870                            }
871                        }
872                        "value" if in_entry => {
873                            let attrs = extract_attributes(e)?;
874                            let value = Value::from_attributes(attrs)?;
875                            if let Some(ref mut entry) = current_entry {
876                                entry.value = Some(value);
877                            }
878                        }
879                        "reason" if in_entry => {
880                            let attrs = extract_attributes(e)?;
881                            let reason = Reason::from_attributes(attrs)?;
882                            if let Some(ref mut entry) = current_entry {
883                                entry.reason = Some(reason);
884                            }
885                        }
886                        _ => {}
887                    }
888                }
889            }
890
891            _ => {}
892        }
893
894        buf.clear();
895    }
896
897    Ok(SubjectNative { patients })
898}
899
900fn extract_attributes(e: &BytesStart) -> Result<HashMap<String, String>, Error> {
901    let mut attrs = HashMap::new();
902    for attr in e.attributes() {
903        let attr = attr.map_err(|e| {
904            Error::ParsingError(quick_xml::de::DeError::Custom(format!(
905                "Attribute error: {}",
906                e
907            )))
908        })?;
909        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
910        let value = String::from_utf8_lossy(&attr.value).to_string();
911        attrs.insert(key, value);
912    }
913    Ok(attrs)
914}
915
916/// Parses a Prelude native user XML file into a `UserNative` stuct.
917///
918/// # Example
919///
920/// ```
921/// use std::path::Path;
922///
923/// use prelude_xml_parser::parse_user_native_file;
924///
925/// let file_path = Path::new("tests/assets/user_native.xml");
926/// let native = parse_user_native_file(&file_path).unwrap();
927///
928/// assert!(native.users.len() >= 1, "Vector length is less than 1");
929/// ```
930pub fn parse_user_native_file(xml_path: &Path) -> Result<UserNative, Error> {
931    check_valid_xml_file(xml_path)?;
932
933    let xml_file = read_to_string(xml_path)?;
934    let native = parse_user_native_string(&xml_file)?;
935
936    Ok(native)
937}
938
939/// Parse a string of Prelude native user XML into a `UserNative` struct.
940///
941/// # Example
942///
943/// ```
944/// use chrono::{DateTime, Utc};
945/// use prelude_xml_parser::parse_user_native_string;
946/// use prelude_xml_parser::native::user_native::*;
947///
948/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
949///   <export_from_vision_EDC date="02-Jun-2024 06:59 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="3">
950///     <user uniqueId="1691421275437" lastLanguage="" creator="Paul Sanders(1681162687395)" numberOfForms="1">
951///       <form name="form.name.demographics" lastModified="2023-08-07 10:15:41 -0500" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1691421341578" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="User Demographics" formIndex="1" formGroup="" formState="In-Work">
952///         <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-08-07 10:15:41 -0500" />
953///         <category name="demographics" type="normal" highestIndex="0">
954///           <field name="address" type="text" dataType="string" errorCode="undefined" whenCreated="2024-01-12 14:14:09 -0600" keepHistory="true" />
955///           <field name="email" type="text" dataType="string" errorCode="undefined" whenCreated="2023-08-07 10:15:41 -0500" keepHistory="true">
956///             <entry id="1">
957///               <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 10:15:41 -0500" xml:space="preserve">jazz@artemis.com</value>
958///             </entry>
959///           </field>
960///         </category>
961///         <category name="Administrative" type="normal" highestIndex="0">
962///           <field name="study_assignment" type="text" dataType="" errorCode="undefined" whenCreated="2023-08-07 10:15:41 -0500" keepHistory="true">
963///             <entry id="1">
964///               <value by="set from calculation" byUniqueId="" role="System" when="2023-08-07 10:15:41 -0500" xml:space="preserve">On 07-Aug-2023 10:15 -0500, Paul Sanders assigned user from another study</value>
965///               <reason by="set from calculation" byUniqueId="" role="System" when="2023-08-07 10:15:41 -0500" xml:space="preserve">calculated value</reason>
966///             </entry>
967///           </field>
968///         </category>
969///       </form>
970///     </user>
971///   </export_from_vision_EDC>
972/// "#;
973///
974/// let expected = UserNative {
975///     users: vec![User {
976///         unique_id: "1691421275437".to_string(),
977///         last_language: None,
978///         creator: "Paul Sanders(1681162687395)".to_string(),
979///         number_of_forms: 1,
980///         forms: Some(vec![Form {
981///             name: "form.name.demographics".to_string(),
982///             last_modified: Some(
983///                 DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
984///                     .unwrap()
985///                     .with_timezone(&Utc),
986///             ),
987///             who_last_modified_name: Some("Paul Sanders".to_string()),
988///             who_last_modified_role: Some("Project Manager".to_string()),
989///             when_created: 1691421341578,
990///             has_errors: false,
991///             has_warnings: false,
992///             locked: false,
993///             user: None,
994///             date_time_changed: None,
995///             form_title: "User Demographics".to_string(),
996///             form_index: 1,
997///             form_group: None,
998///             form_state: "In-Work".to_string(),
999///             lock_state: None,
1000///             states: Some(vec![State {
1001///                 value: "form.state.in.work".to_string(),
1002///                 signer: "Paul Sanders - Project Manager".to_string(),
1003///                 signer_unique_id: "1681162687395".to_string(),
1004///                 date_signed: Some(
1005///                     DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1006///                         .unwrap()
1007///                         .with_timezone(&Utc),
1008///                 ),
1009///             }]),
1010///             categories: Some(vec![
1011///                         Category {
1012///                             name: "demographics".to_string(),
1013///                             category_type: "normal".to_string(),
1014///                             highest_index: 0,
1015///                             fields: Some(vec![
1016///                                 Field {
1017///                                     name: "address".to_string(),
1018///                                     field_type: "text".to_string(),
1019///                                     data_type: Some("string".to_string()),
1020///                                     error_code: "undefined".to_string(),
1021///                                     when_created: Some(DateTime::parse_from_rfc3339("2024-01-12T20:14:09Z")
1022///                                         .unwrap()
1023///                                         .with_timezone(&Utc)),
1024///                                     keep_history: true,
1025///                                     entries: None,
1026///                                     comments: None,
1027///                                 },
1028///                                 Field {
1029///                                     name: "email".to_string(),
1030///                                     field_type: "text".to_string(),
1031///                                     data_type: Some("string".to_string()),
1032///                                     error_code: "undefined".to_string(),
1033///                                     when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1034///                                         .unwrap()
1035///                                         .with_timezone(&Utc)),
1036///                                     keep_history: true,
1037///                                     entries: Some(vec![Entry {
1038///                                         entry_id: "1".to_string(),
1039///                                         reviewed_by: None,
1040///                                         reviewed_by_unique_id: None,
1041///                                         reviewed_by_when: None,
1042///                                         value: Some(Value {
1043///                                             by: "Paul Sanders".to_string(),
1044///                                             by_unique_id: Some("1681162687395".to_string()),
1045///                                             role: "Project Manager".to_string(),
1046///                                             when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1047///                                                 .unwrap()
1048///                                                 .with_timezone(&Utc)),
1049///                                             value: "jazz@artemis.com".to_string(),
1050///                                         }),
1051///                                         reason: None,
1052///                                     }]),
1053///                                     comments: None,
1054///                                 },
1055///                             ]),
1056///                         },
1057///                         Category {
1058///                             name: "Administrative".to_string(),
1059///                             category_type: "normal".to_string(),
1060///                             highest_index: 0,
1061///                             fields: Some(vec![
1062///                                 Field {
1063///                                     name: "study_assignment".to_string(),
1064///                                     field_type: "text".to_string(),
1065///                                     data_type: None,
1066///                                     error_code: "undefined".to_string(),
1067///                                     when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1068///                                         .unwrap()
1069///                                         .with_timezone(&Utc)),
1070///                                     keep_history: true,
1071///                                     entries: Some(vec![
1072///                                         Entry {
1073///                                             entry_id: "1".to_string(),
1074///                                             reviewed_by: None,
1075///                                             reviewed_by_unique_id: None,
1076///                                             reviewed_by_when: None,
1077///                                             value: Some(Value {
1078///                                                 by: "set from calculation".to_string(),
1079///                                                 by_unique_id: None,
1080///                                                 role: "System".to_string(),
1081///                                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1082///                                                     .unwrap()
1083///                                                     .with_timezone(&Utc)),
1084///                                                 value: "On 07-Aug-2023 10:15 -0500, Paul Sanders assigned user from another study".to_string(),
1085///                                             }),
1086///                                             reason: Some(Reason {
1087///                                                 by: "set from calculation".to_string(),
1088///                                                 by_unique_id: None,
1089///                                                 role: "System".to_string(),
1090///                                                 when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1091///                                                     .unwrap()
1092///                                                     .with_timezone(&Utc)),
1093///                                                 value: "calculated value".to_string(),
1094///                                             }),
1095///                                         },
1096///                                     ]),
1097///                                     comments: None,
1098///                                 },
1099///                             ]),
1100///                         },
1101///             ]),
1102///         }]),
1103///     }],
1104/// };
1105///
1106/// let result = parse_user_native_string(xml).unwrap();
1107///
1108/// assert_eq!(result, expected);
1109/// ```
1110pub fn parse_user_native_string(xml_str: &str) -> Result<UserNative, Error> {
1111    let native: UserNative = quick_xml::de::from_str(xml_str)?;
1112
1113    Ok(native)
1114}
1115
1116fn check_valid_xml_file(xml_path: &Path) -> Result<(), Error> {
1117    if !xml_path.exists() {
1118        return Err(Error::FileNotFound(xml_path.to_path_buf()));
1119    }
1120
1121    if let Some(extension) = xml_path.extension() {
1122        if extension != "xml" {
1123            return Err(Error::InvalidFileType(xml_path.to_owned()));
1124        }
1125    } else {
1126        return Err(Error::Unknown);
1127    }
1128
1129    Ok(())
1130}
1131
1132#[cfg(test)]
1133mod tests {
1134    use super::*;
1135    use tempfile::{tempdir, Builder};
1136
1137    #[test]
1138    fn test_site_file_not_found_error() {
1139        let dir = tempdir().unwrap().path().to_path_buf();
1140        let result = parse_site_native_file(&dir);
1141        assert!(result.is_err());
1142        assert!(matches!(result, Err(Error::FileNotFound(_))));
1143    }
1144
1145    #[test]
1146    fn test_site_invaid_file_type_error() {
1147        let file = Builder::new()
1148            .prefix("test")
1149            .suffix(".csv")
1150            .tempfile()
1151            .unwrap();
1152        let result = parse_site_native_file(file.path());
1153
1154        assert!(result.is_err());
1155        assert!(matches!(result, Err(Error::InvalidFileType(_))));
1156    }
1157
1158    #[test]
1159    fn test_subject_file_not_found_error() {
1160        let dir = tempdir().unwrap().path().to_path_buf();
1161        let result = parse_subject_native_file(&dir);
1162        assert!(result.is_err());
1163        assert!(matches!(result, Err(Error::FileNotFound(_))));
1164    }
1165
1166    #[test]
1167    fn test_subject_invaid_file_type_error() {
1168        let file = Builder::new()
1169            .prefix("test")
1170            .suffix(".csv")
1171            .tempfile()
1172            .unwrap();
1173        let result = parse_subject_native_file(file.path());
1174
1175        assert!(result.is_err());
1176        assert!(matches!(result, Err(Error::InvalidFileType(_))));
1177    }
1178
1179    #[test]
1180    fn test_user_file_not_found_error() {
1181        let dir = tempdir().unwrap().path().to_path_buf();
1182        let result = parse_user_native_file(&dir);
1183        assert!(result.is_err());
1184        assert!(matches!(result, Err(Error::FileNotFound(_))));
1185    }
1186
1187    #[test]
1188    fn test_user_invaid_file_type_error() {
1189        let file = Builder::new()
1190            .prefix("test")
1191            .suffix(".csv")
1192            .tempfile()
1193            .unwrap();
1194        let result = parse_user_native_file(file.path());
1195
1196        assert!(result.is_err());
1197        assert!(matches!(result, Err(Error::InvalidFileType(_))));
1198    }
1199
1200    #[test]
1201    fn test_forms_parsing_regression() {
1202        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1203<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1204    <patient patientId="TEST-001" uniqueId="123456789" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="2">
1205        <form name="test.form.1" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form 1" formIndex="1" formGroup="Test Group" formState="In-Work">
1206            <state value="form.state.in.work" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:09:02 -0400"/>
1207            <category name="Test Category" type="normal" highestIndex="0">
1208                <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
1209                    <entry id="1">
1210                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Test Value</value>
1211                    </entry>
1212                </field>
1213            </category>
1214        </form>
1215        <form name="test.form.2" lastModified="2023-04-15 12:10:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456790" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form 2" formIndex="2" formGroup="Test Group" formState="Complete">
1216            <state value="form.state.complete" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:10:02 -0400"/>
1217        </form>
1218    </patient>
1219</export_from_vision_EDC>"#;
1220
1221        let result = parse_subject_native_string(xml).expect("Should parse successfully");
1222
1223        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1224
1225        let patient = &result.patients[0];
1226        assert_eq!(patient.patient_id, "TEST-001");
1227        assert_eq!(patient.number_of_forms, 2);
1228
1229        let forms = patient.forms.as_ref().expect("Patient should have forms");
1230        assert_eq!(forms.len(), 2, "Patient should have exactly 2 forms");
1231
1232        let form1 = &forms[0];
1233        assert_eq!(form1.name, "test.form.1");
1234        assert_eq!(form1.form_title, "Test Form 1");
1235        assert_eq!(form1.form_index, 1);
1236        assert_eq!(form1.form_state, "In-Work");
1237
1238        let states1 = form1.states.as_ref().expect("Form 1 should have states");
1239        assert_eq!(states1.len(), 1);
1240        assert_eq!(states1[0].value, "form.state.in.work");
1241
1242        let categories1 = form1
1243            .categories
1244            .as_ref()
1245            .expect("Form 1 should have categories");
1246        assert_eq!(categories1.len(), 1);
1247        assert_eq!(categories1[0].name, "Test Category");
1248
1249        let fields1 = categories1[0]
1250            .fields
1251            .as_ref()
1252            .expect("Category should have fields");
1253        assert_eq!(fields1.len(), 1);
1254        assert_eq!(fields1[0].name, "test_field");
1255
1256        let entries1 = fields1[0]
1257            .entries
1258            .as_ref()
1259            .expect("Field should have entries");
1260        assert_eq!(entries1.len(), 1);
1261        assert_eq!(entries1[0].entry_id, "1");
1262
1263        let value1 = entries1[0].value.as_ref().expect("Entry should have value");
1264        assert_eq!(value1.value, "Test Value");
1265        assert_eq!(value1.by, "Test User");
1266        assert_eq!(value1.role, "Tester");
1267
1268        let form2 = &forms[1];
1269        assert_eq!(form2.name, "test.form.2");
1270        assert_eq!(form2.form_title, "Test Form 2");
1271        assert_eq!(form2.form_index, 2);
1272        assert_eq!(form2.form_state, "Complete");
1273
1274        let states2 = form2.states.as_ref().expect("Form 2 should have states");
1275        assert_eq!(states2.len(), 1);
1276        assert_eq!(states2[0].value, "form.state.complete");
1277    }
1278
1279    #[test]
1280    fn test_comments_parsing_regression() {
1281        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1282<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1283    <patient patientId="TEST-002" uniqueId="123456790" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="1">
1284        <form name="test.form.with.comments" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form With Comments" formIndex="1" formGroup="Test Group" formState="In-Work">
1285            <category name="Test Category" type="normal" highestIndex="0">
1286                <field name="field_with_comments" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
1287                    <entry id="1">
1288                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Field Value</value>
1289                    </entry>
1290                    <comment id="1">
1291                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:05 -0400" xml:space="preserve">First comment</value>
1292                    </comment>
1293                    <comment id="2">
1294                        <value by="Another User" byUniqueId="222222222" role="Reviewer" when="2023-04-15 12:10:00 -0400" xml:space="preserve">Second comment</value>
1295                    </comment>
1296                </field>
1297                <field name="field_without_comments" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:30 -0400" keepHistory="true">
1298                    <entry id="1">
1299                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:10 -0400" xml:space="preserve">Another Value</value>
1300                    </entry>
1301                </field>
1302            </category>
1303        </form>
1304    </patient>
1305</export_from_vision_EDC>"#;
1306
1307        let result = parse_subject_native_string(xml).expect("Should parse successfully");
1308
1309        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1310
1311        let patient = &result.patients[0];
1312        let forms = patient.forms.as_ref().expect("Patient should have forms");
1313        let form = &forms[0];
1314        let categories = form
1315            .categories
1316            .as_ref()
1317            .expect("Form should have categories");
1318        let fields = categories[0]
1319            .fields
1320            .as_ref()
1321            .expect("Category should have fields");
1322        assert_eq!(fields.len(), 2, "Should have 2 fields");
1323
1324        let field_with_comments = &fields[0];
1325        assert_eq!(field_with_comments.name, "field_with_comments");
1326
1327        let comments = field_with_comments
1328            .comments
1329            .as_ref()
1330            .expect("Field should have comments");
1331        assert_eq!(comments.len(), 2, "Should have exactly 2 comments");
1332
1333        let comment1 = &comments[0];
1334        assert_eq!(comment1.comment_id, "1");
1335        let comment1_value = comment1
1336            .value
1337            .as_ref()
1338            .expect("Comment 1 should have value");
1339        assert_eq!(comment1_value.value, "First comment");
1340        assert_eq!(comment1_value.by, "Test User");
1341        assert_eq!(comment1_value.role, "Tester");
1342
1343        let comment2 = &comments[1];
1344        assert_eq!(comment2.comment_id, "2");
1345        let comment2_value = comment2
1346            .value
1347            .as_ref()
1348            .expect("Comment 2 should have value");
1349        assert_eq!(comment2_value.value, "Second comment");
1350        assert_eq!(comment2_value.by, "Another User");
1351        assert_eq!(comment2_value.role, "Reviewer");
1352
1353        let field_without_comments = &fields[1];
1354        assert_eq!(field_without_comments.name, "field_without_comments");
1355        assert!(
1356            field_without_comments.comments.is_none(),
1357            "Field without comments should have no comments"
1358        );
1359    }
1360
1361    #[test]
1362    fn test_empty_forms_handling() {
1363        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1364<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1365    <patient patientId="TEST-003" uniqueId="123456791" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="0">
1366    </patient>
1367</export_from_vision_EDC>"#;
1368
1369        let result = parse_subject_native_string(xml).expect("Should parse successfully");
1370
1371        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1372
1373        let patient = &result.patients[0];
1374        assert_eq!(patient.patient_id, "TEST-003");
1375        assert_eq!(patient.number_of_forms, 0);
1376        assert!(
1377            patient.forms.is_none(),
1378            "Patient with 0 forms should have None for forms"
1379        );
1380    }
1381
1382    #[test]
1383    fn test_large_patient_forms_regression() {
1384        let mut xml = String::from(
1385            r#"<?xml version="1.0" encoding="UTF-8"?>
1386<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1387    <patient patientId="LARGE-TEST" uniqueId="123456792" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="50">"#,
1388        );
1389
1390        for i in 1..=50 {
1391            xml.push_str(&format!(r#"
1392        <form name="test.form.{}" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="12345678{}" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form {}" formIndex="{}" formGroup="Test Group" formState="In-Work">
1393            <state value="form.state.in.work" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:09:02 -0400"/>
1394            <category name="Category {}" type="normal" highestIndex="0">
1395                <field name="field_{}" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
1396                    <entry id="1">
1397                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Value {}</value>
1398                    </entry>
1399                    <comment id="1">
1400                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:05 -0400" xml:space="preserve">Comment for form {}</value>
1401                    </comment>
1402                </field>
1403            </category>
1404        </form>"#, i, i, i, i, i, i, i, i));
1405        }
1406
1407        xml.push_str(
1408            r#"
1409    </patient>
1410</export_from_vision_EDC>"#,
1411        );
1412
1413        let result =
1414            parse_subject_native_string(&xml).expect("Should parse large patient successfully");
1415
1416        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1417
1418        let patient = &result.patients[0];
1419        assert_eq!(patient.patient_id, "LARGE-TEST");
1420        assert_eq!(patient.number_of_forms, 50);
1421
1422        let forms = patient.forms.as_ref().expect("Patient should have forms");
1423        assert_eq!(forms.len(), 50, "Patient should have exactly 50 forms");
1424
1425        for (i, form) in forms.iter().enumerate() {
1426            let form_num = i + 1;
1427            assert_eq!(form.name, format!("test.form.{}", form_num));
1428            assert_eq!(form.form_title, format!("Test Form {}", form_num));
1429            assert_eq!(form.form_index, form_num);
1430
1431            let categories = form
1432                .categories
1433                .as_ref()
1434                .expect("Form should have categories");
1435            assert_eq!(categories.len(), 1);
1436
1437            let fields = categories[0]
1438                .fields
1439                .as_ref()
1440                .expect("Category should have fields");
1441            assert_eq!(fields.len(), 1);
1442
1443            let entries = fields[0]
1444                .entries
1445                .as_ref()
1446                .expect("Field should have entries");
1447            assert_eq!(entries.len(), 1);
1448            assert_eq!(
1449                entries[0].value.as_ref().unwrap().value,
1450                format!("Value {}", form_num)
1451            );
1452
1453            let comments = fields[0]
1454                .comments
1455                .as_ref()
1456                .expect("Field should have comments");
1457            assert_eq!(comments.len(), 1);
1458            assert_eq!(
1459                comments[0].value.as_ref().unwrap().value,
1460                format!("Comment for form {}", form_num)
1461            );
1462        }
1463    }
1464
1465    #[test]
1466    fn test_malformed_datetime_handling() {
1467        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1468<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1469    <patient patientId="TEST-004" uniqueId="123456793" whenCreated="" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="1">
1470        <form name="test.form.malformed.dates" lastModified="" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form" formIndex="1" formGroup="Test Group" formState="In-Work">
1471            <category name="Test Category" type="normal" highestIndex="0">
1472                <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="" keepHistory="true">
1473                    <entry id="1">
1474                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Test Value</value>
1475                    </entry>
1476                </field>
1477            </category>
1478        </form>
1479    </patient>
1480</export_from_vision_EDC>"#;
1481
1482        let result =
1483            parse_subject_native_string(xml).expect("Should handle malformed datetimes gracefully");
1484
1485        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");
1486
1487        let patient = &result.patients[0];
1488        assert!(
1489            patient.when_created.is_none(),
1490            "Empty whenCreated should be None"
1491        );
1492
1493        let forms = patient.forms.as_ref().expect("Patient should have forms");
1494        let form = &forms[0];
1495        assert!(
1496            form.last_modified.is_none(),
1497            "Empty lastModified should be None"
1498        );
1499
1500        let categories = form
1501            .categories
1502            .as_ref()
1503            .expect("Form should have categories");
1504        let fields = categories[0]
1505            .fields
1506            .as_ref()
1507            .expect("Category should have fields");
1508        let field = &fields[0];
1509        assert!(
1510            field.when_created.is_none(),
1511            "Empty whenCreated in field should be None"
1512        );
1513    }
1514
1515    #[test]
1516    fn test_empty_datetime_in_value_and_reason() {
1517        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1518<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
1519  <patient patientId="TEST-001" uniqueId="123456" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="654321" lastLanguage="" numberOfForms="1">
1520    <form name="test.form" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Manager" whenCreated="1681574905839" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form" formIndex="1" formGroup="Test" formState="In-Work">
1521      <state value="form.state.in.work" signer="Test User - Manager" signerUniqueId="123456" dateSigned="2023-04-15 12:09:02 -0400" />
1522      <category name="Test Category" type="normal" highestIndex="0">
1523        <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
1524          <entry id="1">
1525            <value by="Test User" byUniqueId="123456" role="Manager" when="" xml:space="preserve">Test Value</value>
1526            <reason by="Test User" byUniqueId="123456" role="Manager" when="" xml:space="preserve">Test Reason</reason>
1527          </entry>
1528        </field>
1529      </category>
1530    </form>
1531  </patient>
1532</export_from_vision_EDC>"#;
1533
1534        let result = parse_subject_native_string(xml);
1535        assert!(result.is_ok(), "Should parse successfully: {:?}", result);
1536
1537        let native = result.unwrap();
1538        assert_eq!(native.patients.len(), 1, "Should have 1 patient");
1539
1540        let patient = &native.patients[0];
1541        let forms = patient.forms.as_ref().expect("Patient should have forms");
1542        let form = &forms[0];
1543        let categories = form
1544            .categories
1545            .as_ref()
1546            .expect("Form should have categories");
1547        let fields = categories[0]
1548            .fields
1549            .as_ref()
1550            .expect("Category should have fields");
1551        let field = &fields[0];
1552        let entries = field.entries.as_ref().expect("Field should have entries");
1553        let entry = &entries[0];
1554
1555        let value = entry.value.as_ref().expect("Entry should have value");
1556        assert!(
1557            value.when.is_none(),
1558            "Empty when attribute in value should be None"
1559        );
1560        assert_eq!(value.value, "Test Value");
1561
1562        let reason = entry.reason.as_ref().expect("Entry should have reason");
1563        assert!(
1564            reason.when.is_none(),
1565            "Empty when attribute in reason should be None"
1566        );
1567        assert_eq!(reason.value, "Test Reason");
1568    }
1569}