prelude_xml_parser/lib.rs
1pub mod errors;
2pub mod native;
3
4use std::{collections::HashMap, fs::read_to_string, io::Cursor, path::Path};
5
6use rayon::prelude::*;
7
8use crate::errors::Error;
9use crate::native::{
10 common::{Category, Comment, Entry, Field, LockState, Reason, State, Value},
11 site_native::{Site, SiteNative},
12 subject_native::{Form, Patient, SubjectNative},
13 user_native::{User, UserNative},
14};
15use quick_xml::events::{BytesStart, Event};
16use quick_xml::Reader;
17
18/// Parses a Prelude native XML file into a `Native` struct.
19///
20/// # Example
21///
22/// ```
23/// use std::path::Path;
24///
25/// use prelude_xml_parser::parse_site_native_file;
26///
27/// let file_path = Path::new("tests/assets/site_native.xml");
28/// let native = parse_site_native_file(&file_path).unwrap();
29///
30/// assert!(native.sites.len() >= 1, "Vector length is less than 1");
31/// ```
32pub fn parse_site_native_file(xml_path: &Path) -> Result<SiteNative, Error> {
33 check_valid_xml_file(xml_path)?;
34
35 let xml_file = read_to_string(xml_path)?;
36 let native = parse_site_native_string(&xml_file)?;
37
38 Ok(native)
39}
40
41/// Parse a string of Prelude native site XML into a `SiteNative` struct.
42///
43/// # Example
44///
45/// ```
46/// use chrono::{DateTime, Utc};
47/// use prelude_xml_parser::parse_site_native_string;
48/// use prelude_xml_parser::native::site_native::*;
49///
50/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
51/// <export_from_vision_EDC date="01-Jun-2024 18:17 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="2">
52///
53/// <site name="Some Site" uniqueId="1681574834910" numberOfPatients="4" countOfRandomizedPatients="0" whenCreated="2023-04-15 12:08:19 -0400" creator="Paul Sanders" numberOfForms="1">
54/// <form name="demographic.form.name.site.demographics" lastModified="2023-04-15 12:08:19 -0400" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1681574834930" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Site Demographics" formIndex="1" formGroup="Demographic" formState="In-Work">
55/// <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-15 12:08:19 -0400" />
56/// <category name="Demographics" type="normal" highestIndex="0">
57/// <field name="address" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
58/// <field name="company" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
59/// <entry id="1">
60/// <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:08:19 -0400" xml:space="preserve">Some Company</value>
61/// </entry>
62/// </field>
63/// <field name="site_code_name" type="hidden" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
64/// <entry id="1">
65/// <value by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:08:19 -0400" xml:space="preserve">ABC-Some Site</value>
66/// <reason by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:08:19 -0400" xml:space="preserve">calculated value</reason>
67/// </entry>
68/// <entry id="2">
69/// <value by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:07:24 -0400" xml:space="preserve">Some Site</value>
70/// <reason by="set from calculation" byUniqueId="" role="System" when="2023-04-15 12:07:24 -0400" xml:space="preserve">calculated value</reason>
71/// </entry>
72/// </field>
73/// </category>
74/// <category name="Enrollment" type="normal" highestIndex="0">
75/// <field name="enrollment_closed_date" type="popUpCalendar" dataType="date" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
76/// <field name="enrollment_open" type="radio" dataType="string" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true">
77/// <entry id="1">
78/// <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:08:19 -0400" xml:space="preserve">Yes</value>
79/// </entry>
80/// </field>
81/// <field name="enrollment_open_date" type="popUpCalendar" dataType="date" errorCode="valid" whenCreated="2023-04-15 11:07:14 -0500" keepHistory="true" />
82/// </category>
83/// </form>
84/// </site>
85///
86/// <site name="Artemis" uniqueId="1691420994591" numberOfPatients="0" countOfRandomizedPatients="0" whenCreated="2023-08-07 08:14:23 -0700" creator="Paul Sanders" numberOfForms="1">
87/// <form name="demographic.form.name.site.demographics" lastModified="2023-08-07 08:14:23 -0700" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1691420994611" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Site Demographics" formIndex="1" formGroup="Demographic" formState="In-Work">
88/// <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-08-07 08:14:23 -0700" />
89/// <category name="Demographics" type="normal" highestIndex="0">
90/// <field name="address" type="text" dataType="string" errorCode="valid" whenCreated="2023-08-07 10:09:54 -0500" keepHistory="true">
91/// <entry id="1">
92/// <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 08:14:21 -0700" xml:space="preserve">1111 Moon Drive</value>
93/// </entry>
94/// <comment id="1">
95/// <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 08:14:21 -0700" xml:space="preserve">Some comment</value>
96/// </comment>
97/// </field>
98/// </category>
99/// </form>
100/// </site>
101///
102/// </export_from_vision_EDC>
103/// "#;
104///
105/// let expected = SiteNative {
106/// sites: vec![
107/// Site {
108/// name: "Some Site".to_string(),
109/// unique_id: "1681574834910".to_string(),
110/// number_of_patients: 4,
111/// count_of_randomized_patients: 0,
112/// when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
113/// .unwrap()
114/// .with_timezone(&Utc)),
115/// creator: "Paul Sanders".to_string(),
116/// number_of_forms: 1,
117/// forms: Some(vec![Form {
118/// name: "demographic.form.name.site.demographics".to_string(),
119/// last_modified: Some(
120/// DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
121/// .unwrap()
122/// .with_timezone(&Utc),
123/// ),
124/// who_last_modified_name: Some("Paul Sanders".to_string()),
125/// who_last_modified_role: Some("Project Manager".to_string()),
126/// when_created: 1681574834930,
127/// has_errors: false,
128/// has_warnings: false,
129/// locked: false,
130/// user: None,
131/// date_time_changed: None,
132/// form_title: "Site Demographics".to_string(),
133/// form_index: 1,
134/// form_group: Some("Demographic".to_string()),
135/// form_state: "In-Work".to_string(),
136/// lock_state: None,
137/// states: Some(vec![State {
138/// value: "form.state.in.work".to_string(),
139/// signer: "Paul Sanders - Project Manager".to_string(),
140/// signer_unique_id: "1681162687395".to_string(),
141/// date_signed: Some(
142/// DateTime::parse_from_rfc3339("2023-04-15T16:08:19Z")
143/// .unwrap()
144/// .with_timezone(&Utc),
145/// ),
146/// }]),
147/// categories: Some(vec![
148/// Category {
149/// name: "Demographics".to_string(),
150/// category_type: "normal".to_string(),
151/// highest_index: 0,
152/// fields: Some(vec![
153/// Field {
154/// name: "address".to_string(),
155/// field_type: "text".to_string(),
156/// data_type: Some("string".to_string()),
157/// error_code: "valid".to_string(),
158/// when_created: Some(DateTime::parse_from_rfc3339(
159/// "2023-04-15T16:07:14Z",
160/// )
161/// .unwrap()
162/// .with_timezone(&Utc)),
163/// keep_history: true,
164/// entries: None,
165/// comments: None,
166/// },
167/// Field {
168/// name: "company".to_string(),
169/// field_type: "text".to_string(),
170/// data_type: Some("string".to_string()),
171/// error_code: "valid".to_string(),
172/// when_created: Some(DateTime::parse_from_rfc3339(
173/// "2023-04-15T16:07:14Z",
174/// )
175/// .unwrap()
176/// .with_timezone(&Utc)),
177/// keep_history: true,
178/// entries: Some(vec![Entry {
179/// entry_id: "1".to_string(),
180/// reviewed_by: None,
181/// reviewed_by_unique_id: None,
182/// reviewed_by_when: None,
183/// value: Some(Value {
184/// by: "Paul Sanders".to_string(),
185/// by_unique_id: Some("1681162687395".to_string()),
186/// role: "Project Manager".to_string(),
187/// when: Some(DateTime::parse_from_rfc3339(
188/// "2023-04-15T16:08:19Z",
189/// )
190/// .unwrap()
191/// .with_timezone(&Utc)),
192/// value: "Some Company".to_string(),
193/// }),
194/// reason: None,
195/// }]),
196/// comments: None,
197/// },
198/// Field {
199/// name: "site_code_name".to_string(),
200/// field_type: "hidden".to_string(),
201/// data_type: Some("string".to_string()),
202/// error_code: "valid".to_string(),
203/// when_created: Some(DateTime::parse_from_rfc3339(
204/// "2023-04-15T16:07:14Z",
205/// )
206/// .unwrap()
207/// .with_timezone(&Utc)),
208/// keep_history: true,
209/// entries: Some(vec![
210/// Entry {
211/// entry_id: "1".to_string(),
212/// reviewed_by: None,
213/// reviewed_by_unique_id: None,
214/// reviewed_by_when: None,
215/// value: Some(Value {
216/// by: "set from calculation".to_string(),
217/// by_unique_id: None,
218/// role: "System".to_string(),
219/// when: Some(DateTime::parse_from_rfc3339(
220/// "2023-04-15T16:08:19Z",
221/// )
222/// .unwrap()
223/// .with_timezone(&Utc)),
224/// value: "ABC-Some Site".to_string(),
225/// }),
226/// reason: Some(Reason {
227/// by: "set from calculation".to_string(),
228/// by_unique_id: None,
229/// role: "System".to_string(),
230/// when: Some(DateTime::parse_from_rfc3339(
231/// "2023-04-15T16:08:19Z",
232/// )
233/// .unwrap()
234/// .with_timezone(&Utc)),
235/// value: "calculated value".to_string(),
236/// }),
237/// },
238/// Entry {
239/// entry_id: "2".to_string(),
240/// reviewed_by: None,
241/// reviewed_by_unique_id: None,
242/// reviewed_by_when: None,
243/// value: Some(Value {
244/// by: "set from calculation".to_string(),
245/// by_unique_id: None,
246/// role: "System".to_string(),
247/// when: Some(DateTime::parse_from_rfc3339(
248/// "2023-04-15T16:07:24Z",
249/// )
250/// .unwrap()
251/// .with_timezone(&Utc)),
252/// value: "Some Site".to_string(),
253/// }),
254/// reason: Some(Reason {
255/// by: "set from calculation".to_string(),
256/// by_unique_id: None,
257/// role: "System".to_string(),
258/// when: Some(DateTime::parse_from_rfc3339(
259/// "2023-04-15T16:07:24Z",
260/// )
261/// .unwrap()
262/// .with_timezone(&Utc)),
263/// value: "calculated value".to_string(),
264/// }),
265/// },
266/// ]),
267/// comments: None,
268/// },
269/// ]),
270/// },
271/// Category {
272/// name: "Enrollment".to_string(),
273/// category_type: "normal".to_string(),
274/// highest_index: 0,
275/// fields: Some(vec![
276/// Field {
277/// name: "enrollment_closed_date".to_string(),
278/// field_type: "popUpCalendar".to_string(),
279/// data_type: Some("date".to_string()),
280/// error_code: "valid".to_string(),
281/// when_created: Some(DateTime::parse_from_rfc3339(
282/// "2023-04-15T16:07:14Z",
283/// )
284/// .unwrap()
285/// .with_timezone(&Utc)),
286/// keep_history: true,
287/// entries: None,
288/// comments: None,
289/// },
290/// Field {
291/// name: "enrollment_open".to_string(),
292/// field_type: "radio".to_string(),
293/// data_type: Some("string".to_string()),
294/// error_code: "valid".to_string(),
295/// when_created: Some(DateTime::parse_from_rfc3339(
296/// "2023-04-15T16:07:14Z",
297/// )
298/// .unwrap()
299/// .with_timezone(&Utc)),
300/// keep_history: true,
301/// entries: Some(vec![Entry {
302/// entry_id: "1".to_string(),
303/// reviewed_by: None,
304/// reviewed_by_unique_id: None,
305/// reviewed_by_when: None,
306/// value: Some(Value {
307/// by: "Paul Sanders".to_string(),
308/// by_unique_id: Some("1681162687395".to_string()),
309/// role: "Project Manager".to_string(),
310/// when: Some(DateTime::parse_from_rfc3339(
311/// "2023-04-15T16:08:19Z",
312/// )
313/// .unwrap()
314/// .with_timezone(&Utc)),
315/// value: "Yes".to_string(),
316/// }),
317/// reason: None,
318/// }]),
319/// comments: None,
320/// },
321/// Field {
322/// name: "enrollment_open_date".to_string(),
323/// field_type: "popUpCalendar".to_string(),
324/// data_type: Some("date".to_string()),
325/// error_code: "valid".to_string(),
326/// when_created: Some(DateTime::parse_from_rfc3339(
327/// "2023-04-15T16:07:14Z",
328/// )
329/// .unwrap()
330/// .with_timezone(&Utc)),
331/// keep_history: true,
332/// entries: None,
333/// comments: None,
334/// },
335/// ]),
336/// },
337/// ]),
338/// }]),
339/// },
340/// Site {
341/// name: "Artemis".to_string(),
342/// unique_id: "1691420994591".to_string(),
343/// number_of_patients: 0,
344/// count_of_randomized_patients: 0,
345/// when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
346/// .unwrap()
347/// .with_timezone(&Utc)),
348/// creator: "Paul Sanders".to_string(),
349/// number_of_forms: 1,
350/// forms: Some(vec![Form {
351/// name: "demographic.form.name.site.demographics".to_string(),
352/// last_modified: Some(
353/// DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
354/// .unwrap()
355/// .with_timezone(&Utc),
356/// ),
357/// who_last_modified_name: Some("Paul Sanders".to_string()),
358/// who_last_modified_role: Some("Project Manager".to_string()),
359/// when_created: 1691420994611,
360/// has_errors: false,
361/// has_warnings: false,
362/// locked: false,
363/// user: None,
364/// date_time_changed: None,
365/// form_title: "Site Demographics".to_string(),
366/// form_index: 1,
367/// form_group: Some("Demographic".to_string()),
368/// form_state: "In-Work".to_string(),
369/// lock_state: None,
370/// states: Some(vec![State {
371/// value: "form.state.in.work".to_string(),
372/// signer: "Paul Sanders - Project Manager".to_string(),
373/// signer_unique_id: "1681162687395".to_string(),
374/// date_signed: Some(
375/// DateTime::parse_from_rfc3339("2023-08-07T15:14:23Z")
376/// .unwrap()
377/// .with_timezone(&Utc),
378/// ),
379/// }]),
380/// categories: Some(vec![Category {
381/// name: "Demographics".to_string(),
382/// category_type: "normal".to_string(),
383/// highest_index: 0,
384/// fields: Some(vec![Field {
385/// name: "address".to_string(),
386/// field_type: "text".to_string(),
387/// data_type: Some("string".to_string()),
388/// error_code: "valid".to_string(),
389/// when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:09:54Z")
390/// .unwrap()
391/// .with_timezone(&Utc)),
392/// keep_history: true,
393/// entries: Some(vec![Entry {
394/// entry_id: "1".to_string(),
395/// reviewed_by: None,
396/// reviewed_by_unique_id: None,
397/// reviewed_by_when: None,
398/// value: Some(Value {
399/// by: "Paul Sanders".to_string(),
400/// by_unique_id: Some("1681162687395".to_string()),
401/// role: "Project Manager".to_string(),
402/// when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:21Z")
403/// .unwrap()
404/// .with_timezone(&Utc)),
405/// value: "1111 Moon Drive".to_string(),
406/// }),
407/// reason: None,
408/// }]),
409/// comments: Some(vec![Comment {
410/// comment_id: "1".to_string(),
411/// value: Some(Value {
412/// by: "Paul Sanders".to_string(),
413/// by_unique_id: Some("1681162687395".to_string()),
414/// role: "Project Manager".to_string(),
415/// when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:14:21Z")
416/// .unwrap()
417/// .with_timezone(&Utc)),
418/// value: "Some comment".to_string(),
419/// }),
420/// }]),
421/// }]),
422/// }]),
423/// }]),
424/// },
425/// ],
426/// };
427/// let result = parse_site_native_string(xml).unwrap();
/// assert_eq!(result, expected);
/// ```
429pub fn parse_site_native_string(xml_str: &str) -> Result<SiteNative, Error> {
430 let chunks = extract_site_chunks(xml_str);
431 let sites = chunks
432 .into_par_iter()
433 .map(parse_site_xml)
434 .collect::<Result<Vec<_>, _>>()?;
435 Ok(SiteNative { sites })
436}
437
438/// Parses a Prelude native subject XML file into a `SubjectNative` struct.
439///
440/// # Example
441///
442/// ```
443/// use std::path::Path;
444///
445/// use prelude_xml_parser::parse_subject_native_file;
446///
447/// let file_path = Path::new("tests/assets/subject_native.xml");
448/// let native = parse_subject_native_file(&file_path).unwrap();
449///
450/// assert!(native.patients.len() >= 1, "Vector length is less than 1");
451/// ```
452pub fn parse_subject_native_file(xml_path: &Path) -> Result<SubjectNative, Error> {
453 check_valid_xml_file(xml_path)?;
454
455 let xml_str = read_to_string(xml_path)?;
456 let chunks = extract_patient_chunks(&xml_str);
457 let patients = chunks
458 .into_par_iter()
459 .map(parse_patient_xml)
460 .collect::<Result<Vec<_>, _>>()?;
461 Ok(SubjectNative { patients })
462}
463
464/// Parse a string of Prelude native subject XML into a `SubjectNative` struct.
465///
466/// # Example
467///
468/// ```
469/// use chrono::{DateTime, Utc};
470/// use prelude_xml_parser::parse_subject_native_string;
471/// use prelude_xml_parser::native::common::LockState;
472/// use prelude_xml_parser::native::subject_native::*;
473///
474/// let xml = r#"<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="4">
475/// <patient patientId="ABC-001" uniqueId="1681574905819" whenCreated="2023-04-15 12:09:02 -0400" creator="Paul Sanders" siteName="Some Site" siteUniqueId="1681574834910" lastLanguage="English" numberOfForms="6">
476/// <form name="day.0.form.name.demographics" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1681574905839" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Demographics" formIndex="1" formGroup="Day 0" formState="In-Work">
477/// <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-15 12:09:02 -0400"/>
478/// <category name="Demographics" type="normal" highestIndex="0">
479/// <field name="breed" type="combo-box" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
480/// <entry id="1">
481/// <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Labrador</value>
482/// </entry>
483/// </field>
484/// </category>
485/// </form>
486/// </patient>
487/// <patient patientId="DEF-002" uniqueId="1681574905820" whenCreated="2023-04-16 12:10:02 -0400" creator="Wade Watts" siteName="Another Site" siteUniqueId="1681574834911" lastLanguage="" numberOfForms="8">
488/// <form name="day.0.form.name.demographics" lastModified="2023-04-16 12:10:15 -0400" whoLastModifiedName="Barney Rubble" whoLastModifiedRole="Technician" whenCreated="1681574905838" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Demographics" formIndex="1" formGroup="Day 0" formState="In-Work">
489/// <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-04-16 12:10:02 -0400"/>
490/// <category name="Demographics" type="normal" highestIndex="0">
491/// <field name="breed" type="combo-box" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
492/// <entry id="1">
493/// <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Labrador</value>
494/// </entry>
495/// </field>
496/// </category>
497/// </form>
498/// </patient>
499/// </export_from_vision_EDC>
500/// "#;
501///
502/// let expected = SubjectNative {
503/// patients: vec![
504/// Patient {
505/// patient_id: "ABC-001".to_string(),
506/// unique_id: "1681574905819".to_string(),
507/// when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
508/// .unwrap()
509/// .with_timezone(&Utc)),
510/// creator: "Paul Sanders".to_string(),
511/// site_name: "Some Site".to_string(),
512/// site_unique_id: "1681574834910".to_string(),
513/// last_language: Some("English".to_string()),
514/// number_of_forms: 6,
515/// forms: Some(vec![Form {
516/// name: "day.0.form.name.demographics".to_string(),
517/// last_modified: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:15Z")
518/// .unwrap()
519/// .with_timezone(&Utc)),
520/// who_last_modified_name: Some("Paul Sanders".to_string()),
521/// who_last_modified_role: Some("Project Manager".to_string()),
522/// when_created: 1681574905839,
523/// has_errors: false,
524/// has_warnings: false,
525/// locked: false,
526/// user: None,
527/// date_time_changed: None,
528/// form_title: "Demographics".to_string(),
529/// form_index: 1,
530/// form_group: Some("Day 0".to_string()),
531/// form_state: "In-Work".to_string(),
532/// lock_state: None,
533/// states: Some(vec![State {
534/// value: "form.state.in.work".to_string(),
535/// signer: "Paul Sanders - Project Manager".to_string(),
536/// signer_unique_id: "1681162687395".to_string(),
537/// date_signed: Some(
538/// DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
539/// .unwrap()
540/// .with_timezone(&Utc),
541/// ),
542/// }]),
543/// categories: Some(vec![Category {
544/// name: "Demographics".to_string(),
545/// category_type: "normal".to_string(),
546/// highest_index: 0,
547/// fields: Some(vec![Field {
548/// name: "breed".to_string(),
549/// field_type: "combo-box".to_string(),
550/// data_type: Some("string".to_string()),
551/// error_code: "valid".to_string(),
552/// when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:26Z")
553/// .unwrap()
554/// .with_timezone(&Utc)),
555/// keep_history: true,
556/// entries: Some(vec![Entry {
557/// entry_id: "1".to_string(),
558/// reviewed_by: None,
559/// reviewed_by_unique_id: None,
560/// reviewed_by_when: None,
561/// value: Some(Value {
562/// by: "Paul Sanders".to_string(),
563/// by_unique_id: Some("1681162687395".to_string()),
564/// role: "Project Manager".to_string(),
565/// when: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
566/// .unwrap()
567/// .with_timezone(&Utc)),
568/// value: "Labrador".to_string(),
569/// }),
570/// reason: None,
571/// }]),
572/// comments: None,
573/// }]),
574/// }]),
575/// }]),
576/// },
577/// Patient {
578/// patient_id: "DEF-002".to_string(),
579/// unique_id: "1681574905820".to_string(),
580/// when_created: Some(DateTime::parse_from_rfc3339("2023-04-16T16:10:02Z")
581/// .unwrap()
582/// .with_timezone(&Utc)),
583/// creator: "Wade Watts".to_string(),
584/// site_name: "Another Site".to_string(),
585/// site_unique_id: "1681574834911".to_string(),
586/// last_language: None,
587/// number_of_forms: 8,
588/// forms: Some(vec![Form {
589/// name: "day.0.form.name.demographics".to_string(),
590/// last_modified: Some(DateTime::parse_from_rfc3339("2023-04-16T16:10:15Z")
591/// .unwrap()
592/// .with_timezone(&Utc)),
593/// who_last_modified_name: Some("Barney Rubble".to_string()),
594/// who_last_modified_role: Some("Technician".to_string()),
595/// when_created: 1681574905838,
596/// has_errors: false,
597/// has_warnings: false,
598/// locked: false,
599/// user: None,
600/// date_time_changed: None,
601/// form_title: "Demographics".to_string(),
602/// form_index: 1,
603/// form_group: Some("Day 0".to_string()),
604/// form_state: "In-Work".to_string(),
605/// lock_state: None,
606/// states: Some(vec![State {
607/// value: "form.state.in.work".to_string(),
608/// signer: "Paul Sanders - Project Manager".to_string(),
609/// signer_unique_id: "1681162687395".to_string(),
610/// date_signed: Some(
611/// DateTime::parse_from_rfc3339("2023-04-16T16:10:02Z")
612/// .unwrap()
613/// .with_timezone(&Utc),
614/// ),
615/// }]),
616/// categories: Some(vec![Category {
617/// name: "Demographics".to_string(),
618/// category_type: "normal".to_string(),
619/// highest_index: 0,
620/// fields: Some(vec![Field {
621/// name: "breed".to_string(),
622/// field_type: "combo-box".to_string(),
623/// data_type: Some("string".to_string()),
624/// error_code: "valid".to_string(),
625/// when_created: Some(DateTime::parse_from_rfc3339("2023-04-15T16:08:26Z")
626/// .unwrap()
627/// .with_timezone(&Utc)),
628/// keep_history: true,
629/// entries: Some(vec![Entry {
630/// entry_id: "1".to_string(),
631/// reviewed_by: None,
632/// reviewed_by_unique_id: None,
633/// reviewed_by_when: None,
634/// value: Some(Value {
635/// by: "Paul Sanders".to_string(),
636/// by_unique_id: Some("1681162687395".to_string()),
637/// role: "Project Manager".to_string(),
638/// when: Some(DateTime::parse_from_rfc3339("2023-04-15T16:09:02Z")
639/// .unwrap()
640/// .with_timezone(&Utc)),
641/// value: "Labrador".to_string(),
642/// }),
643/// reason: None,
644/// }]),
645/// comments: None,
646/// }]),
647/// }]),
648/// }]),
649/// },
650/// ],
651/// };
652/// let result = parse_subject_native_string(xml).unwrap();
653///
654/// assert_eq!(result, expected);
655/// ```
656pub fn parse_subject_native_string(xml_str: &str) -> Result<SubjectNative, Error> {
657 let chunks = extract_patient_chunks(xml_str);
658 let patients = chunks
659 .into_par_iter()
660 .map(parse_patient_xml)
661 .collect::<Result<Vec<_>, _>>()?;
662 Ok(SubjectNative { patients })
663}
664
665fn extract_attributes(e: &BytesStart) -> Result<HashMap<String, String>, Error> {
666 let mut attrs = HashMap::new();
667 for attr in e.attributes() {
668 let attr = attr.map_err(|e| {
669 Error::ParsingError(quick_xml::de::DeError::Custom(format!(
670 "Attribute error: {}",
671 e
672 )))
673 })?;
674 let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
675 let value = String::from_utf8_lossy(&attr.value).to_string();
676 attrs.insert(key, value);
677 }
678 Ok(attrs)
679}
680
/// Splits a subject export into borrowed `<patient …>…</patient>` slices.
///
/// The scan is purely textual: it looks for the literal `<patient ` opener, then for the
/// next literal `</patient>` after it, and resumes searching right behind that closer.
/// Scanning stops as soon as either marker can no longer be found, so an opener without a
/// matching closer produces no chunk.
fn extract_patient_chunks(xml: &str) -> Vec<&str> {
    const OPEN_TAG: &str = "<patient ";
    const CLOSE_TAG: &str = "</patient>";

    let mut found = Vec::new();
    let mut cursor = 0;
    while let Some(open_offset) = xml[cursor..].find(OPEN_TAG) {
        let begin = cursor + open_offset;
        match xml[begin..].find(CLOSE_TAG) {
            Some(close_offset) => {
                let finish = begin + close_offset + CLOSE_TAG.len();
                found.push(&xml[begin..finish]);
                cursor = finish;
            }
            None => break,
        }
    }
    found
}
702
703fn parse_patient_xml(patient_xml: &str) -> Result<Patient, Error> {
704 let wrapped = format!("<r>{}</r>", patient_xml);
705 let mut xml_reader = Reader::from_reader(Cursor::new(wrapped.as_bytes()));
706 xml_reader.config_mut().trim_text(true);
707
708 let mut current_patient: Option<Patient> = None;
709 let mut current_forms: Vec<Form> = Vec::new();
710 let mut current_form: Option<Form> = None;
711 let mut current_states: Vec<State> = Vec::new();
712 let mut current_categories: Vec<Category> = Vec::new();
713 let mut current_category: Option<Category> = None;
714 let mut current_fields: Vec<Field> = Vec::new();
715 let mut current_field: Option<Field> = None;
716 let mut current_entries: Vec<Entry> = Vec::new();
717 let mut current_entry: Option<Entry> = None;
718 let mut current_comments: Vec<Comment> = Vec::new();
719 let mut current_comment: Option<Comment> = None;
720 let mut current_value: Option<Value> = None;
721 let mut current_reason: Option<Reason> = None;
722 let mut text_content = String::new();
723 let mut in_form = false;
724 let mut in_category = false;
725 let mut in_field = false;
726 let mut in_entry = false;
727 let mut in_comment = false;
728 let mut in_value = false;
729 let mut in_reason = false;
730 let mut buf = Vec::new();
731
732 loop {
733 match xml_reader.read_event_into(&mut buf) {
734 Err(e) => {
735 return Err(Error::ParsingError(quick_xml::de::DeError::Custom(
736 format!("XML reading error: {}", e),
737 )))
738 }
739 Ok(Event::Eof) => break,
740
741 Ok(Event::Start(ref e)) => {
742 let name_bytes = e.local_name();
743 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
744 match name {
745 "patient" => {
746 let attrs = extract_attributes(e)?;
747 current_patient = Some(Patient::from_attributes(attrs)?);
748 current_forms.clear();
749 }
750 "form" if current_patient.is_some() => {
751 let attrs = extract_attributes(e)?;
752 current_form = Some(Form::from_attributes(attrs)?);
753 in_form = true;
754 current_states.clear();
755 current_categories.clear();
756 }
757 "category" if in_form => {
758 let attrs = extract_attributes(e)?;
759 current_category = Some(Category::from_attributes(attrs)?);
760 in_category = true;
761 current_fields.clear();
762 }
763 "field" if in_category => {
764 let attrs = extract_attributes(e)?;
765 current_field = Some(Field::from_attributes(attrs)?);
766 in_field = true;
767 current_entries.clear();
768 current_comments.clear();
769 }
770 "entry" if in_field => {
771 let attrs = extract_attributes(e)?;
772 current_entry = Some(Entry::from_attributes(attrs)?);
773 in_entry = true;
774 }
775 "comment" if in_field => {
776 let attrs = extract_attributes(e)?;
777 let comment_id = attrs.get("id").cloned().unwrap_or_default();
778 current_comment = Some(Comment {
779 comment_id,
780 value: None,
781 });
782 in_comment = true;
783 }
784 "value" if in_entry || in_comment => {
785 let attrs = extract_attributes(e)?;
786 current_value = Some(Value::from_attributes(attrs)?);
787 in_value = true;
788 text_content.clear();
789 }
790 "reason" if in_entry => {
791 let attrs = extract_attributes(e)?;
792 current_reason = Some(Reason::from_attributes(attrs)?);
793 in_reason = true;
794 text_content.clear();
795 }
796 _ => {}
797 }
798 }
799 }
800
801 Ok(Event::Text(e)) if (in_value || in_reason) => {
802 text_content.push_str(&String::from_utf8_lossy(&e));
803 }
804
805 Ok(Event::End(ref e)) => {
806 let name_bytes = e.local_name();
807 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
808 match name {
809 "patient" => {
810 if let Some(mut patient) = current_patient.take() {
811 if !current_forms.is_empty() {
812 patient.set_forms(std::mem::take(&mut current_forms));
813 }
814 current_patient = Some(patient);
815 }
816 }
817 "form" if in_form => {
818 if let Some(mut form) = current_form.take() {
819 if !current_states.is_empty() {
820 form.states = Some(std::mem::take(&mut current_states));
821 }
822 if !current_categories.is_empty() {
823 form.categories = Some(std::mem::take(&mut current_categories));
824 }
825 current_forms.push(form);
826 }
827 in_form = false;
828 }
829 "category" if in_category => {
830 if let Some(mut category) = current_category.take() {
831 if !current_fields.is_empty() {
832 category.fields = Some(std::mem::take(&mut current_fields));
833 }
834 current_categories.push(category);
835 }
836 in_category = false;
837 }
838 "field" if in_field => {
839 if let Some(mut field) = current_field.take() {
840 if !current_entries.is_empty() {
841 field.entries = Some(std::mem::take(&mut current_entries));
842 }
843 if !current_comments.is_empty() {
844 field.comments = Some(std::mem::take(&mut current_comments));
845 }
846 current_fields.push(field);
847 }
848 in_field = false;
849 }
850 "entry" if in_entry => {
851 if let Some(entry) = current_entry.take() {
852 current_entries.push(entry);
853 }
854 in_entry = false;
855 }
856 "comment" if in_comment => {
857 if let Some(comment) = current_comment.take() {
858 current_comments.push(comment);
859 }
860 in_comment = false;
861 }
862 "value" if in_value => {
863 if let Some(mut value) = current_value.take() {
864 value.value = std::mem::take(&mut text_content);
865 if let Some(ref mut entry) = current_entry {
866 entry.value = Some(value);
867 } else if let Some(ref mut comment) = current_comment {
868 comment.value = Some(value);
869 }
870 }
871 in_value = false;
872 }
873 "reason" if in_reason => {
874 if let Some(mut reason) = current_reason.take() {
875 reason.value = std::mem::take(&mut text_content);
876 if let Some(ref mut entry) = current_entry {
877 entry.reason = Some(reason);
878 }
879 }
880 in_reason = false;
881 }
882 _ => {}
883 }
884 }
885 }
886
887 Ok(Event::Empty(ref e)) => {
888 let name_bytes = e.local_name();
889 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
890 match name {
891 "state" if in_form => {
892 let attrs = extract_attributes(e)?;
893 let state = State::from_attributes(attrs)?;
894 current_states.push(state);
895 }
896 "lockState" if in_form => {
897 let attrs = extract_attributes(e)?;
898 let lock_state = LockState::from_attributes(attrs)?;
899 if let Some(ref mut form) = current_form {
900 form.lock_state = Some(lock_state);
901 }
902 }
903 "value" if in_entry => {
904 let attrs = extract_attributes(e)?;
905 let value = Value::from_attributes(attrs)?;
906 if let Some(ref mut entry) = current_entry {
907 entry.value = Some(value);
908 }
909 }
910 "reason" if in_entry => {
911 let attrs = extract_attributes(e)?;
912 let reason = Reason::from_attributes(attrs)?;
913 if let Some(ref mut entry) = current_entry {
914 entry.reason = Some(reason);
915 }
916 }
917 _ => {}
918 }
919 }
920 }
921
922 _ => {}
923 }
924
925 buf.clear();
926 }
927
928 current_patient.ok_or_else(|| {
929 Error::ParsingError(quick_xml::de::DeError::Custom(
930 "No patient found in chunk".to_string(),
931 ))
932 })
933}
934
/// Splits a raw export into the `<site …>…</site>` slices it contains.
///
/// The scan is purely textual: it looks for the literal `<site ` (with the trailing
/// space) and pairs it with the next literal `</site>`. Scanning stops at the first
/// opening tag that has no closing tag after it. The returned slices borrow from `xml`
/// and appear in document order.
fn extract_site_chunks(xml: &str) -> Vec<&str> {
    const OPEN_TAG: &str = "<site ";
    const CLOSE_TAG: &str = "</site>";

    let mut found = Vec::new();
    let mut cursor = 0;

    while let Some(open_offset) = xml[cursor..].find(OPEN_TAG) {
        let begin = cursor + open_offset;
        let finish = match xml[begin..].find(CLOSE_TAG) {
            Some(close_offset) => begin + close_offset + CLOSE_TAG.len(),
            // Unterminated element: nothing more can be extracted.
            None => break,
        };
        found.push(&xml[begin..finish]);
        cursor = finish;
    }

    found
}
956
957fn parse_site_xml(site_xml: &str) -> Result<Site, Error> {
958 let wrapped = format!("<r>{}</r>", site_xml);
959 let mut xml_reader = Reader::from_reader(Cursor::new(wrapped.as_bytes()));
960 xml_reader.config_mut().trim_text(true);
961
962 let mut current_site: Option<Site> = None;
963 let mut current_forms: Vec<Form> = Vec::new();
964 let mut current_form: Option<Form> = None;
965 let mut current_states: Vec<State> = Vec::new();
966 let mut current_categories: Vec<Category> = Vec::new();
967 let mut current_category: Option<Category> = None;
968 let mut current_fields: Vec<Field> = Vec::new();
969 let mut current_field: Option<Field> = None;
970 let mut current_entries: Vec<Entry> = Vec::new();
971 let mut current_entry: Option<Entry> = None;
972 let mut current_comments: Vec<Comment> = Vec::new();
973 let mut current_comment: Option<Comment> = None;
974 let mut current_value: Option<Value> = None;
975 let mut current_reason: Option<Reason> = None;
976 let mut text_content = String::new();
977 let mut in_form = false;
978 let mut in_category = false;
979 let mut in_field = false;
980 let mut in_entry = false;
981 let mut in_comment = false;
982 let mut in_value = false;
983 let mut in_reason = false;
984 let mut buf = Vec::new();
985
986 loop {
987 match xml_reader.read_event_into(&mut buf) {
988 Err(e) => {
989 return Err(Error::ParsingError(quick_xml::de::DeError::Custom(
990 format!("XML reading error: {}", e),
991 )))
992 }
993 Ok(Event::Eof) => break,
994
995 Ok(Event::Start(ref e)) => {
996 let name_bytes = e.local_name();
997 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
998 match name {
999 "site" => {
1000 let attrs = extract_attributes(e)?;
1001 current_site = Some(Site::from_attributes(attrs)?);
1002 current_forms.clear();
1003 }
1004 "form" if current_site.is_some() => {
1005 let attrs = extract_attributes(e)?;
1006 current_form = Some(Form::from_attributes(attrs)?);
1007 in_form = true;
1008 current_states.clear();
1009 current_categories.clear();
1010 }
1011 "category" if in_form => {
1012 let attrs = extract_attributes(e)?;
1013 current_category = Some(Category::from_attributes(attrs)?);
1014 in_category = true;
1015 current_fields.clear();
1016 }
1017 "field" if in_category => {
1018 let attrs = extract_attributes(e)?;
1019 current_field = Some(Field::from_attributes(attrs)?);
1020 in_field = true;
1021 current_entries.clear();
1022 current_comments.clear();
1023 }
1024 "entry" if in_field => {
1025 let attrs = extract_attributes(e)?;
1026 current_entry = Some(Entry::from_attributes(attrs)?);
1027 in_entry = true;
1028 }
1029 "comment" if in_field => {
1030 let attrs = extract_attributes(e)?;
1031 let comment_id = attrs.get("id").cloned().unwrap_or_default();
1032 current_comment = Some(Comment {
1033 comment_id,
1034 value: None,
1035 });
1036 in_comment = true;
1037 }
1038 "value" if in_entry || in_comment => {
1039 let attrs = extract_attributes(e)?;
1040 current_value = Some(Value::from_attributes(attrs)?);
1041 in_value = true;
1042 text_content.clear();
1043 }
1044 "reason" if in_entry => {
1045 let attrs = extract_attributes(e)?;
1046 current_reason = Some(Reason::from_attributes(attrs)?);
1047 in_reason = true;
1048 text_content.clear();
1049 }
1050 _ => {}
1051 }
1052 }
1053 }
1054
1055 Ok(Event::Text(e)) if (in_value || in_reason) => {
1056 text_content.push_str(&String::from_utf8_lossy(&e));
1057 }
1058
1059 Ok(Event::End(ref e)) => {
1060 let name_bytes = e.local_name();
1061 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1062 match name {
1063 "site" => {
1064 if let Some(mut site) = current_site.take() {
1065 if !current_forms.is_empty() {
1066 site.set_forms(std::mem::take(&mut current_forms));
1067 }
1068 current_site = Some(site);
1069 }
1070 }
1071 "form" if in_form => {
1072 if let Some(mut form) = current_form.take() {
1073 if !current_states.is_empty() {
1074 form.states = Some(std::mem::take(&mut current_states));
1075 }
1076 if !current_categories.is_empty() {
1077 form.categories = Some(std::mem::take(&mut current_categories));
1078 }
1079 current_forms.push(form);
1080 }
1081 in_form = false;
1082 }
1083 "category" if in_category => {
1084 if let Some(mut category) = current_category.take() {
1085 if !current_fields.is_empty() {
1086 category.fields = Some(std::mem::take(&mut current_fields));
1087 }
1088 current_categories.push(category);
1089 }
1090 in_category = false;
1091 }
1092 "field" if in_field => {
1093 if let Some(mut field) = current_field.take() {
1094 if !current_entries.is_empty() {
1095 field.entries = Some(std::mem::take(&mut current_entries));
1096 }
1097 if !current_comments.is_empty() {
1098 field.comments = Some(std::mem::take(&mut current_comments));
1099 }
1100 current_fields.push(field);
1101 }
1102 in_field = false;
1103 }
1104 "entry" if in_entry => {
1105 if let Some(entry) = current_entry.take() {
1106 current_entries.push(entry);
1107 }
1108 in_entry = false;
1109 }
1110 "comment" if in_comment => {
1111 if let Some(comment) = current_comment.take() {
1112 current_comments.push(comment);
1113 }
1114 in_comment = false;
1115 }
1116 "value" if in_value => {
1117 if let Some(mut value) = current_value.take() {
1118 value.value = std::mem::take(&mut text_content);
1119 if let Some(ref mut entry) = current_entry {
1120 entry.value = Some(value);
1121 } else if let Some(ref mut comment) = current_comment {
1122 comment.value = Some(value);
1123 }
1124 }
1125 in_value = false;
1126 }
1127 "reason" if in_reason => {
1128 if let Some(mut reason) = current_reason.take() {
1129 reason.value = std::mem::take(&mut text_content);
1130 if let Some(ref mut entry) = current_entry {
1131 entry.reason = Some(reason);
1132 }
1133 }
1134 in_reason = false;
1135 }
1136 _ => {}
1137 }
1138 }
1139 }
1140
1141 Ok(Event::Empty(ref e)) => {
1142 let name_bytes = e.local_name();
1143 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1144 match name {
1145 "state" if in_form => {
1146 let attrs = extract_attributes(e)?;
1147 let state = State::from_attributes(attrs)?;
1148 current_states.push(state);
1149 }
1150 "lockState" if in_form => {
1151 let attrs = extract_attributes(e)?;
1152 let lock_state = LockState::from_attributes(attrs)?;
1153 if let Some(ref mut form) = current_form {
1154 form.lock_state = Some(lock_state);
1155 }
1156 }
1157 "field" if in_category => {
1158 let attrs = extract_attributes(e)?;
1159 let field = Field::from_attributes(attrs)?;
1160 current_fields.push(field);
1161 }
1162 "value" if in_entry => {
1163 let attrs = extract_attributes(e)?;
1164 let value = Value::from_attributes(attrs)?;
1165 if let Some(ref mut entry) = current_entry {
1166 entry.value = Some(value);
1167 }
1168 }
1169 "reason" if in_entry => {
1170 let attrs = extract_attributes(e)?;
1171 let reason = Reason::from_attributes(attrs)?;
1172 if let Some(ref mut entry) = current_entry {
1173 entry.reason = Some(reason);
1174 }
1175 }
1176 _ => {}
1177 }
1178 }
1179 }
1180
1181 _ => {}
1182 }
1183
1184 buf.clear();
1185 }
1186
1187 current_site.ok_or_else(|| {
1188 Error::ParsingError(quick_xml::de::DeError::Custom(
1189 "No site found in chunk".to_string(),
1190 ))
1191 })
1192}
1193
1194/// Parses a Prelude native user XML file into a `UserNative` struct.
1195///
1196/// # Example
1197///
1198/// ```
1199/// use std::path::Path;
1200///
1201/// use prelude_xml_parser::parse_user_native_file;
1202///
1203/// let file_path = Path::new("tests/assets/user_native.xml");
1204/// let native = parse_user_native_file(&file_path).unwrap();
1205///
1206/// assert!(native.users.len() >= 1, "Vector length is less than 1");
1207/// ```
1208pub fn parse_user_native_file(xml_path: &Path) -> Result<UserNative, Error> {
1209 check_valid_xml_file(xml_path)?;
1210
1211 let xml_file = read_to_string(xml_path)?;
1212 let native = parse_user_native_string(&xml_file)?;
1213
1214 Ok(native)
1215}
1216
1217/// Parse a string of Prelude native user XML into a `UserNative` struct.
1218///
1219/// # Example
1220///
1221/// ```
1222/// use chrono::{DateTime, Utc};
1223/// use prelude_xml_parser::parse_user_native_string;
1224/// use prelude_xml_parser::native::user_native::*;
1225///
1226/// let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
1227/// <export_from_vision_EDC date="02-Jun-2024 06:59 -0500" createdBy="Paul Sanders" role="Project Manager" numberSubjectsProcessed="3">
1228/// <user uniqueId="1691421275437" lastLanguage="" creator="Paul Sanders(1681162687395)" numberOfForms="1">
1229/// <form name="form.name.demographics" lastModified="2023-08-07 10:15:41 -0500" whoLastModifiedName="Paul Sanders" whoLastModifiedRole="Project Manager" whenCreated="1691421341578" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="User Demographics" formIndex="1" formGroup="" formState="In-Work">
1230/// <state value="form.state.in.work" signer="Paul Sanders - Project Manager" signerUniqueId="1681162687395" dateSigned="2023-08-07 10:15:41 -0500" />
1231/// <category name="demographics" type="normal" highestIndex="0">
1232/// <field name="address" type="text" dataType="string" errorCode="undefined" whenCreated="2024-01-12 14:14:09 -0600" keepHistory="true" />
1233/// <field name="email" type="text" dataType="string" errorCode="undefined" whenCreated="2023-08-07 10:15:41 -0500" keepHistory="true">
1234/// <entry id="1">
1235/// <value by="Paul Sanders" byUniqueId="1681162687395" role="Project Manager" when="2023-08-07 10:15:41 -0500" xml:space="preserve">jazz@artemis.com</value>
1236/// </entry>
1237/// </field>
1238/// </category>
1239/// <category name="Administrative" type="normal" highestIndex="0">
1240/// <field name="study_assignment" type="text" dataType="" errorCode="undefined" whenCreated="2023-08-07 10:15:41 -0500" keepHistory="true">
1241/// <entry id="1">
1242/// <value by="set from calculation" byUniqueId="" role="System" when="2023-08-07 10:15:41 -0500" xml:space="preserve">On 07-Aug-2023 10:15 -0500, Paul Sanders assigned user from another study</value>
1243/// <reason by="set from calculation" byUniqueId="" role="System" when="2023-08-07 10:15:41 -0500" xml:space="preserve">calculated value</reason>
1244/// </entry>
1245/// </field>
1246/// </category>
1247/// </form>
1248/// </user>
1249/// </export_from_vision_EDC>
1250/// "#;
1251///
1252/// let expected = UserNative {
1253/// users: vec![User {
1254/// unique_id: "1691421275437".to_string(),
1255/// last_language: None,
1256/// creator: "Paul Sanders(1681162687395)".to_string(),
1257/// number_of_forms: 1,
1258/// forms: Some(vec![Form {
1259/// name: "form.name.demographics".to_string(),
1260/// last_modified: Some(
1261/// DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1262/// .unwrap()
1263/// .with_timezone(&Utc),
1264/// ),
1265/// who_last_modified_name: Some("Paul Sanders".to_string()),
1266/// who_last_modified_role: Some("Project Manager".to_string()),
1267/// when_created: 1691421341578,
1268/// has_errors: false,
1269/// has_warnings: false,
1270/// locked: false,
1271/// user: None,
1272/// date_time_changed: None,
1273/// form_title: "User Demographics".to_string(),
1274/// form_index: 1,
1275/// form_group: None,
1276/// form_state: "In-Work".to_string(),
1277/// lock_state: None,
1278/// states: Some(vec![State {
1279/// value: "form.state.in.work".to_string(),
1280/// signer: "Paul Sanders - Project Manager".to_string(),
1281/// signer_unique_id: "1681162687395".to_string(),
1282/// date_signed: Some(
1283/// DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1284/// .unwrap()
1285/// .with_timezone(&Utc),
1286/// ),
1287/// }]),
1288/// categories: Some(vec![
1289/// Category {
1290/// name: "demographics".to_string(),
1291/// category_type: "normal".to_string(),
1292/// highest_index: 0,
1293/// fields: Some(vec![
1294/// Field {
1295/// name: "address".to_string(),
1296/// field_type: "text".to_string(),
1297/// data_type: Some("string".to_string()),
1298/// error_code: "undefined".to_string(),
1299/// when_created: Some(DateTime::parse_from_rfc3339("2024-01-12T20:14:09Z")
1300/// .unwrap()
1301/// .with_timezone(&Utc)),
1302/// keep_history: true,
1303/// entries: None,
1304/// comments: None,
1305/// },
1306/// Field {
1307/// name: "email".to_string(),
1308/// field_type: "text".to_string(),
1309/// data_type: Some("string".to_string()),
1310/// error_code: "undefined".to_string(),
1311/// when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1312/// .unwrap()
1313/// .with_timezone(&Utc)),
1314/// keep_history: true,
1315/// entries: Some(vec![Entry {
1316/// entry_id: "1".to_string(),
1317/// reviewed_by: None,
1318/// reviewed_by_unique_id: None,
1319/// reviewed_by_when: None,
1320/// value: Some(Value {
1321/// by: "Paul Sanders".to_string(),
1322/// by_unique_id: Some("1681162687395".to_string()),
1323/// role: "Project Manager".to_string(),
1324/// when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1325/// .unwrap()
1326/// .with_timezone(&Utc)),
1327/// value: "jazz@artemis.com".to_string(),
1328/// }),
1329/// reason: None,
1330/// }]),
1331/// comments: None,
1332/// },
1333/// ]),
1334/// },
1335/// Category {
1336/// name: "Administrative".to_string(),
1337/// category_type: "normal".to_string(),
1338/// highest_index: 0,
1339/// fields: Some(vec![
1340/// Field {
1341/// name: "study_assignment".to_string(),
1342/// field_type: "text".to_string(),
1343/// data_type: None,
1344/// error_code: "undefined".to_string(),
1345/// when_created: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1346/// .unwrap()
1347/// .with_timezone(&Utc)),
1348/// keep_history: true,
1349/// entries: Some(vec![
1350/// Entry {
1351/// entry_id: "1".to_string(),
1352/// reviewed_by: None,
1353/// reviewed_by_unique_id: None,
1354/// reviewed_by_when: None,
1355/// value: Some(Value {
1356/// by: "set from calculation".to_string(),
1357/// by_unique_id: None,
1358/// role: "System".to_string(),
1359/// when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1360/// .unwrap()
1361/// .with_timezone(&Utc)),
1362/// value: "On 07-Aug-2023 10:15 -0500, Paul Sanders assigned user from another study".to_string(),
1363/// }),
1364/// reason: Some(Reason {
1365/// by: "set from calculation".to_string(),
1366/// by_unique_id: None,
1367/// role: "System".to_string(),
1368/// when: Some(DateTime::parse_from_rfc3339("2023-08-07T15:15:41Z")
1369/// .unwrap()
1370/// .with_timezone(&Utc)),
1371/// value: "calculated value".to_string(),
1372/// }),
1373/// },
1374/// ]),
1375/// comments: None,
1376/// },
1377/// ]),
1378/// },
1379/// ]),
1380/// }]),
1381/// }],
1382/// };
1383///
1384/// let result = parse_user_native_string(xml).unwrap();
1385///
1386/// assert_eq!(result, expected);
1387/// ```
1388pub fn parse_user_native_string(xml_str: &str) -> Result<UserNative, Error> {
1389 let chunks = extract_user_chunks(xml_str);
1390 let users = chunks
1391 .into_par_iter()
1392 .map(parse_user_xml)
1393 .collect::<Result<Vec<_>, _>>()?;
1394 Ok(UserNative { users })
1395}
1396
/// Splits a raw export into the `<user …>…</user>` slices it contains.
///
/// The scan is purely textual: it looks for the literal `<user ` (with the trailing
/// space) and pairs it with the next literal `</user>`. Scanning stops at the first
/// opening tag that has no closing tag after it. The returned slices borrow from `xml`
/// and appear in document order.
fn extract_user_chunks(xml: &str) -> Vec<&str> {
    const OPEN_TAG: &str = "<user ";
    const CLOSE_TAG: &str = "</user>";

    let mut found = Vec::new();
    let mut cursor = 0;

    while let Some(open_offset) = xml[cursor..].find(OPEN_TAG) {
        let begin = cursor + open_offset;
        let finish = match xml[begin..].find(CLOSE_TAG) {
            Some(close_offset) => begin + close_offset + CLOSE_TAG.len(),
            // Unterminated element: nothing more can be extracted.
            None => break,
        };
        found.push(&xml[begin..finish]);
        cursor = finish;
    }

    found
}
1418
1419fn parse_user_xml(user_xml: &str) -> Result<User, Error> {
1420 let wrapped = format!("<r>{}</r>", user_xml);
1421 let mut xml_reader = Reader::from_reader(Cursor::new(wrapped.as_bytes()));
1422 xml_reader.config_mut().trim_text(true);
1423
1424 let mut current_user: Option<User> = None;
1425 let mut current_forms: Vec<Form> = Vec::new();
1426 let mut current_form: Option<Form> = None;
1427 let mut current_states: Vec<State> = Vec::new();
1428 let mut current_categories: Vec<Category> = Vec::new();
1429 let mut current_category: Option<Category> = None;
1430 let mut current_fields: Vec<Field> = Vec::new();
1431 let mut current_field: Option<Field> = None;
1432 let mut current_entries: Vec<Entry> = Vec::new();
1433 let mut current_entry: Option<Entry> = None;
1434 let mut current_comments: Vec<Comment> = Vec::new();
1435 let mut current_comment: Option<Comment> = None;
1436 let mut current_value: Option<Value> = None;
1437 let mut current_reason: Option<Reason> = None;
1438 let mut text_content = String::new();
1439 let mut in_form = false;
1440 let mut in_category = false;
1441 let mut in_field = false;
1442 let mut in_entry = false;
1443 let mut in_comment = false;
1444 let mut in_value = false;
1445 let mut in_reason = false;
1446 let mut buf = Vec::new();
1447
1448 loop {
1449 match xml_reader.read_event_into(&mut buf) {
1450 Err(e) => {
1451 return Err(Error::ParsingError(quick_xml::de::DeError::Custom(
1452 format!("XML reading error: {}", e),
1453 )))
1454 }
1455 Ok(Event::Eof) => break,
1456
1457 Ok(Event::Start(ref e)) => {
1458 let name_bytes = e.local_name();
1459 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1460 match name {
1461 "user" => {
1462 let attrs = extract_attributes(e)?;
1463 current_user = Some(User::from_attributes(attrs)?);
1464 }
1465 "form" => {
1466 let attrs = extract_attributes(e)?;
1467 current_form = Some(Form::from_attributes(attrs)?);
1468 in_form = true;
1469 }
1470 "category" if in_form => {
1471 let attrs = extract_attributes(e)?;
1472 current_category = Some(Category::from_attributes(attrs)?);
1473 in_category = true;
1474 }
1475 "field" if in_category => {
1476 let attrs = extract_attributes(e)?;
1477 current_field = Some(Field::from_attributes(attrs)?);
1478 in_field = true;
1479 }
1480 "entry" if in_field => {
1481 let attrs = extract_attributes(e)?;
1482 current_entry = Some(Entry::from_attributes(attrs)?);
1483 in_entry = true;
1484 }
1485 "comment" if in_field => {
1486 let attrs = extract_attributes(e)?;
1487 let comment_id = attrs.get("id").cloned().unwrap_or_default();
1488 current_comment = Some(Comment {
1489 comment_id,
1490 value: None,
1491 });
1492 in_comment = true;
1493 }
1494 "value" if in_entry || in_comment => {
1495 let attrs = extract_attributes(e)?;
1496 current_value = Some(Value::from_attributes(attrs)?);
1497 in_value = true;
1498 text_content.clear();
1499 }
1500 "reason" if in_entry => {
1501 let attrs = extract_attributes(e)?;
1502 current_reason = Some(Reason::from_attributes(attrs)?);
1503 in_reason = true;
1504 text_content.clear();
1505 }
1506 _ => {}
1507 }
1508 }
1509 }
1510
1511 Ok(Event::Text(e)) if (in_value || in_reason) => {
1512 text_content.push_str(&String::from_utf8_lossy(&e));
1513 }
1514
1515 Ok(Event::End(ref e)) => {
1516 let name_bytes = e.local_name();
1517 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1518 match name {
1519 "user" => {
1520 if let Some(mut user) = current_user.take() {
1521 if !current_forms.is_empty() {
1522 user.set_forms(std::mem::take(&mut current_forms));
1523 }
1524 current_user = Some(user);
1525 }
1526 }
1527 "form" if in_form => {
1528 if let Some(mut form) = current_form.take() {
1529 if !current_states.is_empty() {
1530 form.states = Some(std::mem::take(&mut current_states));
1531 }
1532 if !current_categories.is_empty() {
1533 form.categories = Some(std::mem::take(&mut current_categories));
1534 }
1535 current_forms.push(form);
1536 }
1537 in_form = false;
1538 }
1539 "category" if in_category => {
1540 if let Some(mut category) = current_category.take() {
1541 if !current_fields.is_empty() {
1542 category.fields = Some(std::mem::take(&mut current_fields));
1543 }
1544 current_categories.push(category);
1545 }
1546 in_category = false;
1547 }
1548 "field" if in_field => {
1549 if let Some(mut field) = current_field.take() {
1550 if !current_entries.is_empty() {
1551 field.entries = Some(std::mem::take(&mut current_entries));
1552 }
1553 if !current_comments.is_empty() {
1554 field.comments = Some(std::mem::take(&mut current_comments));
1555 }
1556 current_fields.push(field);
1557 }
1558 in_field = false;
1559 }
1560 "entry" if in_entry => {
1561 if let Some(entry) = current_entry.take() {
1562 current_entries.push(entry);
1563 }
1564 in_entry = false;
1565 }
1566 "comment" if in_comment => {
1567 if let Some(comment) = current_comment.take() {
1568 current_comments.push(comment);
1569 }
1570 in_comment = false;
1571 }
1572 "value" if in_value => {
1573 if let Some(mut value) = current_value.take() {
1574 value.value = std::mem::take(&mut text_content);
1575 if let Some(ref mut entry) = current_entry {
1576 entry.value = Some(value);
1577 } else if let Some(ref mut comment) = current_comment {
1578 comment.value = Some(value);
1579 }
1580 }
1581 in_value = false;
1582 }
1583 "reason" if in_reason => {
1584 if let Some(mut reason) = current_reason.take() {
1585 reason.value = std::mem::take(&mut text_content);
1586 if let Some(ref mut entry) = current_entry {
1587 entry.reason = Some(reason);
1588 }
1589 }
1590 in_reason = false;
1591 }
1592 _ => {}
1593 }
1594 }
1595 }
1596
1597 Ok(Event::Empty(ref e)) => {
1598 let name_bytes = e.local_name();
1599 if let Ok(name) = std::str::from_utf8(name_bytes.as_ref()) {
1600 match name {
1601 "state" if in_form => {
1602 let attrs = extract_attributes(e)?;
1603 let state = State::from_attributes(attrs)?;
1604 current_states.push(state);
1605 }
1606 "field" if in_category => {
1607 let attrs = extract_attributes(e)?;
1608 let field = Field::from_attributes(attrs)?;
1609 current_fields.push(field);
1610 }
1611 _ => {}
1612 }
1613 }
1614 }
1615
1616 _ => {}
1617 }
1618 buf.clear();
1619 }
1620
1621 current_user.ok_or_else(|| {
1622 Error::ParsingError(quick_xml::de::DeError::Custom(
1623 "No user element found".to_string(),
1624 ))
1625 })
1626}
1627
1628fn check_valid_xml_file(xml_path: &Path) -> Result<(), Error> {
1629 if !xml_path.exists() {
1630 return Err(Error::FileNotFound(xml_path.to_path_buf()));
1631 }
1632
1633 if let Some(extension) = xml_path.extension() {
1634 if extension != "xml" {
1635 return Err(Error::InvalidFileType(xml_path.to_owned()));
1636 }
1637 } else {
1638 return Err(Error::Unknown);
1639 }
1640
1641 Ok(())
1642}
1643
1644#[cfg(test)]
1645mod tests {
1646 use super::*;
1647 use tempfile::{tempdir, Builder};
1648
1649 #[test]
1650 fn test_site_file_not_found_error() {
1651 let dir = tempdir().unwrap().path().to_path_buf();
1652 let result = parse_site_native_file(&dir);
1653 assert!(result.is_err());
1654 assert!(matches!(result, Err(Error::FileNotFound(_))));
1655 }
1656
1657 #[test]
1658 fn test_site_invalid_file_type_error() {
1659 let file = Builder::new()
1660 .prefix("test")
1661 .suffix(".csv")
1662 .tempfile()
1663 .unwrap();
1664 let result = parse_site_native_file(file.path());
1665
1666 assert!(result.is_err());
1667 assert!(matches!(result, Err(Error::InvalidFileType(_))));
1668 }
1669
1670 #[test]
1671 fn test_subject_file_not_found_error() {
1672 let dir = tempdir().unwrap().path().to_path_buf();
1673 let result = parse_subject_native_file(&dir);
1674 assert!(result.is_err());
1675 assert!(matches!(result, Err(Error::FileNotFound(_))));
1676 }
1677
1678 #[test]
1679 fn test_subject_invalid_file_type_error() {
1680 let file = Builder::new()
1681 .prefix("test")
1682 .suffix(".csv")
1683 .tempfile()
1684 .unwrap();
1685 let result = parse_subject_native_file(file.path());
1686
1687 assert!(result.is_err());
1688 assert!(matches!(result, Err(Error::InvalidFileType(_))));
1689 }
1690
1691 #[test]
1692 fn test_user_file_not_found_error() {
1693 let dir = tempdir().unwrap().path().to_path_buf();
1694 let result = parse_user_native_file(&dir);
1695 assert!(result.is_err());
1696 assert!(matches!(result, Err(Error::FileNotFound(_))));
1697 }
1698
1699 #[test]
1700 fn test_user_invalid_file_type_error() {
1701 let file = Builder::new()
1702 .prefix("test")
1703 .suffix(".csv")
1704 .tempfile()
1705 .unwrap();
1706 let result = parse_user_native_file(file.path());
1707
1708 assert!(result.is_err());
1709 assert!(matches!(result, Err(Error::InvalidFileType(_))));
1710 }
1711
    /// Regression test: every form of a patient, together with its states,
    /// categories, fields, entries and values, must survive parsing.
    #[test]
    fn test_forms_parsing_regression() {
        // One patient with two forms: the first carries a full
        // category/field/entry/value tree, the second only a state.
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
    <patient patientId="TEST-001" uniqueId="123456789" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="2">
        <form name="test.form.1" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form 1" formIndex="1" formGroup="Test Group" formState="In-Work">
            <state value="form.state.in.work" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:09:02 -0400"/>
            <category name="Test Category" type="normal" highestIndex="0">
                <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
                    <entry id="1">
                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Test Value</value>
                    </entry>
                </field>
            </category>
        </form>
        <form name="test.form.2" lastModified="2023-04-15 12:10:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456790" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form 2" formIndex="2" formGroup="Test Group" formState="Complete">
            <state value="form.state.complete" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:10:02 -0400"/>
        </form>
    </patient>
</export_from_vision_EDC>"#;

        let result = parse_subject_native_string(xml).expect("Should parse successfully");

        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");

        // Patient-level attributes.
        let patient = &result.patients[0];
        assert_eq!(patient.patient_id, "TEST-001");
        assert_eq!(patient.number_of_forms, 2);

        let forms = patient.forms.as_ref().expect("Patient should have forms");
        assert_eq!(forms.len(), 2, "Patient should have exactly 2 forms");

        // First form: attributes, then walk down to the entered value.
        let form1 = &forms[0];
        assert_eq!(form1.name, "test.form.1");
        assert_eq!(form1.form_title, "Test Form 1");
        assert_eq!(form1.form_index, 1);
        assert_eq!(form1.form_state, "In-Work");

        let states1 = form1.states.as_ref().expect("Form 1 should have states");
        assert_eq!(states1.len(), 1);
        assert_eq!(states1[0].value, "form.state.in.work");

        let categories1 = form1
            .categories
            .as_ref()
            .expect("Form 1 should have categories");
        assert_eq!(categories1.len(), 1);
        assert_eq!(categories1[0].name, "Test Category");

        let fields1 = categories1[0]
            .fields
            .as_ref()
            .expect("Category should have fields");
        assert_eq!(fields1.len(), 1);
        assert_eq!(fields1[0].name, "test_field");

        let entries1 = fields1[0]
            .entries
            .as_ref()
            .expect("Field should have entries");
        assert_eq!(entries1.len(), 1);
        assert_eq!(entries1[0].entry_id, "1");

        let value1 = entries1[0].value.as_ref().expect("Entry should have value");
        assert_eq!(value1.value, "Test Value");
        assert_eq!(value1.by, "Test User");
        assert_eq!(value1.role, "Tester");

        // Second form: attributes and its single state.
        let form2 = &forms[1];
        assert_eq!(form2.name, "test.form.2");
        assert_eq!(form2.form_title, "Test Form 2");
        assert_eq!(form2.form_index, 2);
        assert_eq!(form2.form_state, "Complete");

        let states2 = form2.states.as_ref().expect("Form 2 should have states");
        assert_eq!(states2.len(), 1);
        assert_eq!(states2[0].value, "form.state.complete");
    }
1790
    /// Regression test: `<comment>` elements under a field must be collected on that
    /// field only, in document order, each with its own value.
    #[test]
    fn test_comments_parsing_regression() {
        // One form with two fields: the first has an entry plus two comments, the
        // second has an entry and no comments.
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
    <patient patientId="TEST-002" uniqueId="123456790" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="1">
        <form name="test.form.with.comments" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form With Comments" formIndex="1" formGroup="Test Group" formState="In-Work">
            <category name="Test Category" type="normal" highestIndex="0">
                <field name="field_with_comments" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
                    <entry id="1">
                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Field Value</value>
                    </entry>
                    <comment id="1">
                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:05 -0400" xml:space="preserve">First comment</value>
                    </comment>
                    <comment id="2">
                        <value by="Another User" byUniqueId="222222222" role="Reviewer" when="2023-04-15 12:10:00 -0400" xml:space="preserve">Second comment</value>
                    </comment>
                </field>
                <field name="field_without_comments" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:30 -0400" keepHistory="true">
                    <entry id="1">
                        <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:10 -0400" xml:space="preserve">Another Value</value>
                    </entry>
                </field>
            </category>
        </form>
    </patient>
</export_from_vision_EDC>"#;

        let result = parse_subject_native_string(xml).expect("Should parse successfully");

        assert_eq!(result.patients.len(), 1, "Should have exactly 1 patient");

        // Walk down to the fields of the only category.
        let patient = &result.patients[0];
        let forms = patient.forms.as_ref().expect("Patient should have forms");
        let form = &forms[0];
        let categories = form
            .categories
            .as_ref()
            .expect("Form should have categories");
        let fields = categories[0]
            .fields
            .as_ref()
            .expect("Category should have fields");
        assert_eq!(fields.len(), 2, "Should have 2 fields");

        let field_with_comments = &fields[0];
        assert_eq!(field_with_comments.name, "field_with_comments");

        let comments = field_with_comments
            .comments
            .as_ref()
            .expect("Field should have comments");
        assert_eq!(comments.len(), 2, "Should have exactly 2 comments");

        // Comments keep document order and their own author/role.
        let comment1 = &comments[0];
        assert_eq!(comment1.comment_id, "1");
        let comment1_value = comment1
            .value
            .as_ref()
            .expect("Comment 1 should have value");
        assert_eq!(comment1_value.value, "First comment");
        assert_eq!(comment1_value.by, "Test User");
        assert_eq!(comment1_value.role, "Tester");

        let comment2 = &comments[1];
        assert_eq!(comment2.comment_id, "2");
        let comment2_value = comment2
            .value
            .as_ref()
            .expect("Comment 2 should have value");
        assert_eq!(comment2_value.value, "Second comment");
        assert_eq!(comment2_value.by, "Another User");
        assert_eq!(comment2_value.role, "Reviewer");

        // Comments from the first field must not leak into the second.
        let field_without_comments = &fields[1];
        assert_eq!(field_without_comments.name, "field_without_comments");
        assert!(
            field_without_comments.comments.is_none(),
            "Field without comments should have no comments"
        );
    }
1872
/// A `<patient>` element with `numberOfForms="0"` and no `<form>` children
/// must still be parsed, with `forms` left as `None`.
#[test]
fn test_empty_forms_handling() {
    let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
 <patient patientId="TEST-003" uniqueId="123456791" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="0">
 </patient>
</export_from_vision_EDC>"#;

    let parsed = parse_subject_native_string(document).expect("Should parse successfully");

    assert_eq!(parsed.patients.len(), 1, "Should have exactly 1 patient");

    let only_patient = &parsed.patients[0];
    assert_eq!(only_patient.patient_id, "TEST-003");
    assert_eq!(only_patient.number_of_forms, 0);
    assert!(
        only_patient.forms.is_none(),
        "Patient with 0 forms should have None for forms"
    );
}
1893
/// Regression test for a patient that owns many forms.
///
/// Generates a document with 50 forms (each with one state, one category,
/// one field, one entry and one comment) and checks that every form is
/// returned, in order, with its own numbered values.
#[test]
fn test_large_patient_forms_regression() {
    let header = r#"<?xml version="1.0" encoding="UTF-8"?>
<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
 <patient patientId="LARGE-TEST" uniqueId="123456792" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="50">"#;

    // One <form> fragment per index; every placeholder receives the same number.
    let form_fragments: String = (1..=50)
        .map(|i| {
            format!(r#"
 <form name="test.form.{}" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="12345678{}" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form {}" formIndex="{}" formGroup="Test Group" formState="In-Work">
 <state value="form.state.in.work" signer="Test User - Tester" signerUniqueId="111111111" dateSigned="2023-04-15 12:09:02 -0400"/>
 <category name="Category {}" type="normal" highestIndex="0">
 <field name="field_{}" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
 <entry id="1">
 <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Value {}</value>
 </entry>
 <comment id="1">
 <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:05 -0400" xml:space="preserve">Comment for form {}</value>
 </comment>
 </field>
 </category>
 </form>"#, i, i, i, i, i, i, i, i)
        })
        .collect();

    let footer = r#"
 </patient>
</export_from_vision_EDC>"#;

    let xml = [header, form_fragments.as_str(), footer].concat();

    let parsed =
        parse_subject_native_string(&xml).expect("Should parse large patient successfully");

    assert_eq!(parsed.patients.len(), 1, "Should have exactly 1 patient");

    let big_patient = &parsed.patients[0];
    assert_eq!(big_patient.patient_id, "LARGE-TEST");
    assert_eq!(big_patient.number_of_forms, 50);

    let all_forms = big_patient
        .forms
        .as_ref()
        .expect("Patient should have forms");
    assert_eq!(all_forms.len(), 50, "Patient should have exactly 50 forms");

    // Forms were generated with 1-based numbering, so pair each with 1, 2, 3, ...
    for (form_num, form) in (1usize..).zip(all_forms.iter()) {
        assert_eq!(form.name, format!("test.form.{}", form_num));
        assert_eq!(form.form_title, format!("Test Form {}", form_num));
        assert_eq!(form.form_index, form_num);

        let category_list = form
            .categories
            .as_ref()
            .expect("Form should have categories");
        assert_eq!(category_list.len(), 1);

        let field_list = category_list[0]
            .fields
            .as_ref()
            .expect("Category should have fields");
        assert_eq!(field_list.len(), 1);
        let only_field = &field_list[0];

        let entry_list = only_field
            .entries
            .as_ref()
            .expect("Field should have entries");
        assert_eq!(entry_list.len(), 1);
        assert_eq!(
            entry_list[0].value.as_ref().unwrap().value,
            format!("Value {}", form_num)
        );

        let comment_list = only_field
            .comments
            .as_ref()
            .expect("Field should have comments");
        assert_eq!(comment_list.len(), 1);
        assert_eq!(
            comment_list[0].value.as_ref().unwrap().value,
            format!("Comment for form {}", form_num)
        );
    }
}
1976
/// Empty datetime attributes must not abort parsing.
///
/// The patient's `whenCreated`, the form's `lastModified` and the field's
/// `whenCreated` are all empty strings in the fixture; each is expected to
/// surface as `None`.
#[test]
fn test_malformed_datetime_handling() {
    let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
 <patient patientId="TEST-004" uniqueId="123456793" whenCreated="" creator="Test User" siteName="Test Site" siteUniqueId="987654321" lastLanguage="English" numberOfForms="1">
 <form name="test.form.malformed.dates" lastModified="" whoLastModifiedName="Test User" whoLastModifiedRole="Tester" whenCreated="123456789" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form" formIndex="1" formGroup="Test Group" formState="In-Work">
 <category name="Test Category" type="normal" highestIndex="0">
 <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="" keepHistory="true">
 <entry id="1">
 <value by="Test User" byUniqueId="111111111" role="Tester" when="2023-04-15 12:09:02 -0400" xml:space="preserve">Test Value</value>
 </entry>
 </field>
 </category>
 </form>
 </patient>
</export_from_vision_EDC>"#;

    let parsed = parse_subject_native_string(document)
        .expect("Should handle malformed datetimes gracefully");

    assert_eq!(parsed.patients.len(), 1, "Should have exactly 1 patient");

    // Patient level.
    let only_patient = &parsed.patients[0];
    assert!(
        only_patient.when_created.is_none(),
        "Empty whenCreated should be None"
    );

    // Form level.
    let first_form = &only_patient
        .forms
        .as_ref()
        .expect("Patient should have forms")[0];
    assert!(
        first_form.last_modified.is_none(),
        "Empty lastModified should be None"
    );

    // Field level.
    let first_field = &first_form
        .categories
        .as_ref()
        .expect("Form should have categories")[0]
        .fields
        .as_ref()
        .expect("Category should have fields")[0];
    assert!(
        first_field.when_created.is_none(),
        "Empty whenCreated in field should be None"
    );
}
2026
/// An empty `when` attribute on `<value>` and `<reason>` inside an entry
/// must parse to `None` while the element text is still captured.
#[test]
fn test_empty_datetime_in_value_and_reason() {
    let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<export_from_vision_EDC date="30-May-2024 10:35 -0500" createdBy="Test User" role="Project Manager" numberSubjectsProcessed="1">
 <patient patientId="TEST-001" uniqueId="123456" whenCreated="2023-04-15 12:09:02 -0400" creator="Test User" siteName="Test Site" siteUniqueId="654321" lastLanguage="" numberOfForms="1">
 <form name="test.form" lastModified="2023-04-15 12:09:15 -0400" whoLastModifiedName="Test User" whoLastModifiedRole="Manager" whenCreated="1681574905839" hasErrors="false" hasWarnings="false" locked="false" user="" dateTimeChanged="" formTitle="Test Form" formIndex="1" formGroup="Test" formState="In-Work">
 <state value="form.state.in.work" signer="Test User - Manager" signerUniqueId="123456" dateSigned="2023-04-15 12:09:02 -0400" />
 <category name="Test Category" type="normal" highestIndex="0">
 <field name="test_field" type="text" dataType="string" errorCode="valid" whenCreated="2023-04-15 12:08:26 -0400" keepHistory="true">
 <entry id="1">
 <value by="Test User" byUniqueId="123456" role="Manager" when="" xml:space="preserve">Test Value</value>
 <reason by="Test User" byUniqueId="123456" role="Manager" when="" xml:space="preserve">Test Reason</reason>
 </entry>
 </field>
 </category>
 </form>
 </patient>
</export_from_vision_EDC>"#;

    let outcome = parse_subject_native_string(document);
    assert!(outcome.is_ok(), "Should parse successfully: {:?}", outcome);

    let parsed = outcome.unwrap();
    assert_eq!(parsed.patients.len(), 1, "Should have 1 patient");

    // Descend patient -> form -> category -> field -> entry.
    let first_form = &parsed.patients[0]
        .forms
        .as_ref()
        .expect("Patient should have forms")[0];
    let first_field = &first_form
        .categories
        .as_ref()
        .expect("Form should have categories")[0]
        .fields
        .as_ref()
        .expect("Category should have fields")[0];
    let first_entry = &first_field
        .entries
        .as_ref()
        .expect("Field should have entries")[0];

    let entry_value = first_entry
        .value
        .as_ref()
        .expect("Entry should have value");
    assert!(
        entry_value.when.is_none(),
        "Empty when attribute in value should be None"
    );
    assert_eq!(entry_value.value, "Test Value");

    let entry_reason = first_entry
        .reason
        .as_ref()
        .expect("Entry should have reason");
    assert!(
        entry_reason.when.is_none(),
        "Empty when attribute in reason should be None"
    );
    assert_eq!(entry_reason.value, "Test Reason");
}
2081}