parse_sap_atom_feed/xml/
mod.rs

1use regex::Regex;
2
// Language code returned by `default_xml_language`
static ISO_LANGUAGE_ENGLISH: &str = "en";

// XML namespace URIs returned by the `default_xml_namespace*` functions.
// The `'static` lifetime is implied for references held in a `static`, so it is not spelled out.

// Microsoft data services namespaces
static XML_NAMESPACE_MS_DATA_SERVICES: &str =
    "http://schemas.microsoft.com/ado/2007/08/dataservices";
static XML_NAMESPACE_MS_DATA_SERVICES_METADATA: &str =
    "http://schemas.microsoft.com/ado/2007/08/dataservices/metadata";
static XML_NAMESPACE_MS_DATA_SERVICES_SCHEME: &str =
    "http://schemas.microsoft.com/ado/2007/08/dataservices/scheme";

// Microsoft EDM / EDMX namespaces
static XML_NAMESPACE_MS_EDM: &str = "http://schemas.microsoft.com/ado/2008/09/edm";
static XML_NAMESPACE_MS_EDMX: &str = "http://schemas.microsoft.com/ado/2007/06/edmx";

// OASIS OData EDM namespace
static XML_NAMESPACE_OASIS_ODATA_EDM: &str = "http://docs.oasis-open.org/odata/ns/edm";

// SAP data namespace
static XML_NAMESPACE_SAP_DATA: &str = "http://www.sap.com/Protocols/SAPData";

// W3C Atom publishing (app) and Atom syndication namespaces
static XML_NAMESPACE_W3_APP: &str = "http://www.w3.org/2007/app";
static XML_NAMESPACE_W3_ATOM: &str = "http://www.w3.org/2005/Atom";
16
17// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
18// XML Defaults
19// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/// Default provider for boolean values that should start out as `true`.
pub fn default_true() -> bool {
    true
}
/// Default provider for boolean values that should start out as `false`.
pub fn default_false() -> bool {
    false
}
26pub fn default_xml_language() -> String {
27    ISO_LANGUAGE_ENGLISH.to_string()
28}
29pub fn default_xml_namespace() -> String {
30    XML_NAMESPACE_MS_EDM.to_string()
31}
32pub fn default_xml_namespace_app() -> String {
33    XML_NAMESPACE_W3_APP.to_string()
34}
35pub fn default_xml_namespace_atom() -> Option<String> {
36    Some(XML_NAMESPACE_W3_ATOM.to_string())
37}
38pub fn default_xml_namespace_edmx() -> String {
39    XML_NAMESPACE_MS_EDMX.to_string()
40}
41pub fn default_xml_namespace_d() -> String {
42    XML_NAMESPACE_MS_DATA_SERVICES.to_string()
43}
44pub fn default_xml_namespace_m() -> String {
45    XML_NAMESPACE_MS_DATA_SERVICES_METADATA.to_string()
46}
47pub fn default_xml_namespace_oasis() -> String {
48    XML_NAMESPACE_OASIS_ODATA_EDM.to_string()
49}
50pub fn default_xml_namespace_sap() -> String {
51    XML_NAMESPACE_SAP_DATA.to_string()
52}
53pub fn default_xml_data_services_scheme() -> String {
54    XML_NAMESPACE_MS_DATA_SERVICES_SCHEME.to_string()
55}
56
57/// # CORRECT FORMATTING ERRORS IN RAW XML
58///
59/// When reading certain entity sets from SAP's demo OData service `GWSAMPLE_BASIC`, various formatting errors have been
60/// noticed that will cause an XML parser to throw its toys out of the pram.
61///
62/// Whether these errors also occur in other SAP-delivered OData services has not been determined; however, coding is
63/// included here to correct those errors detected so far in GWSAMPLE_BASIC
64///
65/// 1. Correct potentially invalid `m:etag` attribute values on an `<entry>` tag:
66///
67///    ```xml
68///    <entry m:etag="W/"datetime'2023-08-31T01%3A00%3A06.0000000'"">
69///    ```
70///
71///    Is corrected to:
72///
73///    ```xml
74///    <entry m:etag="datetime'2023-08-31T01%3A00%3A06.0000000'">
75///    ```
76///
77/// 1. Entity set content properties containing text descriptions are not enclosed in double quotes, neither is it the
78///    convention to escape or character encode special characters.
79///    E.G.:
80///
81///    ```xml
82///    <d:Category>PDAs & Organizers</d:Category>
83///    ```
84///
85///    Is corrected to:
86///
87///    ```xml
88///    <d:Category>PDAs &amp; Organizers</d:Category>
89///    ```
90pub fn sanitise_xml(xml: String) -> String {
91    let clean_xml = sanitise_bad_etags(xml);
92
93    sanitise_naked_ampersand(clean_xml)
94}
95
/// Repairs malformed `m:etag` attribute values on `<entry>` tags.
///
/// If the document contains no `entry m:etag="W/"` or `entry m:etag="W/&quot;` marker, the input is returned
/// untouched. Otherwise the `W/"` (or `W/&quot;`) prefix is removed from every `m:etag` value, and every `'"">` or
/// `'&quot;">` sequence in the document is reduced to `'">`.
fn sanitise_bad_etags(xml: String) -> String {
    let bad_etag_markers = ["entry m:etag=\"W/\"", "entry m:etag=\"W/&quot;"];
    let has_bad_etag = bad_etag_markers
        .iter()
        .any(|marker| xml.contains(*marker));

    // Nothing to repair: hand the original string back without copying it
    if !has_bad_etag {
        return xml;
    }

    // Strip the opening prefixes first, then the surplus closing quote
    xml.replace("m:etag=\"W/\"", "m:etag=\"")
        .replace("m:etag=\"W/&quot;", "m:etag=\"")
        .replace("'\"\">", "'\">")
        .replace("'&quot;\">", "'\">")
}
107
/// Escapes naked ampersand characters, which might occur in OData properties containing text descriptions.
/// E.G.:
///
/// `<d:Category>PDAs & Organizers</d:Category>`
///
/// becomes
///
/// `<d:Category>PDAs &amp; Organizers</d:Category>`
///
/// Every `&` is replaced with `&amp;` unless it already starts one of the references that are valid in XML without
/// a DTD: the predefined entities `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, or a numeric character reference such
/// as `&#38;` or `&#x26;`. Such references are copied through unchanged, so already well-formed text is not
/// double-escaped.
///
/// The document is scanned once, which means that adjacent ampersands (`a&b&c`) and ampersands with whitespace on
/// only one side (`a &b`, `a& b`) are all escaped.
///
/// `CDATA` sections are not treated specially.
fn sanitise_naked_ampersand(xml: String) -> String {
    // Fast path: nothing to escape, so hand the input back without reallocating
    if !xml.contains('&') {
        return xml;
    }

    let mut clean_xml = String::with_capacity(xml.len() + 16);
    let mut rest = xml.as_str();

    while let Some(amp_pos) = rest.find('&') {
        clean_xml.push_str(&rest[..amp_pos]);

        // '&' is a single byte in UTF-8, so `amp_pos + 1` is always a character boundary
        let after_amp = &rest[amp_pos + 1..];

        if is_xml_reference(after_amp) {
            // Already a well-formed reference: keep the ampersand as it is
            clean_xml.push('&');
        } else {
            clean_xml.push_str("&amp;");
        }

        rest = after_amp;
    }

    clean_xml.push_str(rest);
    clean_xml
}

/// Returns `true` if `after_amp`, the text immediately following an `&`, completes a predefined XML entity
/// reference or a decimal/hexadecimal character reference.
fn is_xml_reference(after_amp: &str) -> bool {
    const PREDEFINED_ENTITIES: [&str; 5] = ["amp;", "lt;", "gt;", "quot;", "apos;"];

    if PREDEFINED_ENTITIES
        .iter()
        .any(|entity| after_amp.starts_with(*entity))
    {
        return true;
    }

    // Numeric character references: `#x` + hex digits + `;` or `#` + decimal digits + `;`
    let (digits, radix): (&str, u32) = if let Some(hex) = after_amp.strip_prefix("#x") {
        (hex, 16)
    } else if let Some(decimal) = after_amp.strip_prefix('#') {
        (decimal, 10)
    } else {
        return false;
    };

    // Only the leading run of digits is inspected, so the cost is bounded by the reference length
    let digit_count = digits
        .bytes()
        .take_while(|byte| char::from(*byte).is_digit(radix))
        .count();

    digit_count > 0 && digits.as_bytes().get(digit_count) == Some(&b';')
}
125
126// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
127#[cfg(test)]
128pub mod unit_tests;