// XML declaration, default XML namespace values, and sanitisers for malformed OData XML.
use regex::Regex;
/// The XML declaration (version 1.0, UTF-8 encoding) as raw bytes.
pub static XML_DECLARATION: &[u8] = b"<?xml version=\"1.0\" encoding=\"utf-8\"?>";
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// XML Defaults
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/// Named provider for a default value of `true`.
pub fn default_true() -> bool {
    true
}
/// Named provider for a default value of `false`.
pub fn default_false() -> bool {
    false
}
/// Default XML language code: `en`.
pub fn default_xml_language() -> String {
    String::from("en")
}
/// Default XML namespace URI: the Microsoft ADO `2008/09/edm` schema.
pub fn default_xml_namespace() -> String {
    String::from("http://schemas.microsoft.com/ado/2008/09/edm")
}
/// Default Atom namespace URI, wrapped in `Some`.
pub fn default_xml_namespace_atom() -> Option<String> {
    let atom_uri = String::from("http://www.w3.org/2005/Atom");
    Some(atom_uri)
}
/// Default `edmx` namespace URI: the Microsoft ADO `2007/06/edmx` schema.
pub fn default_xml_namespace_edmx() -> String {
    String::from("http://schemas.microsoft.com/ado/2007/06/edmx")
}
/// Default `d` namespace URI: the Microsoft ADO `2007/08/dataservices` schema.
pub fn default_xml_namespace_d() -> String {
    String::from("http://schemas.microsoft.com/ado/2007/08/dataservices")
}
/// Default `m` namespace URI: the Microsoft ADO `2007/08/dataservices/metadata` schema.
pub fn default_xml_namespace_m() -> String {
    String::from("http://schemas.microsoft.com/ado/2007/08/dataservices/metadata")
}
/// Default OASIS namespace URI: `http://docs.oasis-open.org/odata/ns/edm`.
pub fn default_xml_namespace_oasis() -> String {
    String::from("http://docs.oasis-open.org/odata/ns/edm")
}
/// Default SAP namespace URI: `http://www.sap.com/Protocols/SAPData`.
pub fn default_xml_namespace_sap() -> String {
    String::from("http://www.sap.com/Protocols/SAPData")
}
/// # Correct formatting errors in raw XML
///
/// Certain entity sets read from SAP's demo OData service `GWSAMPLE_BASIC` contain formatting errors that cause an
/// XML parser to reject the document.
///
/// Whether these errors also occur in other SAP-delivered OData services has not been determined; the corrections
/// below cover only the errors detected so far in `GWSAMPLE_BASIC`. They are applied in the order listed.
///
/// 1. Potentially invalid `m:etag` attribute values on an `<entry>` tag:
///
///    ```xml
///    <entry m:etag="W/"datetime'2023-08-31T01%3A00%3A06.0000000'"">
///    ```
///
///    is corrected to:
///
///    ```xml
///    <entry m:etag="datetime'2023-08-31T01%3A00%3A06.0000000'">
///    ```
///
/// 2. Entity set properties containing text descriptions are not enclosed in double quotes, and it is not the
///    convention to escape or character-encode special characters in them, so naked ampersands can appear:
///
///    ```xml
///    <d:Category>PDAs & Organizers</d:Category>
///    ```
///
///    is corrected to:
///
///    ```xml
///    <d:Category>PDAs &amp; Organizers</d:Category>
///    ```
pub fn sanitise_xml(xml: String) -> String {
    // ETags are repaired first; the result is then scanned for naked ampersands
    sanitise_naked_ampersand(sanitise_bad_etags(xml))
}
/// Repairs malformed `m:etag` attribute values on `<entry>` tags.
///
/// A malformed value carries a `W/` prefix followed by a stray double quote, and a second stray double quote before
/// the attribute's real closing quote. The stray quote may be present either as a literal `"` or as the character
/// entity `&quot;`; both spellings are handled.
///
/// If the input contains no such `<entry>` tag, it is returned untouched. Otherwise:
///
/// * `m:etag="W/"` and `m:etag="W/&quot;` are reduced to `m:etag="`
/// * `'"">` and `'&quot;">` are reduced to `'">`
///
/// Once triggered, the replacements apply to every occurrence of these sequences in the document, not only those
/// inside the offending `<entry>` tag.
fn sanitise_bad_etags(xml: String) -> String {
    let has_bad_etag =
        xml.contains("entry m:etag=\"W/\"") || xml.contains("entry m:etag=\"W/&quot;");

    // Nothing to repair: hand the input back without allocating
    if !has_bad_etag {
        return xml;
    }

    xml.replace("m:etag=\"W/\"", "m:etag=\"")
        .replace("m:etag=\"W/&quot;", "m:etag=\"")
        .replace("'\"\">", "'\">")
        .replace("'&quot;\">", "'\">")
}
/// Naked ampersand characters might occur in OData properties containing text descriptions.
/// E.G.:
///
/// ```xml
/// <d:Category>PDAs & Organizers</d:Category>
/// ```
///
/// Such characters must be replaced with the character entity `&amp;`:
///
/// ```xml
/// <d:Category>PDAs &amp; Organizers</d:Category>
/// ```
///
/// Two passes are made over the input:
///
/// 1. Each ampersand with a non-whitespace character immediately before and after it is replaced.
/// 2. Each ampersand with a single space on either side is replaced.
///
/// This functionality assumes that the XML does not already contain the character string `&amp;`: the first pass
/// would escape its leading ampersand a second time.
///
/// # Panics
///
/// Only if the hard-coded regular expression fails to compile.
fn sanitise_naked_ampersand(xml: String) -> String {
    let re = Regex::new(r"(\S)\&(\S)").expect("hard-coded ampersand pattern must compile");

    // `${1}` and `${2}` re-emit the neighbouring characters that the pattern consumed
    let clean_xml = re.replace_all(&xml, "${1}&amp;${2}");

    clean_xml.replace(" & ", " &amp; ")
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#[cfg(test)]
pub mod unit_tests;