#![deny(missing_docs)]
#![deny(unsafe_code)]
#![warn(rust_2018_idioms)]
use md5::{Digest, Md5};
use std::fmt;
use std::fs;
use std::io;
use std::path::Path;
/// Namespace URI that an element named `hash` must carry to be treated as the
/// TISS hash element (the element whose text is left out of the digest).
pub const TISS_NAMESPACE: &str = "http://www.ans.gov.br/padroes/tiss/schemas";
/// Errors returned by the TISS hashing functions in this file.
#[derive(Debug)]
pub enum TissHashError {
/// The input could not be hashed as TISS XML. The string carries a
/// human-readable reason: an unsupported UTF-16/UTF-32 BOM, the XML parser's
/// error text, or more than one `hash` element in the TISS namespace.
InvalidXml(String),
/// An I/O error occurred while reading the XML; wraps the underlying
/// `io::Error`, which is also exposed through `Error::source`.
Io(io::Error),
}
impl fmt::Display for TissHashError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::InvalidXml(msg) => write!(f, "XML inválido para hash TISS: {msg}"),
Self::Io(err) => write!(f, "erro de I/O ao ler XML TISS: {err}"),
}
}
}
/// Exposes the wrapped `io::Error` as the error source; `InvalidXml` carries
/// only a message and therefore has no source.
impl std::error::Error for TissHashError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Io(err) => {
                let cause: &(dyn std::error::Error + 'static) = err;
                Some(cause)
            }
            Self::InvalidXml(_) => None,
        }
    }
}
impl From<io::Error> for TissHashError {
fn from(err: io::Error) -> Self {
Self::Io(err)
}
}
impl From<roxmltree::Error> for TissHashError {
fn from(err: roxmltree::Error) -> Self {
Self::InvalidXml(err.to_string())
}
}
/// Computes the TISS hash of an XML message held in memory.
///
/// The digest is the MD5 of the concatenated text of every node accepted by
/// `is_leaf_for_hash`, visited in document order starting at the root
/// element, skipping the `hash` element of the TISS namespace. The result is
/// returned as 32 lowercase hexadecimal characters.
///
/// # Errors
///
/// Returns [`TissHashError::InvalidXml`] when the input starts with a UTF-16
/// or UTF-32 byte-order mark, when the XML parser rejects the document, or
/// when more than one TISS `hash` element is present.
pub fn hash_tiss(xml: &[u8]) -> Result<String, TissHashError> {
    // Reject encodings the decoder below cannot handle before doing any work.
    if let Some(enc) = detect_unsupported_bom(xml) {
        return Err(TissHashError::InvalidXml(format!(
            "encoding {enc} fora de escopo (suportado: ISO-8859-1, UTF-8)"
        )));
    }

    let text = decode_to_utf8(xml);
    let document = roxmltree::Document::parse_with_options(
        &text,
        roxmltree::ParsingOptions {
            allow_dtd: true,
            ..Default::default()
        },
    )?;
    let root = document.root_element();
    let excluded = find_hash_node(root)?;

    // Feeding the hasher chunk by chunk yields the same digest as hashing the
    // concatenation of all chunks.
    let mut md5 = Md5::new();
    root.descendants()
        .filter(|node| is_leaf_for_hash(*node))
        .filter(|node| excluded != Some(node.id()))
        .filter_map(|node| node.text())
        .for_each(|chunk| md5.update(chunk.as_bytes()));

    Ok(hex_lower(&md5.finalize()))
}
pub fn hash_tiss_file<P: AsRef<Path>>(path: P) -> Result<String, TissHashError> {
let raw = fs::read(path)?;
hash_tiss(&raw)
}
/// Tells whether `n` contributes its text to the hash.
///
/// A node qualifies when it is an element or a comment and none of its
/// children is an element, a comment, or a processing instruction.
fn is_leaf_for_hash(n: roxmltree::Node<'_, '_>) -> bool {
    let eligible_kind = n.is_element() || n.is_comment();
    eligible_kind
        && n.children()
            .all(|child| !child.is_element() && !child.is_comment() && !child.is_pi())
}
/// Locates the `hash` element of the TISS namespace among `root` and its
/// descendants.
///
/// Returns `Ok(None)` when there is no such element and `Ok(Some(id))` when
/// there is exactly one.
///
/// # Errors
///
/// Returns [`TissHashError::InvalidXml`] when more than one matching element
/// is found; the message reports how many were seen.
fn find_hash_node(
    root: roxmltree::Node<'_, '_>,
) -> Result<Option<roxmltree::NodeId>, TissHashError> {
    // One pass: remember the first match and count all of them.
    let (found, count) = root
        .descendants()
        .filter(|node| {
            node.is_element() && {
                let tag = node.tag_name();
                tag.name() == "hash" && tag.namespace() == Some(TISS_NAMESPACE)
            }
        })
        .fold(
            (None::<roxmltree::NodeId>, 0usize),
            |(first, seen), node| (first.or(Some(node.id())), seen + 1),
        );

    if count > 1 {
        return Err(TissHashError::InvalidXml(format!(
            "múltiplos elementos <hash> do namespace TISS (encontrados {count}, esperado no máximo 1)"
        )));
    }
    Ok(found)
}
/// Inspects the leading bytes of `raw` for a UTF-16 or UTF-32 byte-order mark.
///
/// Returns the name of the encoding family (`"UTF-32"` or `"UTF-16"`) when
/// such a BOM is present and `None` otherwise. The UTF-32 patterns are tried
/// first because the UTF-32 LE mark begins with the UTF-16 LE mark.
fn detect_unsupported_bom(raw: &[u8]) -> Option<&'static str> {
    match raw {
        [0xFF, 0xFE, 0x00, 0x00, ..] | [0x00, 0x00, 0xFE, 0xFF, ..] => Some("UTF-32"),
        [0xFF, 0xFE, ..] | [0xFE, 0xFF, ..] => Some("UTF-16"),
        _ => None,
    }
}
/// Decodes raw XML bytes into a UTF-8 `String` suitable for the XML parser.
///
/// A leading UTF-8 byte-order mark is dropped. The first 200 bytes are then
/// searched, ignoring ASCII case, for an `encoding="iso-8859-1"` (or
/// single-quoted) declaration:
///
/// * when one is found, every byte is decoded as ISO-8859-1 (each byte maps
///   to the code point of the same value) and the encoding label of that
///   declaration is replaced by `utf-8`, whatever its original letter case;
///   the quotes and the rest of the document are left untouched;
/// * otherwise the bytes are decoded as UTF-8, with invalid sequences
///   replaced by U+FFFD.
fn decode_to_utf8(raw: &[u8]) -> String {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
    // Number of leading bytes inspected for the encoding declaration.
    const HEAD_SCAN_LEN: usize = 200;
    // Lowercase forms of the declaration, one per quote style.
    const DECLARATIONS: [&[u8]; 2] = [b"encoding=\"iso-8859-1\"", b"encoding='iso-8859-1'"];
    // Offset of the label inside a declaration: `encoding=` plus the quote.
    const LABEL_OFFSET: usize = b"encoding=".len() + 1;
    const LABEL_LEN: usize = b"iso-8859-1".len();

    // Appends `chunk` to `out`, decoding it as ISO-8859-1.
    fn push_latin1(out: &mut String, chunk: &[u8]) {
        out.extend(chunk.iter().map(|&b| char::from(b)));
    }

    let bytes = raw.strip_prefix(UTF8_BOM).unwrap_or(raw);

    // Search on bytes rather than on a decoded string so that the match
    // position can be used directly as an index into `bytes`.
    let head = bytes[..bytes.len().min(HEAD_SCAN_LEN)].to_ascii_lowercase();
    let declaration_at = DECLARATIONS
        .iter()
        .filter_map(|pattern| {
            head.windows(pattern.len())
                .position(|window| window == *pattern)
        })
        .min();

    match declaration_at {
        Some(at) => {
            // Replace only the label that was actually matched. Matching on
            // the lowercased head makes the rewrite case-insensitive, and
            // confining it to this one span keeps body text that happens to
            // contain the same literal from being altered.
            let label_start = at + LABEL_OFFSET;
            let label_end = label_start + LABEL_LEN;
            let mut out = String::with_capacity(bytes.len());
            push_latin1(&mut out, &bytes[..label_start]);
            out.push_str("utf-8");
            push_latin1(&mut out, &bytes[label_end..]);
            out
        }
        None => String::from_utf8_lossy(bytes).into_owned(),
    }
}
/// Formats `digest` as lowercase hexadecimal, two characters per byte with
/// the high nibble first.
fn hex_lower(digest: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];
    digest
        .iter()
        .flat_map(|&byte| [byte >> 4, byte & 0x0F])
        .map(|nibble| DIGITS[usize::from(nibble)])
        .collect()
}
// Unit tests for the helpers and for the public `hash_tiss` entry point.
#[cfg(test)]
mod tests {
use super::*;
// Sixteen zero bytes render as thirty-two '0' characters.
#[test]
fn hex_lower_zera() {
assert_eq!(hex_lower(&[0u8; 16]), "00000000000000000000000000000000");
}
// Sixteen 0xFF bytes render as thirty-two lowercase 'f' characters.
#[test]
fn hex_lower_ff() {
assert_eq!(hex_lower(&[0xFFu8; 16]), "ffffffffffffffffffffffffffffffff");
}
// Mixed bytes are rendered high nibble first, in lowercase.
#[test]
fn hex_lower_mix() {
assert_eq!(hex_lower(&[0xDE, 0xAD, 0xBE, 0xEF]), "deadbeef");
}
// Sanity check of the MD5 dependency plus `hex_lower`: digest of empty input.
#[test]
fn md5_string_vazia() {
let mut h = Md5::new();
h.update(b"");
assert_eq!(hex_lower(&h.finalize()), "d41d8cd98f00b204e9800998ecf8427e");
}
// A leading UTF-8 BOM is removed, so the decoded text starts at `<?xml`.
#[test]
fn decode_utf8_strippa_bom() {
let raw = b"\xEF\xBB\xBF<?xml version='1.0' encoding='utf-8'?><a/>";
let s = decode_to_utf8(raw);
assert!(s.starts_with("<?xml"));
}
// An ISO-8859-1 declaration is rewritten to utf-8 and the Latin-1 byte 0xC9
// is decoded to 'É'.
#[test]
fn decode_iso_reescreve_decl() {
let mut raw: Vec<u8> = b"<?xml version='1.0' encoding='iso-8859-1'?><a>".to_vec();
raw.push(0xC9); raw.extend_from_slice(b"</a>");
let s = decode_to_utf8(&raw);
assert!(s.contains("encoding='utf-8'"));
assert!(s.contains('É')); }
// Malformed XML (elements never closed) is reported as `InvalidXml`.
#[test]
fn xml_invalido_retorna_erro() {
let r = hash_tiss(b"<no-encoding><sem-fechar>");
assert!(matches!(r, Err(TissHashError::InvalidXml(_))));
}
// Two `hash` elements in the TISS namespace are rejected.
#[test]
fn multiplos_hash_rejeitado() {
let xml = b"<?xml version='1.0' encoding='utf-8'?>\
<ans:mensagemTISS xmlns:ans=\"http://www.ans.gov.br/padroes/tiss/schemas\">\
<ans:epilogo><ans:hash>A</ans:hash><ans:hash>B</ans:hash></ans:epilogo>\
</ans:mensagemTISS>";
assert!(matches!(hash_tiss(xml), Err(TissHashError::InvalidXml(_))));
}
// Exactly one TISS `hash` element is accepted.
#[test]
fn um_hash_aceito() {
let xml = b"<?xml version='1.0' encoding='utf-8'?>\
<ans:mensagemTISS xmlns:ans=\"http://www.ans.gov.br/padroes/tiss/schemas\">\
<ans:epilogo><ans:hash>X</ans:hash></ans:epilogo>\
</ans:mensagemTISS>";
assert!(hash_tiss(xml).is_ok());
}
// A message without any `hash` element is accepted as well.
#[test]
fn sem_hash_aceito() {
let xml = b"<?xml version='1.0' encoding='utf-8'?>\
<ans:mensagemTISS xmlns:ans=\"http://www.ans.gov.br/padroes/tiss/schemas\">\
<ans:guia><ans:valor>42</ans:valor></ans:guia>\
</ans:mensagemTISS>";
assert!(hash_tiss(xml).is_ok());
}
// A UTF-16 little-endian BOM is detected and makes `hash_tiss` fail.
#[test]
fn bom_utf16_le_rejeitado() {
assert_eq!(
detect_unsupported_bom(&[0xFF, 0xFE, 0x3C, 0x00]),
Some("UTF-16")
);
let r = hash_tiss(&[0xFF, 0xFE, 0x3C, 0x00]);
assert!(matches!(r, Err(TissHashError::InvalidXml(_))));
}
// A UTF-16 big-endian BOM is detected and makes `hash_tiss` fail.
#[test]
fn bom_utf16_be_rejeitado() {
assert_eq!(
detect_unsupported_bom(&[0xFE, 0xFF, 0x00, 0x3C]),
Some("UTF-16")
);
let r = hash_tiss(&[0xFE, 0xFF, 0x00, 0x3C]);
assert!(matches!(r, Err(TissHashError::InvalidXml(_))));
}
// The UTF-32 little-endian BOM is reported as UTF-32, not as UTF-16, even
// though it starts with the UTF-16 LE mark.
#[test]
fn bom_utf32_le_rejeitado() {
assert_eq!(
detect_unsupported_bom(&[0xFF, 0xFE, 0x00, 0x00]),
Some("UTF-32")
);
}
// The UTF-32 big-endian BOM is reported as UTF-32.
#[test]
fn bom_utf32_be_rejeitado() {
assert_eq!(
detect_unsupported_bom(&[0x00, 0x00, 0xFE, 0xFF]),
Some("UTF-32")
);
}
// Neither a UTF-8 BOM nor BOM-less input is flagged as unsupported.
#[test]
fn sem_bom_passa() {
assert_eq!(detect_unsupported_bom(&[0xEF, 0xBB, 0xBF, 0x3C]), None);
assert_eq!(detect_unsupported_bom(b"<a/>"), None);
}
// The only leaf in this message is the `hash` element, which is excluded, so
// nothing is hashed and the result equals the MD5 of empty input.
#[test]
fn hash_mensagem_minima_inline() {
let xml = b"<?xml version='1.0' encoding='utf-8'?>\
<ans:mensagemTISS xmlns:ans=\"http://www.ans.gov.br/padroes/tiss/schemas\">\
<ans:epilogo><ans:hash>QUALQUER</ans:hash></ans:epilogo>\
</ans:mensagemTISS>";
let h = hash_tiss(xml).unwrap();
assert_eq!(h, "d41d8cd98f00b204e9800998ecf8427e");
}
}