1use crate::constants::{ATTR_INFO, ATTR_VALUE_THRESHOLD, ELEMENT_NAME_INFO, TEXT_THRESHOLD};
7use md5::{Digest, Md5};
8use std::collections::HashMap;
9
10use super::namespace::ExpandedName;
11
12#[derive(Debug, Clone)]
14pub enum XmlContent {
15 Element(XmlElement),
17 Text(XmlText),
19 Comment(XmlComment),
21 ProcessingInstruction(XmlProcessingInstruction),
23}
24
25impl XmlContent {
26 pub fn info_size(&self) -> i32 {
30 match self {
31 XmlContent::Element(e) => e.info_size,
32 XmlContent::Text(t) => t.info_size,
33 XmlContent::Comment(c) => c.info_size,
34 XmlContent::ProcessingInstruction(pi) => pi.info_size,
35 }
36 }
37
38 pub fn content_equals(&self, other: &XmlContent) -> bool {
40 match (self, other) {
41 (XmlContent::Element(a), XmlContent::Element(b)) => a.content_equals(b),
42 (XmlContent::Text(a), XmlContent::Text(b)) => a.content_equals(b),
43 (XmlContent::Comment(a), XmlContent::Comment(b)) => a.content_equals(b),
44 (XmlContent::ProcessingInstruction(a), XmlContent::ProcessingInstruction(b)) => {
45 a.content_equals(b)
46 }
47 _ => false,
48 }
49 }
50
51 pub fn content_hash(&self) -> i32 {
55 match self {
56 XmlContent::Element(e) => e.content_hash(),
57 XmlContent::Text(t) => t.content_hash(),
58 XmlContent::Comment(c) => c.content_hash(),
59 XmlContent::ProcessingInstruction(pi) => pi.content_hash(),
60 }
61 }
62
63 pub fn is_element(&self) -> bool {
65 matches!(self, XmlContent::Element(_))
66 }
67
68 pub fn is_text(&self) -> bool {
70 matches!(self, XmlContent::Text(_))
71 }
72
73 pub fn is_comment(&self) -> bool {
75 matches!(self, XmlContent::Comment(_))
76 }
77
78 pub fn is_processing_instruction(&self) -> bool {
80 matches!(self, XmlContent::ProcessingInstruction(_))
81 }
82
83 pub fn as_element(&self) -> Option<&XmlElement> {
85 match self {
86 XmlContent::Element(e) => Some(e),
87 _ => None,
88 }
89 }
90
91 pub fn as_element_mut(&mut self) -> Option<&mut XmlElement> {
93 match self {
94 XmlContent::Element(e) => Some(e),
95 _ => None,
96 }
97 }
98
99 pub fn as_text(&self) -> Option<&XmlText> {
101 match self {
102 XmlContent::Text(t) => Some(t),
103 _ => None,
104 }
105 }
106
107 pub fn as_text_mut(&mut self) -> Option<&mut XmlText> {
109 match self {
110 XmlContent::Text(t) => Some(t),
111 _ => None,
112 }
113 }
114
115 pub fn as_processing_instruction(&self) -> Option<&XmlProcessingInstruction> {
117 match self {
118 XmlContent::ProcessingInstruction(pi) => Some(pi),
119 _ => None,
120 }
121 }
122}
123
124fn calculate_hash_chars(data: &[char]) -> [u8; 16] {
132 let mut hasher = Md5::new();
133 for &c in data {
134 let code = c as u16;
135 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
136 }
137 hasher.finalize().into()
138}
139
140fn calculate_hash_str(data: &str) -> [u8; 16] {
142 let mut hasher = Md5::new();
143 for code in data.encode_utf16() {
145 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
146 }
147 hasher.finalize().into()
148}
149
/// Folds the first four bytes of a 16-byte digest into a single `i32`.
///
/// Each byte is reinterpreted as a signed 8-bit value and sign-extended to
/// 32 bits before being shifted into position (byte 0 lowest, byte 3
/// highest). Because of the sign extension this is *not* equivalent to
/// `i32::from_le_bytes`: a byte with its top bit set subtracts from the
/// bytes above it. The remaining twelve bytes are ignored.
///
/// The partial terms are combined with wrapping addition. Plain `+` would
/// overflow, and panic in builds with overflow checks enabled, whenever
/// byte 3 is `0x80` and the lower three terms sum to a negative value.
fn hash_to_i32(hash: &[u8; 16]) -> i32 {
    // `as i8` deliberately reinterprets the bit pattern (0x80..=0xFF become negative).
    let signed = |index: usize| i32::from(hash[index] as i8);
    signed(0)
        .wrapping_add(signed(1) << 8)
        .wrapping_add(signed(2) << 16)
        .wrapping_add(signed(3) << 24)
}
166
167#[derive(Debug, Clone)]
169pub struct XmlElement {
170 name: String,
172 expanded_name: Option<ExpandedName>,
174 namespace_decls: HashMap<String, String>,
176 attributes: HashMap<String, String>,
178 name_hash: i32,
180 attr_hash: [u8; 16],
182 info_size: i32,
184}
185
186impl XmlElement {
187 pub fn new(name: String, attributes: HashMap<String, String>) -> Self {
189 Self::new_with_namespace(name, None, HashMap::new(), attributes)
190 }
191
192 pub fn new_with_namespace(
194 name: String,
195 expanded_name: Option<ExpandedName>,
196 namespace_decls: HashMap<String, String>,
197 attributes: HashMap<String, String>,
198 ) -> Self {
199 let mut element = XmlElement {
200 name,
201 expanded_name,
202 namespace_decls,
203 attributes,
204 name_hash: 0,
205 attr_hash: [0; 16],
206 info_size: 0,
207 };
208 element.rehash();
209 element
210 }
211
212 pub fn rehash(&mut self) {
216 self.name_hash = java_string_hash(&self.name);
217 self.info_size = ELEMENT_NAME_INFO;
218
219 let mut hasher = Md5::new();
220
221 let mut attr_names: Vec<&String> = self.attributes.keys().collect();
226 attr_names.sort();
227
228 for attr_name in attr_names {
229 let attr_value = &self.attributes[attr_name];
230 let vsize = attr_value.chars().count() as i32;
231 self.info_size += ATTR_INFO
232 + if vsize > ATTR_VALUE_THRESHOLD {
233 vsize - ATTR_VALUE_THRESHOLD
234 } else {
235 1
236 };
237 hasher.update(calculate_hash_str(attr_name));
238 hasher.update(calculate_hash_str(attr_value));
239 }
240
241 self.attr_hash = hasher.finalize().into();
242 }
243
244 pub fn qname(&self) -> &str {
246 &self.name
247 }
248
249 pub fn set_qname(&mut self, name: String) {
254 self.name = name;
255 }
256
257 pub fn attributes(&self) -> &HashMap<String, String> {
259 &self.attributes
260 }
261
262 pub fn attributes_mut(&mut self) -> &mut HashMap<String, String> {
267 &mut self.attributes
268 }
269
270 pub fn set_attributes(&mut self, attributes: HashMap<String, String>) {
275 self.attributes = attributes;
276 }
277
278 pub fn expanded_name(&self) -> Option<&ExpandedName> {
280 self.expanded_name.as_ref()
281 }
282
283 pub fn namespace_decls(&self) -> &HashMap<String, String> {
285 &self.namespace_decls
286 }
287
288 pub fn names_match(&self, other: &XmlElement) -> bool {
291 match (&self.expanded_name, &other.expanded_name) {
292 (Some(a), Some(b)) => a == b,
293 (None, None) => self.name == other.name,
294 _ => false,
295 }
296 }
297
298 pub fn content_equals(&self, other: &XmlElement) -> bool {
303 self.name_hash == other.name_hash && self.attr_hash == other.attr_hash
304 }
305
306 pub fn namespace_decls_equal(&self, other: &XmlElement) -> bool {
308 self.namespace_decls == other.namespace_decls
309 }
310
311 pub fn content_hash(&self) -> i32 {
313 hash_to_i32(&self.attr_hash) ^ self.name_hash
314 }
315
316 pub fn info_size(&self) -> i32 {
318 self.info_size
319 }
320}
321
322impl std::fmt::Display for XmlElement {
323 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
324 write!(f, "{} {{", self.name)?;
325 let mut first = true;
326 let mut attr_names: Vec<&String> = self.attributes.keys().collect();
328 attr_names.sort();
329 for name in attr_names {
330 if !first {
331 write!(f, " ")?;
332 }
333 first = false;
334 write!(f, " {}={}", name, self.attributes[name])?;
335 }
336 write!(f, "}}")
337 }
338}
339
/// A text node: its characters plus a cached digest and info-size weight.
#[derive(Clone, Debug)]
pub struct XmlText {
    /// The characters of the text.
    text: Vec<char>,
    /// Cached MD5 digest of `text`.
    content_hash: [u8; 16],
    /// Cached info-size weight derived from the text length.
    info_size: i32,
}
350
351impl XmlText {
352 pub fn new(text: &str) -> Self {
354 let chars: Vec<char> = text.chars().collect();
355 Self::from_chars(chars)
356 }
357
358 pub fn from_chars(text: Vec<char>) -> Self {
360 let content_hash = calculate_hash_chars(&text);
361 let len = text.len() as i32;
362 let info_size = if len > TEXT_THRESHOLD {
363 len - TEXT_THRESHOLD
364 } else {
365 1
366 };
367 XmlText {
368 text,
369 content_hash,
370 info_size,
371 }
372 }
373
374 pub fn from_char_slice(text: &[char], start: usize, length: usize) -> Self {
376 let chars: Vec<char> = text[start..start + length].to_vec();
377 Self::from_chars(chars)
378 }
379
380 pub fn content_equals(&self, other: &XmlText) -> bool {
382 self.content_hash == other.content_hash
383 }
384
385 pub fn text(&self) -> &[char] {
387 &self.text
388 }
389
390 pub fn set_text(&mut self, text: Vec<char>) {
394 self.content_hash = calculate_hash_chars(&text);
395 let len = text.len() as i32;
396 self.info_size = if len > TEXT_THRESHOLD {
397 len - TEXT_THRESHOLD
398 } else {
399 1
400 };
401 self.text = text;
402 }
403
404 pub fn content_hash(&self) -> i32 {
406 hash_to_i32(&self.content_hash)
407 }
408
409 pub fn info_size(&self) -> i32 {
411 self.info_size
412 }
413}
414
415impl std::fmt::Display for XmlText {
416 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
417 let s: String = self.text.iter().collect();
418 write!(f, "{}", s)
419 }
420}
421
/// A comment node: its characters plus a cached digest and info-size weight.
#[derive(Clone, Debug)]
pub struct XmlComment {
    /// The characters of the comment.
    text: Vec<char>,
    /// Cached MD5 digest of `text`.
    content_hash: [u8; 16],
    /// Info-size weight; the constructor always sets it to 1.
    info_size: i32,
}
432
433impl XmlComment {
434 pub fn new(text: &str) -> Self {
436 let chars: Vec<char> = text.chars().collect();
437 Self::from_chars(chars)
438 }
439
440 pub fn from_chars(text: Vec<char>) -> Self {
442 let content_hash = calculate_hash_chars(&text);
443 let info_size = 1;
445 XmlComment {
446 text,
447 content_hash,
448 info_size,
449 }
450 }
451
452 pub fn content_equals(&self, other: &XmlComment) -> bool {
454 self.content_hash == other.content_hash
455 }
456
457 pub fn text(&self) -> &[char] {
459 &self.text
460 }
461
462 pub fn set_text(&mut self, text: Vec<char>) {
464 self.content_hash = calculate_hash_chars(&text);
465 self.text = text;
466 }
467
468 pub fn content_hash(&self) -> i32 {
470 hash_to_i32(&self.content_hash)
471 }
472
473 pub fn info_size(&self) -> i32 {
475 self.info_size
476 }
477}
478
479impl std::fmt::Display for XmlComment {
480 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
481 let s: String = self.text.iter().collect();
482 write!(f, "<!-- {} -->", s)
483 }
484}
485
/// A processing-instruction node: target and content plus a cached digest and
/// info-size weight.
#[derive(Clone, Debug)]
pub struct XmlProcessingInstruction {
    /// The instruction target (the name right after `<?`).
    target: String,
    /// The instruction content; may be empty.
    content: String,
    /// Cached MD5 digest covering both target and content.
    content_hash: [u8; 16],
    /// Info-size weight; the constructor always sets it to 1.
    info_size: i32,
}
498
499impl XmlProcessingInstruction {
500 pub fn new(target: &str, content: &str) -> Self {
502 let content_hash = Self::calculate_hash(target, content);
503 XmlProcessingInstruction {
504 target: target.to_string(),
505 content: content.to_string(),
506 content_hash,
507 info_size: 1,
508 }
509 }
510
511 fn calculate_hash(target: &str, content: &str) -> [u8; 16] {
512 use md5::{Digest, Md5};
513 let mut hasher = Md5::new();
514 for code in target.encode_utf16() {
515 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
516 }
517 for code in content.encode_utf16() {
518 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
519 }
520 hasher.finalize().into()
521 }
522
523 pub fn content_equals(&self, other: &XmlProcessingInstruction) -> bool {
525 self.content_hash == other.content_hash
526 }
527
528 pub fn target(&self) -> &str {
530 &self.target
531 }
532
533 pub fn content(&self) -> &str {
535 &self.content
536 }
537
538 pub fn content_hash(&self) -> i32 {
540 hash_to_i32(&self.content_hash)
541 }
542
543 pub fn info_size(&self) -> i32 {
545 self.info_size
546 }
547}
548
549impl std::fmt::Display for XmlProcessingInstruction {
550 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
551 if self.content.is_empty() {
552 write!(f, "<?{}?>", self.target)
553 } else {
554 write!(f, "<?{} {}?>", self.target, self.content)
555 }
556 }
557}
558
/// Computes a 32-bit string hash using the same recurrence as Java's
/// `String.hashCode()`.
///
/// Starting from 0, each UTF-16 code unit `c` of `s` updates the hash to
/// `hash * 31 + c` with wrapping 32-bit arithmetic. The empty string
/// hashes to 0.
pub fn java_string_hash(s: &str) -> i32 {
    s.encode_utf16().fold(0i32, |acc, unit| {
        acc.wrapping_mul(31).wrapping_add(i32::from(unit))
    })
}
572
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned attribute/namespace map from `(key, value)` pairs.
    fn string_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Known values of the Java-style string hash.
    #[test]
    fn test_java_string_hash() {
        let expectations: [(&str, i32); 4] = [
            ("", 0),
            ("a", 97),
            ("ab", 97 * 31 + 98),
            ("hello", 99162322),
        ];
        for &(input, expected) in expectations.iter() {
            assert_eq!(java_string_hash(input), expected);
        }
    }

    /// Text nodes compare equal exactly when their text is identical.
    #[test]
    fn test_text_node_equality() {
        let base = XmlText::new("hello world");
        let same = XmlText::new("hello world");
        let different = XmlText::new("hello world!");

        assert!(base.content_equals(&same));
        assert!(!base.content_equals(&different));
    }

    /// Elements differ when either the attributes or the name differ.
    #[test]
    fn test_element_equality() {
        let e1 = XmlElement::new("div".to_string(), string_map(&[("id", "foo")]));
        let e2 = XmlElement::new("div".to_string(), string_map(&[("id", "foo")]));
        let e3 = XmlElement::new("div".to_string(), string_map(&[("id", "bar")]));
        let e4 = XmlElement::new("span".to_string(), HashMap::new());

        assert!(e1.content_equals(&e2));
        assert!(!e1.content_equals(&e3));
        assert!(!e1.content_equals(&e4));
    }

    /// Info sizes for short text, long text, and elements with and without
    /// attributes.
    #[test]
    fn test_info_size() {
        let short_text = XmlText::new("hi");
        assert_eq!(short_text.info_size(), 1);

        let long_text = XmlText::new("hello world");
        assert_eq!(long_text.info_size(), 11 - TEXT_THRESHOLD);

        let bare = XmlElement::new("div".to_string(), HashMap::new());
        assert_eq!(bare.info_size(), ELEMENT_NAME_INFO);

        let with_attr = XmlElement::new("div".to_string(), string_map(&[("id", "x")]));
        assert_eq!(with_attr.info_size(), ELEMENT_NAME_INFO + ATTR_INFO + 1);
    }

    /// Variant predicates and accessors agree with the wrapped node kind.
    #[test]
    fn test_xml_content_enum() {
        let elem = XmlContent::Element(XmlElement::new("div".to_string(), HashMap::new()));
        let text = XmlContent::Text(XmlText::new("hello"));

        assert!(elem.is_element());
        assert!(!elem.is_text());
        assert!(!text.is_element());
        assert!(text.is_text());

        assert!(elem.as_element().is_some());
        assert!(elem.as_text().is_none());
        assert!(text.as_text().is_some());
        assert!(text.as_element().is_none());
    }

    /// Namespace declarations do not influence `content_equals`, but are
    /// compared by `namespace_decls_equal`.
    #[test]
    fn test_namespace_decls_affect_equality() {
        let attrs: HashMap<String, String> = HashMap::new();
        let ns1 = string_map(&[("a", "http://example.com/a")]);
        let ns2 = string_map(&[("b", "http://example.com/b")]);

        let e1 = XmlElement::new_with_namespace("root".to_string(), None, ns1, attrs.clone());
        let e2 = XmlElement::new_with_namespace("root".to_string(), None, ns2, attrs.clone());
        let e3 = XmlElement::new("root".to_string(), attrs);

        assert!(e1.content_equals(&e2));
        assert!(e1.content_equals(&e3));

        assert!(!e1.namespace_decls_equal(&e2));
        assert!(!e1.namespace_decls_equal(&e3));
        assert!(e1.namespace_decls_equal(&e1));
    }
}