1use crate::constants::{ATTR_INFO, ATTR_VALUE_THRESHOLD, ELEMENT_NAME_INFO, TEXT_THRESHOLD};
7use md5::{Digest, Md5};
8use std::collections::HashMap;
9
10use super::namespace::ExpandedName;
11
/// Payload of a single XML node.
///
/// Each variant wraps the concrete node type that carries the data and the
/// cached hash / info-size values for that kind of node.
#[derive(Debug, Clone)]
pub enum XmlContent {
    /// An element: qualified name plus attributes.
    Element(XmlElement),
    /// A run of character data.
    Text(XmlText),
    /// A comment.
    Comment(XmlComment),
    /// A processing instruction (target plus content).
    ProcessingInstruction(XmlProcessingInstruction),
}
24
25impl XmlContent {
26 pub fn info_size(&self) -> i32 {
30 match self {
31 XmlContent::Element(e) => e.info_size,
32 XmlContent::Text(t) => t.info_size,
33 XmlContent::Comment(c) => c.info_size,
34 XmlContent::ProcessingInstruction(pi) => pi.info_size,
35 }
36 }
37
38 pub fn content_equals(&self, other: &XmlContent) -> bool {
40 match (self, other) {
41 (XmlContent::Element(a), XmlContent::Element(b)) => a.content_equals(b),
42 (XmlContent::Text(a), XmlContent::Text(b)) => a.content_equals(b),
43 (XmlContent::Comment(a), XmlContent::Comment(b)) => a.content_equals(b),
44 (XmlContent::ProcessingInstruction(a), XmlContent::ProcessingInstruction(b)) => {
45 a.content_equals(b)
46 }
47 _ => false,
48 }
49 }
50
51 pub fn content_hash(&self) -> i32 {
55 match self {
56 XmlContent::Element(e) => e.content_hash(),
57 XmlContent::Text(t) => t.content_hash(),
58 XmlContent::Comment(c) => c.content_hash(),
59 XmlContent::ProcessingInstruction(pi) => pi.content_hash(),
60 }
61 }
62
63 pub fn is_element(&self) -> bool {
65 matches!(self, XmlContent::Element(_))
66 }
67
68 pub fn is_text(&self) -> bool {
70 matches!(self, XmlContent::Text(_))
71 }
72
73 pub fn is_comment(&self) -> bool {
75 matches!(self, XmlContent::Comment(_))
76 }
77
78 pub fn is_processing_instruction(&self) -> bool {
80 matches!(self, XmlContent::ProcessingInstruction(_))
81 }
82
83 pub fn as_element(&self) -> Option<&XmlElement> {
85 match self {
86 XmlContent::Element(e) => Some(e),
87 _ => None,
88 }
89 }
90
91 pub fn as_element_mut(&mut self) -> Option<&mut XmlElement> {
93 match self {
94 XmlContent::Element(e) => Some(e),
95 _ => None,
96 }
97 }
98
99 pub fn as_text(&self) -> Option<&XmlText> {
101 match self {
102 XmlContent::Text(t) => Some(t),
103 _ => None,
104 }
105 }
106
107 pub fn as_text_mut(&mut self) -> Option<&mut XmlText> {
109 match self {
110 XmlContent::Text(t) => Some(t),
111 _ => None,
112 }
113 }
114
115 pub fn as_processing_instruction(&self) -> Option<&XmlProcessingInstruction> {
117 match self {
118 XmlContent::ProcessingInstruction(pi) => Some(pi),
119 _ => None,
120 }
121 }
122}
123
124fn calculate_hash_chars(data: &[char]) -> [u8; 16] {
132 let mut hasher = Md5::new();
133 for &c in data {
134 let code = c as u16;
135 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
136 }
137 hasher.finalize().into()
138}
139
140fn calculate_hash_str(data: &str) -> [u8; 16] {
142 let mut hasher = Md5::new();
143 for code in data.encode_utf16() {
145 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
146 }
147 hasher.finalize().into()
148}
149
/// Folds the first four bytes of a 16-byte digest into an `i32`.
///
/// Each byte is reinterpreted as a signed 8-bit value and sign-extended
/// before being shifted into position (byte 0 lowest), and the four terms
/// are combined with wrapping addition. Bytes 4..16 are ignored.
fn hash_to_i32(hash: &[u8; 16]) -> i32 {
    hash[..4]
        .iter()
        .enumerate()
        .fold(0i32, |acc, (index, &byte)| {
            let signed = i32::from(byte as i8);
            acc.wrapping_add(signed << (8 * index))
        })
}
168
/// An XML element: a qualified name, its attributes, optional namespace
/// information, and cached values derived from the name and attributes.
#[derive(Debug, Clone)]
pub struct XmlElement {
    /// Qualified name, as returned by `qname()`.
    name: String,
    /// Optional expanded name; when present on both sides it is what
    /// `names_match` compares.
    expanded_name: Option<ExpandedName>,
    /// Namespace declarations on this element (presumably prefix -> URI;
    /// confirm against the parser). Not part of the content hash.
    namespace_decls: HashMap<String, String>,
    /// Attribute name -> attribute value.
    attributes: HashMap<String, String>,
    /// Java-style string hash of `name`, refreshed by `rehash`.
    name_hash: i32,
    /// MD5 digest over the attributes in sorted-name order, refreshed by
    /// `rehash`.
    attr_hash: [u8; 16],
    /// Information-size measure of the name and attributes, refreshed by
    /// `rehash`.
    info_size: i32,
}
187
188impl XmlElement {
189 pub fn new(name: String, attributes: HashMap<String, String>) -> Self {
191 Self::new_with_namespace(name, None, HashMap::new(), attributes)
192 }
193
194 pub fn new_with_namespace(
196 name: String,
197 expanded_name: Option<ExpandedName>,
198 namespace_decls: HashMap<String, String>,
199 attributes: HashMap<String, String>,
200 ) -> Self {
201 let mut element = XmlElement {
202 name,
203 expanded_name,
204 namespace_decls,
205 attributes,
206 name_hash: 0,
207 attr_hash: [0; 16],
208 info_size: 0,
209 };
210 element.rehash();
211 element
212 }
213
214 pub fn rehash(&mut self) {
218 self.name_hash = java_string_hash(&self.name);
219 self.info_size = ELEMENT_NAME_INFO;
220
221 let mut hasher = Md5::new();
222
223 let mut attr_names: Vec<&String> = self.attributes.keys().collect();
228 attr_names.sort();
229
230 for attr_name in attr_names {
231 let attr_value = &self.attributes[attr_name];
232 let vsize = attr_value.chars().count() as i32;
233 self.info_size += ATTR_INFO
234 + if vsize > ATTR_VALUE_THRESHOLD {
235 vsize - ATTR_VALUE_THRESHOLD
236 } else {
237 1
238 };
239 hasher.update(calculate_hash_str(attr_name));
240 hasher.update(calculate_hash_str(attr_value));
241 }
242
243 self.attr_hash = hasher.finalize().into();
244 }
245
246 pub fn qname(&self) -> &str {
248 &self.name
249 }
250
251 pub fn set_qname(&mut self, name: String) {
256 self.name = name;
257 }
258
259 pub fn attributes(&self) -> &HashMap<String, String> {
261 &self.attributes
262 }
263
264 pub fn attributes_mut(&mut self) -> &mut HashMap<String, String> {
269 &mut self.attributes
270 }
271
272 pub fn set_attributes(&mut self, attributes: HashMap<String, String>) {
277 self.attributes = attributes;
278 }
279
280 pub fn expanded_name(&self) -> Option<&ExpandedName> {
282 self.expanded_name.as_ref()
283 }
284
285 pub fn namespace_decls(&self) -> &HashMap<String, String> {
287 &self.namespace_decls
288 }
289
290 pub fn names_match(&self, other: &XmlElement) -> bool {
293 match (&self.expanded_name, &other.expanded_name) {
294 (Some(a), Some(b)) => a == b,
295 (None, None) => self.name == other.name,
296 _ => false,
297 }
298 }
299
300 pub fn content_equals(&self, other: &XmlElement) -> bool {
305 self.name_hash == other.name_hash && self.attr_hash == other.attr_hash
306 }
307
308 pub fn namespace_decls_equal(&self, other: &XmlElement) -> bool {
310 self.namespace_decls == other.namespace_decls
311 }
312
313 pub fn content_hash(&self) -> i32 {
315 hash_to_i32(&self.attr_hash) ^ self.name_hash
316 }
317
318 pub fn info_size(&self) -> i32 {
320 self.info_size
321 }
322}
323
324impl std::fmt::Display for XmlElement {
325 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
326 write!(f, "{} {{", self.name)?;
327 let mut first = true;
328 let mut attr_names: Vec<&String> = self.attributes.keys().collect();
330 attr_names.sort();
331 for name in attr_names {
332 if !first {
333 write!(f, " ")?;
334 }
335 first = false;
336 write!(f, " {}={}", name, self.attributes[name])?;
337 }
338 write!(f, "}}")
339 }
340}
341
/// A text node: its characters plus a cached digest and info size.
#[derive(Debug, Clone)]
pub struct XmlText {
    /// The text content, one `char` per element.
    text: Vec<char>,
    /// MD5 digest of `text`, as computed by `calculate_hash_chars`.
    content_hash: [u8; 16],
    /// Information size: the character count in excess of `TEXT_THRESHOLD`,
    /// or 1 when the text is not longer than the threshold.
    info_size: i32,
}
352
353impl XmlText {
354 pub fn new(text: &str) -> Self {
356 let chars: Vec<char> = text.chars().collect();
357 Self::from_chars(chars)
358 }
359
360 pub fn from_chars(text: Vec<char>) -> Self {
362 let content_hash = calculate_hash_chars(&text);
363 let len = text.len() as i32;
364 let info_size = if len > TEXT_THRESHOLD {
365 len - TEXT_THRESHOLD
366 } else {
367 1
368 };
369 XmlText {
370 text,
371 content_hash,
372 info_size,
373 }
374 }
375
376 pub fn from_char_slice(text: &[char], start: usize, length: usize) -> Self {
378 let chars: Vec<char> = text[start..start + length].to_vec();
379 Self::from_chars(chars)
380 }
381
382 pub fn content_equals(&self, other: &XmlText) -> bool {
384 self.content_hash == other.content_hash
385 }
386
387 pub fn text(&self) -> &[char] {
389 &self.text
390 }
391
392 pub fn set_text(&mut self, text: Vec<char>) {
396 self.content_hash = calculate_hash_chars(&text);
397 let len = text.len() as i32;
398 self.info_size = if len > TEXT_THRESHOLD {
399 len - TEXT_THRESHOLD
400 } else {
401 1
402 };
403 self.text = text;
404 }
405
406 pub fn content_hash(&self) -> i32 {
408 hash_to_i32(&self.content_hash)
409 }
410
411 pub fn info_size(&self) -> i32 {
413 self.info_size
414 }
415}
416
417impl std::fmt::Display for XmlText {
418 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
419 let s: String = self.text.iter().collect();
420 write!(f, "{}", s)
421 }
422}
423
/// A comment node: its characters plus a cached digest and info size.
#[derive(Debug, Clone)]
pub struct XmlComment {
    /// The comment text, one `char` per element.
    text: Vec<char>,
    /// MD5 digest of `text`, as computed by `calculate_hash_chars`.
    content_hash: [u8; 16],
    /// Information size; the constructor always sets this to 1.
    info_size: i32,
}
434
435impl XmlComment {
436 pub fn new(text: &str) -> Self {
438 let chars: Vec<char> = text.chars().collect();
439 Self::from_chars(chars)
440 }
441
442 pub fn from_chars(text: Vec<char>) -> Self {
444 let content_hash = calculate_hash_chars(&text);
445 let info_size = 1;
447 XmlComment {
448 text,
449 content_hash,
450 info_size,
451 }
452 }
453
454 pub fn content_equals(&self, other: &XmlComment) -> bool {
456 self.content_hash == other.content_hash
457 }
458
459 pub fn text(&self) -> &[char] {
461 &self.text
462 }
463
464 pub fn set_text(&mut self, text: Vec<char>) {
466 self.content_hash = calculate_hash_chars(&text);
467 self.text = text;
468 }
469
470 pub fn content_hash(&self) -> i32 {
472 hash_to_i32(&self.content_hash)
473 }
474
475 pub fn info_size(&self) -> i32 {
477 self.info_size
478 }
479}
480
481impl std::fmt::Display for XmlComment {
482 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
483 let s: String = self.text.iter().collect();
484 write!(f, "<!-- {} -->", s)
485 }
486}
487
/// A processing-instruction node: target, content, and cached values.
#[derive(Debug, Clone)]
pub struct XmlProcessingInstruction {
    /// The instruction target (rendered directly after `<?`).
    target: String,
    /// The instruction content; may be empty.
    content: String,
    /// MD5 digest over the UTF-16 code units of `target` followed by
    /// those of `content`.
    content_hash: [u8; 16],
    /// Information size; the constructor always sets this to 1.
    info_size: i32,
}
500
501impl XmlProcessingInstruction {
502 pub fn new(target: &str, content: &str) -> Self {
504 let content_hash = Self::calculate_hash(target, content);
505 XmlProcessingInstruction {
506 target: target.to_string(),
507 content: content.to_string(),
508 content_hash,
509 info_size: 1,
510 }
511 }
512
513 fn calculate_hash(target: &str, content: &str) -> [u8; 16] {
514 use md5::{Digest, Md5};
515 let mut hasher = Md5::new();
516 for code in target.encode_utf16() {
517 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
518 }
519 for code in content.encode_utf16() {
520 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
521 }
522 hasher.finalize().into()
523 }
524
525 pub fn content_equals(&self, other: &XmlProcessingInstruction) -> bool {
527 self.content_hash == other.content_hash
528 }
529
530 pub fn target(&self) -> &str {
532 &self.target
533 }
534
535 pub fn content(&self) -> &str {
537 &self.content
538 }
539
540 pub fn content_hash(&self) -> i32 {
542 hash_to_i32(&self.content_hash)
543 }
544
545 pub fn info_size(&self) -> i32 {
547 self.info_size
548 }
549}
550
551impl std::fmt::Display for XmlProcessingInstruction {
552 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
553 if self.content.is_empty() {
554 write!(f, "<?{}?>", self.target)
555 } else {
556 write!(f, "<?{} {}?>", self.target, self.content)
557 }
558 }
559}
560
/// Computes a string hash using the same recurrence as Java's
/// `String.hashCode()`: starting from 0, each UTF-16 code unit `c` updates
/// the hash to `hash * 31 + c` with wrapping 32-bit signed arithmetic.
///
/// The empty string hashes to 0.
pub fn java_string_hash(s: &str) -> i32 {
    s.encode_utf16().fold(0i32, |acc, unit| {
        acc.wrapping_mul(31).wrapping_add(i32::from(unit))
    })
}
574
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned attribute map from borrowed `(name, value)` pairs.
    fn attr_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Builds an element without namespace information.
    fn element(name: &str, pairs: &[(&str, &str)]) -> XmlElement {
        XmlElement::new(name.to_string(), attr_map(pairs))
    }

    /// Builds a namespace-declaration map holding a single entry.
    fn single_decl(prefix: &str, uri: &str) -> HashMap<String, String> {
        let mut decls = HashMap::new();
        decls.insert(prefix.to_string(), uri.to_string());
        decls
    }

    #[test]
    fn test_java_string_hash() {
        let expectations = [
            ("", 0),
            ("a", 97),
            ("ab", 97 * 31 + 98),
            ("hello", 99162322),
        ];
        for (input, expected) in expectations {
            assert_eq!(java_string_hash(input), expected);
        }
    }

    #[test]
    fn test_text_node_equality() {
        let base = XmlText::new("hello world");
        let same = XmlText::new("hello world");
        let different = XmlText::new("hello world!");

        assert!(base.content_equals(&same));
        assert!(!base.content_equals(&different));
    }

    #[test]
    fn test_element_equality() {
        let div_foo = element("div", &[("id", "foo")]);
        let div_foo_again = element("div", &[("id", "foo")]);
        let div_bar = element("div", &[("id", "bar")]);
        let span = element("span", &[]);

        assert!(div_foo.content_equals(&div_foo_again));
        assert!(!div_foo.content_equals(&div_bar));
        assert!(!div_foo.content_equals(&span));
    }

    #[test]
    fn test_info_size() {
        // Text no longer than the threshold counts as 1.
        let short_text = XmlText::new("hi");
        assert_eq!(short_text.info_size(), 1);

        // Longer text counts only the excess over the threshold.
        let long_text = XmlText::new("hello world");
        assert_eq!(long_text.info_size(), 11 - TEXT_THRESHOLD);

        // An element without attributes costs just the name.
        let bare = element("div", &[]);
        assert_eq!(bare.info_size(), ELEMENT_NAME_INFO);

        // One short attribute adds the per-attribute cost plus 1.
        let with_attr = element("div", &[("id", "x")]);
        assert_eq!(with_attr.info_size(), ELEMENT_NAME_INFO + ATTR_INFO + 1);
    }

    #[test]
    fn test_xml_content_enum() {
        let elem = XmlContent::Element(element("div", &[]));
        let text = XmlContent::Text(XmlText::new("hello"));

        assert!(elem.is_element());
        assert!(!elem.is_text());
        assert!(!text.is_element());
        assert!(text.is_text());

        assert!(elem.as_element().is_some());
        assert!(elem.as_text().is_none());
        assert!(text.as_text().is_some());
        assert!(text.as_element().is_none());
    }

    #[test]
    fn test_namespace_decls_affect_equality() {
        let attrs: HashMap<String, String> = HashMap::new();

        let with_a = XmlElement::new_with_namespace(
            "root".to_string(),
            None,
            single_decl("a", "http://example.com/a"),
            attrs.clone(),
        );
        let with_b = XmlElement::new_with_namespace(
            "root".to_string(),
            None,
            single_decl("b", "http://example.com/b"),
            attrs.clone(),
        );
        let without = XmlElement::new("root".to_string(), attrs);

        // Content equality ignores namespace declarations.
        assert!(with_a.content_equals(&with_b));
        assert!(with_a.content_equals(&without));

        // The dedicated comparison does see them.
        assert!(!with_a.namespace_decls_equal(&with_b));
        assert!(!with_a.namespace_decls_equal(&without));
        assert!(with_a.namespace_decls_equal(&with_a));
    }
}