1use crate::constants::{ATTR_INFO, ATTR_VALUE_THRESHOLD, ELEMENT_NAME_INFO, TEXT_THRESHOLD};
7use md5::{Digest, Md5};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone)]
12pub enum XmlContent {
13 Element(XmlElement),
15 Text(XmlText),
17 Comment(XmlComment),
19}
20
21impl XmlContent {
22 pub fn info_size(&self) -> i32 {
26 match self {
27 XmlContent::Element(e) => e.info_size,
28 XmlContent::Text(t) => t.info_size,
29 XmlContent::Comment(c) => c.info_size,
30 }
31 }
32
33 pub fn content_equals(&self, other: &XmlContent) -> bool {
35 match (self, other) {
36 (XmlContent::Element(a), XmlContent::Element(b)) => a.content_equals(b),
37 (XmlContent::Text(a), XmlContent::Text(b)) => a.content_equals(b),
38 (XmlContent::Comment(a), XmlContent::Comment(b)) => a.content_equals(b),
39 _ => false,
40 }
41 }
42
43 pub fn content_hash(&self) -> i32 {
47 match self {
48 XmlContent::Element(e) => e.content_hash(),
49 XmlContent::Text(t) => t.content_hash(),
50 XmlContent::Comment(c) => c.content_hash(),
51 }
52 }
53
54 pub fn is_element(&self) -> bool {
56 matches!(self, XmlContent::Element(_))
57 }
58
59 pub fn is_text(&self) -> bool {
61 matches!(self, XmlContent::Text(_))
62 }
63
64 pub fn is_comment(&self) -> bool {
66 matches!(self, XmlContent::Comment(_))
67 }
68
69 pub fn as_element(&self) -> Option<&XmlElement> {
71 match self {
72 XmlContent::Element(e) => Some(e),
73 _ => None,
74 }
75 }
76
77 pub fn as_element_mut(&mut self) -> Option<&mut XmlElement> {
79 match self {
80 XmlContent::Element(e) => Some(e),
81 _ => None,
82 }
83 }
84
85 pub fn as_text(&self) -> Option<&XmlText> {
87 match self {
88 XmlContent::Text(t) => Some(t),
89 _ => None,
90 }
91 }
92
93 pub fn as_text_mut(&mut self) -> Option<&mut XmlText> {
95 match self {
96 XmlContent::Text(t) => Some(t),
97 _ => None,
98 }
99 }
100}
101
102fn calculate_hash_chars(data: &[char]) -> [u8; 16] {
110 let mut hasher = Md5::new();
111 for &c in data {
112 let code = c as u16;
113 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
114 }
115 hasher.finalize().into()
116}
117
118fn calculate_hash_str(data: &str) -> [u8; 16] {
120 let mut hasher = Md5::new();
121 for code in data.encode_utf16() {
123 hasher.update([(code & 0xff) as u8, (code >> 8) as u8]);
124 }
125 hasher.finalize().into()
126}
127
/// Folds the first four bytes of a 16-byte digest into a 32-bit integer.
///
/// Each byte is reinterpreted as a signed 8-bit value, sign-extended to
/// `i32`, shifted into place (byte 0 lowest) and the four terms are summed.
///
/// The sum uses wrapping arithmetic. With signed terms the exact result can
/// fall below `i32::MIN` (for example byte 3 = 0x80 with a negative byte 0),
/// which would panic under overflow checks if plain `+` were used.
fn hash_to_i32(hash: &[u8; 16]) -> i32 {
    // Sign-extend a single digest byte.
    let signed = |idx: usize| i32::from(hash[idx] as i8);
    signed(0)
        .wrapping_add(signed(1) << 8)
        .wrapping_add(signed(2) << 16)
        .wrapping_add(signed(3) << 24)
}
144
/// Element content: a qualified name, its attributes, and cached values
/// derived from them.
#[derive(Clone, Debug)]
pub struct XmlElement {
    /// Qualified element name.
    name: String,
    /// Attribute values keyed by attribute name.
    attributes: HashMap<String, String>,
    /// Cached 32-bit hash of `name`.
    name_hash: i32,
    /// Cached 16-byte digest computed over the attributes.
    attr_hash: [u8; 16],
    /// Cached information size of the element.
    info_size: i32,
}
159
160impl XmlElement {
161 pub fn new(name: String, attributes: HashMap<String, String>) -> Self {
163 let mut element = XmlElement {
164 name,
165 attributes,
166 name_hash: 0,
167 attr_hash: [0; 16],
168 info_size: 0,
169 };
170 element.rehash();
171 element
172 }
173
174 pub fn rehash(&mut self) {
178 self.name_hash = java_string_hash(&self.name);
179 self.info_size = ELEMENT_NAME_INFO;
180
181 let mut hasher = Md5::new();
182
183 let mut attr_names: Vec<&String> = self.attributes.keys().collect();
188 attr_names.sort();
189
190 for attr_name in attr_names {
191 let attr_value = &self.attributes[attr_name];
192 let vsize = attr_value.chars().count() as i32;
193 self.info_size += ATTR_INFO
194 + if vsize > ATTR_VALUE_THRESHOLD {
195 vsize - ATTR_VALUE_THRESHOLD
196 } else {
197 1
198 };
199 hasher.update(calculate_hash_str(attr_name));
200 hasher.update(calculate_hash_str(attr_value));
201 }
202
203 self.attr_hash = hasher.finalize().into();
204 }
205
206 pub fn qname(&self) -> &str {
208 &self.name
209 }
210
211 pub fn set_qname(&mut self, name: String) {
216 self.name = name;
217 }
218
219 pub fn attributes(&self) -> &HashMap<String, String> {
221 &self.attributes
222 }
223
224 pub fn attributes_mut(&mut self) -> &mut HashMap<String, String> {
229 &mut self.attributes
230 }
231
232 pub fn set_attributes(&mut self, attributes: HashMap<String, String>) {
237 self.attributes = attributes;
238 }
239
240 pub fn content_equals(&self, other: &XmlElement) -> bool {
242 self.name_hash == other.name_hash && self.attr_hash == other.attr_hash
243 }
244
245 pub fn content_hash(&self) -> i32 {
247 hash_to_i32(&self.attr_hash) ^ self.name_hash
248 }
249
250 pub fn info_size(&self) -> i32 {
252 self.info_size
253 }
254}
255
256impl std::fmt::Display for XmlElement {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 write!(f, "{} {{", self.name)?;
259 let mut first = true;
260 let mut attr_names: Vec<&String> = self.attributes.keys().collect();
262 attr_names.sort();
263 for name in attr_names {
264 if !first {
265 write!(f, " ")?;
266 }
267 first = false;
268 write!(f, " {}={}", name, self.attributes[name])?;
269 }
270 write!(f, "}}")
271 }
272}
273
/// Text content together with a cached digest and information size.
#[derive(Clone, Debug)]
pub struct XmlText {
    /// The text, one entry per character.
    text: Vec<char>,
    /// Cached 16-byte digest of `text`.
    content_hash: [u8; 16],
    /// Cached information size of the text.
    info_size: i32,
}
284
285impl XmlText {
286 pub fn new(text: &str) -> Self {
288 let chars: Vec<char> = text.chars().collect();
289 Self::from_chars(chars)
290 }
291
292 pub fn from_chars(text: Vec<char>) -> Self {
294 let content_hash = calculate_hash_chars(&text);
295 let len = text.len() as i32;
296 let info_size = if len > TEXT_THRESHOLD {
297 len - TEXT_THRESHOLD
298 } else {
299 1
300 };
301 XmlText {
302 text,
303 content_hash,
304 info_size,
305 }
306 }
307
308 pub fn from_char_slice(text: &[char], start: usize, length: usize) -> Self {
310 let chars: Vec<char> = text[start..start + length].to_vec();
311 Self::from_chars(chars)
312 }
313
314 pub fn content_equals(&self, other: &XmlText) -> bool {
316 self.content_hash == other.content_hash
317 }
318
319 pub fn text(&self) -> &[char] {
321 &self.text
322 }
323
324 pub fn set_text(&mut self, text: Vec<char>) {
328 self.content_hash = calculate_hash_chars(&text);
329 let len = text.len() as i32;
330 self.info_size = if len > TEXT_THRESHOLD {
331 len - TEXT_THRESHOLD
332 } else {
333 1
334 };
335 self.text = text;
336 }
337
338 pub fn content_hash(&self) -> i32 {
340 hash_to_i32(&self.content_hash)
341 }
342
343 pub fn info_size(&self) -> i32 {
345 self.info_size
346 }
347}
348
349impl std::fmt::Display for XmlText {
350 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
351 let s: String = self.text.iter().collect();
352 write!(f, "{}", s)
353 }
354}
355
/// Comment content together with a cached digest and information size.
#[derive(Clone, Debug)]
pub struct XmlComment {
    /// The comment text, one entry per character.
    text: Vec<char>,
    /// Cached 16-byte digest of `text`.
    content_hash: [u8; 16],
    /// Cached information size of the comment.
    info_size: i32,
}
366
367impl XmlComment {
368 pub fn new(text: &str) -> Self {
370 let chars: Vec<char> = text.chars().collect();
371 Self::from_chars(chars)
372 }
373
374 pub fn from_chars(text: Vec<char>) -> Self {
376 let content_hash = calculate_hash_chars(&text);
377 let info_size = 1;
379 XmlComment {
380 text,
381 content_hash,
382 info_size,
383 }
384 }
385
386 pub fn content_equals(&self, other: &XmlComment) -> bool {
388 self.content_hash == other.content_hash
389 }
390
391 pub fn text(&self) -> &[char] {
393 &self.text
394 }
395
396 pub fn set_text(&mut self, text: Vec<char>) {
398 self.content_hash = calculate_hash_chars(&text);
399 self.text = text;
400 }
401
402 pub fn content_hash(&self) -> i32 {
404 hash_to_i32(&self.content_hash)
405 }
406
407 pub fn info_size(&self) -> i32 {
409 self.info_size
410 }
411}
412
413impl std::fmt::Display for XmlComment {
414 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
415 let s: String = self.text.iter().collect();
416 write!(f, "<!-- {} -->", s)
417 }
418}
419
/// Hashes a string with the recurrence `h = 31 * h + unit` over its UTF-16
/// code units, using wrapping 32-bit arithmetic.
///
/// This is the recurrence used by Java's `String.hashCode()`. The empty
/// string hashes to 0.
pub fn java_string_hash(s: &str) -> i32 {
    s.encode_utf16().fold(0i32, |acc, unit| {
        acc.wrapping_mul(31).wrapping_add(i32::from(unit))
    })
}
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned attribute map from borrowed name/value pairs.
    fn attr_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Builds an element named `name` carrying the given attributes.
    fn element(name: &str, pairs: &[(&str, &str)]) -> XmlElement {
        XmlElement::new(name.to_string(), attr_map(pairs))
    }

    /// Checks the string hash against known values.
    #[test]
    fn test_java_string_hash() {
        let cases: [(&str, i32); 4] = [
            ("", 0),
            ("a", 97),
            ("ab", 97 * 31 + 98),
            ("hello", 99162322),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(java_string_hash(input), expected);
        }
    }

    /// Equal text compares equal; different text does not.
    #[test]
    fn test_text_node_equality() {
        let base = XmlText::new("hello world");
        let same = XmlText::new("hello world");
        let longer = XmlText::new("hello world!");

        assert!(base.content_equals(&same));
        assert!(!base.content_equals(&longer));
    }

    /// Elements are equal only when both name and attributes match.
    #[test]
    fn test_element_equality() {
        let base = element("div", &[("id", "foo")]);
        let same = element("div", &[("id", "foo")]);
        let other_value = element("div", &[("id", "bar")]);
        let other_name = element("span", &[]);

        assert!(base.content_equals(&same));
        assert!(!base.content_equals(&other_value));
        assert!(!base.content_equals(&other_name));
    }

    /// Information sizes for short text, long text, and elements with and
    /// without attributes.
    #[test]
    fn test_info_size() {
        let short_text = XmlText::new("hi");
        assert_eq!(short_text.info_size(), 1);

        let long_text = XmlText::new("hello world");
        assert_eq!(long_text.info_size(), 11 - TEXT_THRESHOLD);

        let bare = element("div", &[]);
        assert_eq!(bare.info_size(), ELEMENT_NAME_INFO);

        let with_attr = element("div", &[("id", "x")]);
        assert_eq!(with_attr.info_size(), ELEMENT_NAME_INFO + ATTR_INFO + 1);
    }

    /// Variant predicates and accessors agree with the wrapped variant.
    #[test]
    fn test_xml_content_enum() {
        let elem = XmlContent::Element(element("div", &[]));
        let text = XmlContent::Text(XmlText::new("hello"));

        assert!(elem.is_element());
        assert!(!elem.is_text());
        assert!(!text.is_element());
        assert!(text.is_text());

        assert!(elem.as_element().is_some());
        assert!(elem.as_text().is_none());
        assert!(text.as_text().is_some());
        assert!(text.as_element().is_none());
    }
}