1use super::error::XPathError;
10
11pub fn normalize_space(value: &str) -> String {
24 let mut result = String::with_capacity(value.len());
25 let mut prev_was_space = true; for ch in value.chars() {
28 if is_xml_whitespace(ch) {
29 if !prev_was_space {
30 result.push(' ');
31 prev_was_space = true;
32 }
33 } else {
34 result.push(ch);
35 prev_was_space = false;
36 }
37 }
38
39 if result.ends_with(' ') {
41 result.pop();
42 }
43
44 result
45}
46
/// Returns `true` when `ch` is one of the four XML whitespace characters:
/// space (U+0020), tab (U+0009), line feed (U+000A) or carriage return (U+000D).
#[inline]
pub fn is_xml_whitespace(ch: char) -> bool {
    match ch {
        '\u{20}' | '\u{9}' | '\u{A}' | '\u{D}' => true,
        _ => false,
    }
}
54
/// Returns `true` when every byte of `s` is XML whitespace (space, tab,
/// line feed or carriage return).
///
/// The empty string contains no non-whitespace byte and therefore yields `true`.
#[inline]
pub fn is_xml_whitespace_str(s: &str) -> bool {
    // Working on bytes is sufficient: all four whitespace characters are
    // ASCII, and every byte of a multi-byte UTF-8 sequence is >= 0x80.
    for &byte in s.as_bytes() {
        let is_space = byte == b' ' || byte == b'\t' || byte == b'\n' || byte == b'\r';
        if !is_space {
            return false;
        }
    }
    true
}
62
63pub fn normalize_string_value(
84 value: &str,
85 is_attr: bool,
86 raise_on_error: bool,
87) -> Result<String, XPathError> {
88 let mut result = String::with_capacity(value.len());
89 let mut chars = value.chars().peekable();
90
91 while let Some(ch) = chars.next() {
92 if ch == '&' {
93 let mut entity = String::new();
95
96 loop {
97 match chars.next() {
98 Some(';') => break,
99 Some(c) => entity.push(c),
100 None => {
101 if raise_on_error {
102 return Err(XPathError::syntax_error(
103 "Entity reference not terminated by semicolon",
104 ));
105 }
106 result.push('&');
107 result.push_str(&entity);
108 break;
109 }
110 }
111 }
112
113 match resolve_entity(&entity) {
114 Some(resolved) => result.push(resolved),
115 None => {
116 if raise_on_error {
117 return Err(XPathError::syntax_error(format!(
118 "Unknown entity reference '&{};'",
119 entity
120 )));
121 }
122 result.push('&');
123 result.push_str(&entity);
124 result.push(';');
125 }
126 }
127 } else if is_attr && (ch == '\t' || ch == '\n' || ch == '\r') {
128 result.push(' ');
130 } else if ch == '\r' {
131 if chars.peek() == Some(&'\n') {
133 chars.next();
134 }
135 result.push('\n');
136 } else {
137 result.push(ch);
138 }
139 }
140
141 Ok(result)
142}
143
144fn resolve_entity(entity: &str) -> Option<char> {
146 match entity {
147 "lt" => Some('<'),
148 "gt" => Some('>'),
149 "amp" => Some('&'),
150 "quot" => Some('"'),
151 "apos" => Some('\''),
152 _ if entity.starts_with('#') => resolve_numeric_entity(&entity[1..]),
153 _ => None,
154 }
155}
156
/// Resolves the body of a numeric character reference (the text after `&#`).
///
/// `entity` is either decimal digits (`65`) or a lowercase `x` followed by
/// hexadecimal digits (`x41`). Returns `None` when the text is empty, contains
/// anything other than digits of the selected radix, does not fit in a `u32`,
/// or does not denote a valid Unicode scalar value (e.g. a surrogate).
fn resolve_numeric_entity(entity: &str) -> Option<char> {
    let (digits, radix) = match entity.strip_prefix('x') {
        Some(hex) => (hex, 16),
        None => (entity, 10),
    };

    // The integer parsers accept a leading '+', which is not part of the
    // character-reference syntax, so validate the digits explicitly first.
    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
        return None;
    }

    let code = u32::from_str_radix(digits, radix).ok()?;
    char::from_u32(code)
}
167
/// Concatenates all `values`, in order, into one newly allocated string.
///
/// An empty slice produces the empty string.
pub fn concat(values: &[&str]) -> String {
    let total_len: usize = values.iter().map(|piece| piece.len()).sum();
    let mut joined = String::with_capacity(total_len);
    for piece in values {
        joined.push_str(piece);
    }
    joined
}
172
/// Returns `true` when `value` begins with `prefix`.
///
/// An empty `prefix` matches every string.
pub fn starts_with(value: &str, prefix: &str) -> bool {
    value.strip_prefix(prefix).is_some()
}
177
/// Returns `true` when `value` finishes with `suffix`.
///
/// An empty `suffix` matches every string.
pub fn ends_with(value: &str, suffix: &str) -> bool {
    value.strip_suffix(suffix).is_some()
}
182
/// Returns `true` when `substring` occurs anywhere inside `value`.
///
/// An empty `substring` is found in every string.
pub fn contains(value: &str, substring: &str) -> bool {
    value.find(substring).is_some()
}
187
/// Returns the part of `value` that precedes the first occurrence of `pattern`.
///
/// The result is empty when `pattern` does not occur in `value`, and also when
/// `pattern` is empty (it then matches at offset 0).
pub fn substring_before(value: &str, pattern: &str) -> String {
    value
        .find(pattern)
        .map(|offset| value[..offset].to_owned())
        .unwrap_or_default()
}
195
/// Returns the part of `value` that follows the first occurrence of `pattern`.
///
/// The result is empty when `pattern` does not occur in `value`; an empty
/// `pattern` matches at offset 0, so the whole of `value` is returned.
pub fn substring_after(value: &str, pattern: &str) -> String {
    if let Some(offset) = value.find(pattern) {
        // Skip past the matched pattern itself (byte length, as `find`
        // returns a byte offset).
        let tail_start = offset + pattern.len();
        return value[tail_start..].to_owned();
    }
    String::new()
}
203
/// Returns the length of `value` counted in Unicode scalar values (`char`s),
/// not in bytes.
pub fn string_length(value: &str) -> usize {
    value.chars().fold(0, |count, _| count + 1)
}
208
/// XPath `substring()`: returns the characters of `value` whose 1-based
/// position `p` satisfies `round(start) <= p < round(start) + round(length)`.
///
/// When `length` is `None` the substring extends to the end of `value`.
/// Positions count characters (`char`s), not bytes.
///
/// The range arithmetic is done in `f64`, so infinities and NaN follow the
/// XPath rules without any integer overflow:
///
/// * a NaN `start` or `length` yields the empty string;
/// * `start = -inf` combined with `length = +inf` yields the empty string
///   (their sum is NaN, so no position satisfies the upper bound);
/// * a finite `start` with `length = +inf` extends to the end of `value`.
pub fn substring(value: &str, start: f64, length: Option<f64>) -> String {
    /// XPath `round()`: nearest integer, ties rounded toward positive
    /// infinity (`f64::round` rounds ties away from zero instead).
    /// Infinities and NaN pass through unchanged.
    fn xpath_round(x: f64) -> f64 {
        let floor = x.floor();
        if x - floor >= 0.5 {
            floor + 1.0
        } else {
            floor
        }
    }

    if start.is_nan() {
        return String::new();
    }

    // Inclusive first position and exclusive end position of the range.
    let first = xpath_round(start);
    let end = match length {
        Some(len) => first + xpath_round(len),
        None => f64::INFINITY,
    };

    // Every comparison against NaN is false, so a NaN `end` selects nothing.
    value
        .chars()
        .enumerate()
        .map(|(idx, ch)| ((idx + 1) as f64, ch))
        .skip_while(|&(pos, _)| pos < first)
        .take_while(|&(pos, _)| pos < end)
        .map(|(_, ch)| ch)
        .collect()
}
269
/// Returns `value` converted to upper case using the standard library's
/// Unicode-aware `str::to_uppercase` mapping.
pub fn upper_case(value: &str) -> String {
    str::to_uppercase(value)
}
274
/// Returns `value` converted to lower case using the standard library's
/// Unicode-aware `str::to_lowercase` mapping.
pub fn lower_case(value: &str) -> String {
    str::to_lowercase(value)
}
279
/// XPath-style `translate()`: rewrites `value` character by character.
///
/// A character that appears in `map_from` is replaced by the character at the
/// same position in `map_to`; if `map_to` is too short to have that position,
/// the character is dropped. Characters not listed in `map_from` are copied
/// unchanged. When a character occurs several times in `map_from`, its first
/// occurrence decides.
pub fn translate(value: &str, map_from: &str, map_to: &str) -> String {
    let sources: Vec<char> = map_from.chars().collect();
    let targets: Vec<char> = map_to.chars().collect();

    let mut translated = String::with_capacity(value.len());
    for ch in value.chars() {
        match sources.iter().position(|&candidate| candidate == ch) {
            // Not mentioned in the mapping: keep as is.
            None => translated.push(ch),
            // Mapped: substitute, or drop when there is no counterpart.
            Some(index) => {
                if let Some(&replacement) = targets.get(index) {
                    translated.push(replacement);
                }
            }
        }
    }
    translated
}
303
/// Returns the Unicode code point of every character in `value`, in order.
pub fn string_to_codepoints(value: &str) -> Vec<u32> {
    value.chars().map(u32::from).collect()
}
308
/// Builds a string from a sequence of Unicode code points.
///
/// Returns `None` as soon as one value is not a valid Unicode scalar value
/// (for example a surrogate or a number above U+10FFFF). An empty slice
/// produces `Some` of the empty string.
pub fn codepoints_to_string(codepoints: &[u32]) -> Option<String> {
    let mut text = String::with_capacity(codepoints.len());
    for &code in codepoints {
        text.push(char::from_u32(code)?);
    }
    Some(text)
}
316
/// Compares two strings using Rust's `str` ordering (byte-wise, which for
/// UTF-8 is code point order).
///
/// Returns `-1` when `a` sorts before `b`, `0` when they are equal and `1`
/// when `a` sorts after `b`.
pub fn compare(a: &str, b: &str) -> i32 {
    // `Ordering` is defined with the discriminants Less = -1, Equal = 0,
    // Greater = 1, so the cast yields exactly the documented values.
    a.cmp(b) as i32
}
327
/// Joins `values` into one string, inserting `separator` between each pair of
/// adjacent items. An empty slice produces the empty string.
pub fn string_join(values: &[&str], separator: &str) -> String {
    let mut joined = String::new();
    for (index, piece) in values.iter().enumerate() {
        if index > 0 {
            joined.push_str(separator);
        }
        joined.push_str(piece);
    }
    joined
}
334
/// The Unicode normalization forms that can be requested by name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnicodeNormalizationForm {
    /// Normalization Form C.
    NFC,
    /// Normalization Form D.
    NFD,
    /// Normalization Form KC.
    NFKC,
    /// Normalization Form KD.
    NFKD,
}

impl UnicodeNormalizationForm {
    /// Parses a normalization form name.
    ///
    /// Surrounding whitespace is ignored and the comparison is ASCII
    /// case-insensitive. Returns `None` for an empty (or all-whitespace) name
    /// as well as for any name that is not one of `NFC`, `NFD`, `NFKC`, `NFKD`.
    pub fn parse(s: &str) -> Option<Self> {
        let trimmed = s.trim();
        let known = [
            ("NFC", Self::NFC),
            ("NFD", Self::NFD),
            ("NFKC", Self::NFKC),
            ("NFKD", Self::NFKD),
        ];

        let found = known
            .iter()
            .find(|(name, _)| trimmed.eq_ignore_ascii_case(name))
            .map(|&(_, form)| form);
        found
    }
}
368
/// Applies the requested Unicode normalization form to `value`.
///
/// Compiled only when the `unicode-normalization` feature is enabled.
/// With `form == None` the input is returned unchanged (as an owned copy).
#[cfg(feature = "unicode-normalization")]
pub fn normalize_unicode(value: &str, form: Option<UnicodeNormalizationForm>) -> String {
    use unicode_normalization::UnicodeNormalization;

    // No form requested: nothing to normalize.
    let selected = match form {
        Some(selected) => selected,
        None => return value.to_string(),
    };

    match selected {
        UnicodeNormalizationForm::NFC => value.nfc().collect(),
        UnicodeNormalizationForm::NFD => value.nfd().collect(),
        UnicodeNormalizationForm::NFKC => value.nfkc().collect(),
        UnicodeNormalizationForm::NFKD => value.nfkd().collect(),
    }
}
385
386#[cfg(not(feature = "unicode-normalization"))]
390pub fn normalize_unicode(
391 value: &str,
392 form: Option<UnicodeNormalizationForm>,
393) -> Result<String, super::error::XPathError> {
394 match form {
395 None => Ok(value.to_string()),
396 Some(f) => Err(super::error::XPathError::not_implemented(format!(
397 "Unicode normalization form {:?} requires unicode-normalization feature",
398 f
399 ))),
400 }
401}
402
403pub fn encode_for_uri(value: &str) -> String {
408 let mut result = String::with_capacity(value.len() * 3);
409 for byte in value.bytes() {
410 if byte.is_ascii_alphanumeric()
411 || byte == b'-'
412 || byte == b'_'
413 || byte == b'.'
414 || byte == b'~'
415 {
416 result.push(byte as char);
417 } else {
418 result.push('%');
419 result.push(to_hex_digit(byte >> 4));
420 result.push(to_hex_digit(byte & 0x0F));
421 }
422 }
423 result
424}
425
426pub fn iri_to_uri(value: &str) -> String {
431 let mut result = String::with_capacity(value.len() * 3);
432 for byte in value.bytes() {
433 if byte == b' ' {
435 result.push_str("%20");
436 } else if (0x20..0x7F).contains(&byte)
437 && byte != b'<'
438 && byte != b'>'
439 && byte != b'"'
440 && byte != b'{'
441 && byte != b'}'
442 && byte != b'|'
443 && byte != b'\\'
444 && byte != b'^'
445 && byte != b'`'
446 {
447 result.push(byte as char);
448 } else {
449 result.push('%');
450 result.push(to_hex_digit(byte >> 4));
451 result.push(to_hex_digit(byte & 0x0F));
452 }
453 }
454 result
455}
456
457pub fn escape_html_uri(value: &str) -> String {
461 let mut result = String::with_capacity(value.len() * 3);
462 for byte in value.bytes() {
463 if (0x20..0x7F).contains(&byte) {
464 result.push(byte as char);
465 } else {
466 result.push('%');
467 result.push(to_hex_digit(byte >> 4));
468 result.push(to_hex_digit(byte & 0x0F));
469 }
470 }
471 result
472}
473
/// Maps a nibble (0..=15) to its upper-case hexadecimal digit
/// (`'0'..='9'`, `'A'..='F'`).
///
/// Callers in this file only pass values obtained via `>> 4` or `& 0x0F`.
#[inline]
fn to_hex_digit(nibble: u8) -> char {
    match nibble {
        0..=9 => char::from(b'0' + nibble),
        _ => char::from(b'A' + nibble - 10),
    }
}
483
/// Returns `true` when `a` and `b` consist of exactly the same sequence of
/// Unicode code points. No normalization or case folding is applied.
pub fn codepoint_equal(a: &str, b: &str) -> bool {
    a.chars().eq(b.chars())
}
490
/// Unit tests for the string helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_space() {
        assert_eq!(normalize_space("  hello   world  "), "hello world");
        assert_eq!(normalize_space("\t\nhello\r\nworld\t"), "hello world");
        assert_eq!(normalize_space(""), "");
        assert_eq!(normalize_space("   "), "");
        assert_eq!(normalize_space("no extra spaces"), "no extra spaces");
    }

    #[test]
    fn test_is_xml_whitespace() {
        assert!(is_xml_whitespace(' '));
        assert!(is_xml_whitespace('\t'));
        assert!(is_xml_whitespace('\n'));
        assert!(is_xml_whitespace('\r'));
        assert!(!is_xml_whitespace('a'));
    }

    #[test]
    fn test_is_xml_whitespace_str() {
        assert!(is_xml_whitespace_str(""));
        assert!(is_xml_whitespace_str("   "));
        assert!(is_xml_whitespace_str(" \t\n\r"));
        assert!(!is_xml_whitespace_str("hello"));
        assert!(!is_xml_whitespace_str(" a "));
    }

    #[test]
    fn test_normalize_string_value_entities() {
        // The input must contain the escaped references themselves so that
        // the predefined-entity resolution is actually exercised.
        assert_eq!(
            normalize_string_value("&lt;&gt;&amp;&quot;&apos;", false, true).unwrap(),
            "<>&\"'"
        );
    }

    #[test]
    fn test_normalize_string_value_numeric_entities() {
        // One decimal and one hexadecimal character reference.
        assert_eq!(
            normalize_string_value("&#65;&#x42;", false, true).unwrap(),
            "AB"
        );
    }

    #[test]
    fn test_normalize_string_value_attr() {
        assert_eq!(
            normalize_string_value("a\tb\nc", true, true).unwrap(),
            "a b c"
        );
    }

    #[test]
    fn test_normalize_string_value_newlines() {
        assert_eq!(
            normalize_string_value("a\r\nb\rc\n", false, true).unwrap(),
            "a\nb\nc\n"
        );
    }

    #[test]
    fn test_concat() {
        assert_eq!(concat(&["a", "b", "c"]), "abc");
        assert_eq!(concat(&[]), "");
    }

    #[test]
    fn test_starts_ends_with() {
        assert!(starts_with("hello", "he"));
        assert!(!starts_with("hello", "lo"));
        assert!(ends_with("hello", "lo"));
        assert!(!ends_with("hello", "he"));
    }

    #[test]
    fn test_substring_before_after() {
        assert_eq!(substring_before("hello world", " "), "hello");
        assert_eq!(substring_after("hello world", " "), "world");
        assert_eq!(substring_before("hello", " "), "");
        assert_eq!(substring_after("hello", " "), "");
    }

    #[test]
    fn test_string_length() {
        assert_eq!(string_length("hello"), 5);
        assert_eq!(string_length(""), 0);
        // Length is counted in characters, not bytes.
        assert_eq!(string_length("日本語"), 3);
    }

    #[test]
    fn test_substring() {
        assert_eq!(substring("hello", 2.0, Some(3.0)), "ell");
        assert_eq!(substring("hello", 2.0, None), "ello");
        assert_eq!(substring("hello", 1.0, Some(5.0)), "hello");
        assert_eq!(substring("hello", 0.0, Some(3.0)), "he");
    }

    #[test]
    fn test_case_conversion() {
        assert_eq!(upper_case("Hello World"), "HELLO WORLD");
        assert_eq!(lower_case("Hello World"), "hello world");
    }

    #[test]
    fn test_translate() {
        assert_eq!(translate("bar", "abc", "ABC"), "BAr");
        assert_eq!(translate("--aaa--", "abc-", "ABC"), "AAA");
    }

    #[test]
    fn test_codepoints() {
        assert_eq!(string_to_codepoints("ABC"), vec![65, 66, 67]);
        assert_eq!(codepoints_to_string(&[65, 66, 67]).unwrap(), "ABC");
    }

    #[test]
    fn test_compare() {
        assert_eq!(compare("abc", "abd"), -1);
        assert_eq!(compare("abc", "abc"), 0);
        assert_eq!(compare("abd", "abc"), 1);
    }
}