feedparser_rs/util/text.rs
1//! Text processing utilities
2//!
3//! This module provides functions for text manipulation,
4//! such as trimming, normalizing whitespace, and encoding conversion.
5
/// Converts a byte slice into an owned `String`, tolerating invalid UTF-8.
///
/// Valid UTF-8 input is copied into the returned `String` unchanged. Any
/// invalid byte sequence is replaced with U+FFFD (the Unicode replacement
/// character), so the conversion never fails.
///
/// The input is validated in a single pass. Because the result is an owned
/// `String`, this function always allocates; callers that only need a
/// borrowed view of valid input should use `std::str::from_utf8` directly.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::bytes_to_string;
///
/// let valid_utf8 = b"Hello, world!";
/// assert_eq!(bytes_to_string(valid_utf8), "Hello, world!");
///
/// let invalid_utf8 = &[0xFF, 0xFE, 0xFD];
/// let result = bytes_to_string(invalid_utf8);
/// assert_eq!(result, "\u{FFFD}\u{FFFD}\u{FFFD}"); // Each bad byte is replaced
/// ```
#[inline]
#[must_use]
pub fn bytes_to_string(value: &[u8]) -> String {
    // `from_utf8_lossy` borrows when the input is already valid UTF-8 and only
    // builds a new buffer when replacements are needed; `into_owned` then
    // yields the `String` in both cases.
    String::from_utf8_lossy(value).into_owned()
}
30
/// Returns a copy of `s` limited to at most `max_len` characters.
///
/// The limit is counted in Unicode scalar values (`char`s), not bytes, so
/// multi-byte characters are never split. A cheap byte-length comparison
/// runs first: a string whose byte length fits within `max_len` cannot
/// contain more than `max_len` characters and is returned whole.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::truncate_to_length;
///
/// assert_eq!(truncate_to_length("hello world", 5), "hello");
/// assert_eq!(truncate_to_length("hi", 100), "hi");
/// assert_eq!(truncate_to_length("", 10), "");
/// ```
#[inline]
#[must_use]
pub fn truncate_to_length(s: &str, max_len: usize) -> String {
    // Fast path: byte length is an upper bound on the character count.
    if s.len() <= max_len {
        return s.to_owned();
    }

    // Byte offset where the character at index `max_len` starts; if the
    // string has no such character, it is short enough to keep in full.
    let cut = s
        .char_indices()
        .nth(max_len)
        .map_or(s.len(), |(offset, _)| offset);

    // `cut` always falls on a character boundary, so slicing cannot panic.
    s[..cut].to_owned()
}