Skip to main content

feedparser_rs/util/
text.rs

//! Text processing utilities
//!
//! This module provides functions for text manipulation, such as lossy
//! byte-to-string conversion, length truncation, and RSS author-string parsing.
5
6use crate::types::{Email, Person};
7
/// Converts a byte slice into an owned `String`, tolerating invalid UTF-8.
///
/// The input is validated with `std::str::from_utf8()`. Valid input is copied
/// into the returned `String` unchanged; invalid input is converted with
/// `String::from_utf8_lossy()`, which replaces malformed sequences with
/// U+FFFD (the Unicode replacement character).
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::bytes_to_string;
///
/// let valid_utf8 = b"Hello, world!";
/// assert_eq!(bytes_to_string(valid_utf8), "Hello, world!");
///
/// let invalid_utf8 = &[0xFF, 0xFE, 0xFD];
/// let result = bytes_to_string(invalid_utf8);
/// assert!(!result.is_empty()); // Lossy conversion succeeded
/// ```
#[inline]
pub fn bytes_to_string(value: &[u8]) -> String {
    match std::str::from_utf8(value) {
        // Already valid UTF-8: copy the text as-is.
        Ok(text) => text.to_owned(),
        // Malformed input: fall back to the lossy decoder.
        Err(_) => String::from_utf8_lossy(value).into_owned(),
    }
}
32
/// Returns `s` limited to at most `max_len` characters.
///
/// Length is measured in `char`s (Unicode scalar values), not bytes. A cheap
/// byte-length comparison is tried first so that short inputs are returned
/// without walking their characters. Used to cap oversized attribute/text
/// values that could otherwise cause memory issues.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::truncate_to_length;
///
/// assert_eq!(truncate_to_length("hello world", 5), "hello");
/// assert_eq!(truncate_to_length("hi", 100), "hi");
/// assert_eq!(truncate_to_length("", 10), "");
/// ```
#[inline]
#[must_use]
pub fn truncate_to_length(s: &str, max_len: usize) -> String {
    // Every char occupies at least one byte, so a byte length within the
    // limit guarantees the char count is within the limit too.
    if s.len() <= max_len {
        return s.to_owned();
    }

    // Locate the byte offset of the first char past the limit and cut there;
    // if there is no such char, the whole string already fits.
    match s.char_indices().nth(max_len) {
        Some((cut, _)) => s[..cut].to_owned(),
        None => s.to_owned(),
    }
}
56
57/// Parses an RSS person string into a structured `Person`.
58///
59/// Recognizes common RSS author formats:
60/// - `email (Name)` → `Person { name: Some("Name"), email: Some("email") }`
61/// - `Name <email>` → `Person { name: Some("Name"), email: Some("email") }`
62/// - bare email (contains `@`) → `Person { email: Some(text) }`
63/// - plain string → `Person { name: Some(text) }`
64///
65/// # Examples
66///
67/// ```
68/// use feedparser_rs::util::text::parse_rss_person;
69///
70/// let p = parse_rss_person("editor@example.com (John Editor)");
71/// assert_eq!(p.name.as_deref(), Some("John Editor"));
72/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
73///
74/// let p = parse_rss_person("John Editor <editor@example.com>");
75/// assert_eq!(p.name.as_deref(), Some("John Editor"));
76/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
77///
78/// let p = parse_rss_person("just-a-name");
79/// assert_eq!(p.name.as_deref(), Some("just-a-name"));
80/// assert!(p.email.is_none());
81/// ```
82#[must_use]
83pub fn parse_rss_person(text: &str) -> Person {
84    let text = text.trim();
85
86    // Pattern: "email (Name)"
87    if let Some(paren_start) = text.find('(')
88        && text.ends_with(')')
89    {
90        let email_part = text[..paren_start].trim();
91        let name_part = text[paren_start + 1..text.len() - 1].trim();
92        if !email_part.is_empty() {
93            return Person {
94                name: if name_part.is_empty() {
95                    None
96                } else {
97                    Some(name_part.into())
98                },
99                email: Some(Email::new(email_part.to_string())),
100                uri: None,
101                avatar: None,
102            };
103        }
104    }
105
106    // Pattern: "Name <email>"
107    if let Some(angle_start) = text.rfind('<')
108        && text.ends_with('>')
109    {
110        let name_part = text[..angle_start].trim();
111        let email_part = text[angle_start + 1..text.len() - 1].trim();
112        if !email_part.is_empty() {
113            return Person {
114                name: if name_part.is_empty() {
115                    None
116                } else {
117                    Some(name_part.into())
118                },
119                email: Some(Email::new(email_part.to_string())),
120                uri: None,
121                avatar: None,
122            };
123        }
124    }
125
126    // Bare email or plain name
127    if text.contains('@') {
128        Person {
129            name: None,
130            email: Some(Email::new(text.to_string())),
131            uri: None,
132            avatar: None,
133        }
134    } else {
135        Person::from_name(text)
136    }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and checks the `name` and `email` fields of the result.
    fn assert_person(input: &str, expected_name: Option<&str>, expected_email: Option<&str>) {
        let person = parse_rss_person(input);
        assert_eq!(person.name.as_deref(), expected_name, "name for {input:?}");
        assert_eq!(person.email.as_deref(), expected_email, "email for {input:?}");
    }

    // "email (Name)" form yields both fields.
    #[test]
    fn test_parse_rss_person_email_paren_name() {
        assert_person(
            "editor@example.com (John Editor)",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    // "Name <email>" form yields both fields.
    #[test]
    fn test_parse_rss_person_name_angle_email() {
        assert_person(
            "John Editor <editor@example.com>",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    // A string containing '@' with no delimiters is an email without a name.
    #[test]
    fn test_parse_rss_person_bare_email() {
        assert_person("editor@example.com", None, Some("editor@example.com"));
    }

    // A string without '@' or delimiters is a name without an email.
    #[test]
    fn test_parse_rss_person_plain_name() {
        assert_person("John Editor", Some("John Editor"), None);
    }

    // Empty parentheses leave the name unset but still capture the email.
    #[test]
    fn test_parse_rss_person_empty_name_in_parens() {
        assert_person("editor@example.com ()", None, Some("editor@example.com"));
    }
}