Skip to main content

feedparser_rs/util/
text.rs

1//! Text processing utilities
2//!
3//! This module provides functions for text manipulation,
4//! such as trimming, normalizing whitespace, and encoding conversion.
5
6use crate::types::{Email, Person};
7
/// Converts a byte slice into an owned `String`.
///
/// The input is validated with `std::str::from_utf8()` first. Valid UTF-8 is
/// copied into the result unchanged; if validation fails, the bytes are
/// decoded with `String::from_utf8_lossy()`, which substitutes U+FFFD for
/// every invalid sequence.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::bytes_to_string;
///
/// let valid_utf8 = b"Hello, world!";
/// assert_eq!(bytes_to_string(valid_utf8), "Hello, world!");
///
/// let invalid_utf8 = &[0xFF, 0xFE, 0xFD];
/// let result = bytes_to_string(invalid_utf8);
/// assert!(!result.is_empty()); // Lossy conversion succeeded
/// ```
#[inline]
pub fn bytes_to_string(value: &[u8]) -> String {
    match std::str::from_utf8(value) {
        // Already valid: a plain copy into an owned buffer is all that is needed.
        Ok(text) => text.to_owned(),
        // Invalid somewhere: let the lossy decoder patch the bad sequences.
        Err(_) => String::from_utf8_lossy(value).into_owned(),
    }
}
32
/// Returns at most the first `max_len` characters of `s` as a new `String`.
///
/// The limit counts Unicode scalar values (`char`s), not bytes, so the cut
/// never lands inside a multi-byte character. Because a string's byte length
/// is never smaller than its character count, a string whose byte length
/// already fits is returned whole without walking its characters.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::truncate_to_length;
///
/// assert_eq!(truncate_to_length("hello world", 5), "hello");
/// assert_eq!(truncate_to_length("hi", 100), "hi");
/// assert_eq!(truncate_to_length("", 10), "");
/// ```
#[inline]
#[must_use]
pub fn truncate_to_length(s: &str, max_len: usize) -> String {
    // Cheap check first: byte length bounds the character count from above.
    if s.len() <= max_len {
        return s.to_owned();
    }

    // Byte offset where character number `max_len` starts; if the string has
    // fewer characters than that, keep all of it.
    let cut = s
        .char_indices()
        .nth(max_len)
        .map_or(s.len(), |(offset, _)| offset);

    s[..cut].to_owned()
}
56
57/// Parses an RSS person string into a structured `Person`.
58///
59/// Recognizes common RSS author formats:
60/// - `email (Name)` → `Person { name: Some("Name"), email: Some("email") }`
61/// - `Name <email>` → `Person { name: Some("Name"), email: Some("email") }`
62/// - bare email (contains `@`) → `Person { email: Some(text) }`
63/// - plain string → `Person { name: Some(text) }`
64///
65/// # Examples
66///
67/// ```
68/// use feedparser_rs::util::text::parse_rss_person;
69///
70/// let p = parse_rss_person("editor@example.com (John Editor)");
71/// assert_eq!(p.name.as_deref(), Some("John Editor"));
72/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
73///
74/// let p = parse_rss_person("John Editor <editor@example.com>");
75/// assert_eq!(p.name.as_deref(), Some("John Editor"));
76/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
77///
78/// let p = parse_rss_person("just-a-name");
79/// assert_eq!(p.name.as_deref(), Some("just-a-name"));
80/// assert!(p.email.is_none());
81/// ```
82#[must_use]
83pub fn parse_rss_person(text: &str) -> Person {
84    let text = text.trim();
85
86    // Pattern: "email (Name)"
87    if let Some(paren_start) = text.find('(')
88        && text.ends_with(')')
89    {
90        let email_part = text[..paren_start].trim();
91        let name_part = text[paren_start + 1..text.len() - 1].trim();
92        if !email_part.is_empty() {
93            return Person {
94                name: if name_part.is_empty() {
95                    None
96                } else {
97                    Some(name_part.into())
98                },
99                email: Some(Email::new(email_part.to_string())),
100                uri: None,
101            };
102        }
103    }
104
105    // Pattern: "Name <email>"
106    if let Some(angle_start) = text.rfind('<')
107        && text.ends_with('>')
108    {
109        let name_part = text[..angle_start].trim();
110        let email_part = text[angle_start + 1..text.len() - 1].trim();
111        if !email_part.is_empty() {
112            return Person {
113                name: if name_part.is_empty() {
114                    None
115                } else {
116                    Some(name_part.into())
117                },
118                email: Some(Email::new(email_part.to_string())),
119                uri: None,
120            };
121        }
122    }
123
124    // Bare email or plain name
125    if text.contains('@') {
126        Person {
127            name: None,
128            email: Some(Email::new(text.to_string())),
129            uri: None,
130        }
131    } else {
132        Person::from_name(text)
133    }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and checks the resulting name and email against the
    /// expected values (`None` meaning the field must be absent).
    #[track_caller]
    fn assert_person(input: &str, expected_name: Option<&str>, expected_email: Option<&str>) {
        let person = parse_rss_person(input);
        assert_eq!(person.name.as_deref(), expected_name);
        assert_eq!(person.email.as_deref(), expected_email);
    }

    #[test]
    fn test_parse_rss_person_email_paren_name() {
        assert_person(
            "editor@example.com (John Editor)",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    #[test]
    fn test_parse_rss_person_name_angle_email() {
        assert_person(
            "John Editor <editor@example.com>",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    #[test]
    fn test_parse_rss_person_bare_email() {
        assert_person("editor@example.com", None, Some("editor@example.com"));
    }

    #[test]
    fn test_parse_rss_person_plain_name() {
        assert_person("John Editor", Some("John Editor"), None);
    }

    #[test]
    fn test_parse_rss_person_empty_name_in_parens() {
        assert_person("editor@example.com ()", None, Some("editor@example.com"));
    }
}
174}