feedparser_rs/util/text.rs
1//! Text processing utilities
2//!
3//! This module provides functions for text manipulation,
4//! such as trimming, normalizing whitespace, and encoding conversion.
5
6use crate::types::{Email, Person};
7
/// Converts a byte slice into an owned `String`, tolerating invalid UTF-8.
///
/// The input is first validated with `std::str::from_utf8()`. Valid UTF-8 is
/// copied into the result unchanged; otherwise the bytes go through
/// `String::from_utf8_lossy()`, which substitutes U+FFFD REPLACEMENT CHARACTER
/// for every invalid sequence. The conversion therefore never fails.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::bytes_to_string;
///
/// let valid_utf8 = b"Hello, world!";
/// assert_eq!(bytes_to_string(valid_utf8), "Hello, world!");
///
/// let invalid_utf8 = &[0xFF, 0xFE, 0xFD];
/// let result = bytes_to_string(invalid_utf8);
/// assert!(!result.is_empty()); // Lossy conversion succeeded
/// ```
#[inline]
pub fn bytes_to_string(value: &[u8]) -> String {
    match std::str::from_utf8(value) {
        // Already valid: a plain copy is all that is needed.
        Ok(text) => text.to_owned(),
        // Invalid somewhere: let the lossy decoder patch the bad sequences.
        Err(_) => String::from_utf8_lossy(value).into_owned(),
    }
}
32
/// Returns at most the first `max_len` characters of `s` as a new `String`.
///
/// The limit is counted in Unicode scalar values (`char`s), not bytes, so a
/// multi-byte character is never split. Strings that already fit within the
/// limit are returned unchanged.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::truncate_to_length;
///
/// assert_eq!(truncate_to_length("hello world", 5), "hello");
/// assert_eq!(truncate_to_length("hi", 100), "hi");
/// assert_eq!(truncate_to_length("", 10), "");
/// ```
#[inline]
#[must_use]
pub fn truncate_to_length(s: &str, max_len: usize) -> String {
    // A string never has more chars than bytes, so a short-enough byte length
    // proves the char limit is met without walking the string.
    if s.len() <= max_len {
        return s.to_owned();
    }

    // The char at index `max_len` (if any) starts exactly where the kept
    // prefix ends; its byte offset is always a valid char boundary.
    match s.char_indices().nth(max_len) {
        Some((cut, _)) => s[..cut].to_owned(),
        None => s.to_owned(),
    }
}
56
57/// Parses an RSS person string into a structured `Person`.
58///
59/// Recognizes common RSS author formats:
60/// - `email (Name)` → `Person { name: Some("Name"), email: Some("email") }`
61/// - `Name <email>` → `Person { name: Some("Name"), email: Some("email") }`
62/// - bare email (contains `@`) → `Person { email: Some(text) }`
63/// - plain string → `Person { name: Some(text) }`
64///
65/// # Examples
66///
67/// ```
68/// use feedparser_rs::util::text::parse_rss_person;
69///
70/// let p = parse_rss_person("editor@example.com (John Editor)");
71/// assert_eq!(p.name.as_deref(), Some("John Editor"));
72/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
73///
74/// let p = parse_rss_person("John Editor <editor@example.com>");
75/// assert_eq!(p.name.as_deref(), Some("John Editor"));
76/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
77///
78/// let p = parse_rss_person("just-a-name");
79/// assert_eq!(p.name.as_deref(), Some("just-a-name"));
80/// assert!(p.email.is_none());
81/// ```
82#[must_use]
83pub fn parse_rss_person(text: &str) -> Person {
84 let text = text.trim();
85
86 // Pattern: "email (Name)"
87 if let Some(paren_start) = text.find('(')
88 && text.ends_with(')')
89 {
90 let email_part = text[..paren_start].trim();
91 let name_part = text[paren_start + 1..text.len() - 1].trim();
92 if !email_part.is_empty() {
93 return Person {
94 name: if name_part.is_empty() {
95 None
96 } else {
97 Some(name_part.into())
98 },
99 email: Some(Email::new(email_part.to_string())),
100 uri: None,
101 avatar: None,
102 };
103 }
104 }
105
106 // Pattern: "Name <email>"
107 if let Some(angle_start) = text.rfind('<')
108 && text.ends_with('>')
109 {
110 let name_part = text[..angle_start].trim();
111 let email_part = text[angle_start + 1..text.len() - 1].trim();
112 if !email_part.is_empty() {
113 return Person {
114 name: if name_part.is_empty() {
115 None
116 } else {
117 Some(name_part.into())
118 },
119 email: Some(Email::new(email_part.to_string())),
120 uri: None,
121 avatar: None,
122 };
123 }
124 }
125
126 // Bare email or plain name
127 if text.contains('@') {
128 Person {
129 name: None,
130 email: Some(Email::new(text.to_string())),
131 uri: None,
132 avatar: None,
133 }
134 } else {
135 Person::from_name(text)
136 }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and checks the resulting name and email in one step.
    fn assert_person(input: &str, expected_name: Option<&str>, expected_email: Option<&str>) {
        let person = parse_rss_person(input);
        assert_eq!(person.name.as_deref(), expected_name);
        assert_eq!(person.email.as_deref(), expected_email);
    }

    #[test]
    fn test_parse_rss_person_email_paren_name() {
        assert_person(
            "editor@example.com (John Editor)",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    #[test]
    fn test_parse_rss_person_name_angle_email() {
        assert_person(
            "John Editor <editor@example.com>",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    #[test]
    fn test_parse_rss_person_bare_email() {
        assert_person("editor@example.com", None, Some("editor@example.com"));
    }

    #[test]
    fn test_parse_rss_person_plain_name() {
        assert_person("John Editor", Some("John Editor"), None);
    }

    #[test]
    fn test_parse_rss_person_empty_name_in_parens() {
        // Empty parentheses must not produce an empty-string name.
        assert_person("editor@example.com ()", None, Some("editor@example.com"));
    }
}
177}