feedparser_rs/util/text.rs
//! Text processing utilities
//!
//! This module provides helpers for text handling: lossy conversion of raw
//! bytes to strings, length-limited truncation, and parsing of RSS person
//! strings into structured values.
5
6use crate::types::{Email, Person};
7
/// Converts a byte slice into an owned `String`, tolerating invalid UTF-8.
///
/// Valid UTF-8 input is copied unchanged. Every invalid byte sequence is
/// replaced with U+FFFD (the Unicode replacement character), so the
/// conversion never fails. The returned `String` is always a new allocation;
/// only the validation step avoids copying.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::bytes_to_string;
///
/// let valid_utf8 = b"Hello, world!";
/// assert_eq!(bytes_to_string(valid_utf8), "Hello, world!");
///
/// let invalid_utf8 = &[0xFF, 0xFE, 0xFD];
/// let result = bytes_to_string(invalid_utf8);
/// assert_eq!(result, "\u{FFFD}\u{FFFD}\u{FFFD}"); // Lossy conversion succeeded
/// ```
#[inline]
#[must_use]
pub fn bytes_to_string(value: &[u8]) -> String {
    // `from_utf8_lossy` borrows the input when it is already valid UTF-8 and
    // only builds a patched copy otherwise, so a single call covers both the
    // valid and the invalid case without validating the bytes twice.
    String::from_utf8_lossy(value).into_owned()
}
32
/// Returns a copy of `s` limited to at most `max_len` characters.
///
/// The limit counts Unicode scalar values (`char`s), not bytes, so the cut
/// never lands inside a multi-byte character. Strings that already fit are
/// returned unchanged.
///
/// # Examples
///
/// ```
/// use feedparser_rs::util::text::truncate_to_length;
///
/// assert_eq!(truncate_to_length("hello world", 5), "hello");
/// assert_eq!(truncate_to_length("hi", 100), "hi");
/// assert_eq!(truncate_to_length("", 10), "");
/// ```
#[inline]
#[must_use]
pub fn truncate_to_length(s: &str, max_len: usize) -> String {
    // Cheap pre-check: a string never has more chars than bytes, so a byte
    // length within the limit guarantees the char count is within it too.
    if s.len() <= max_len {
        return s.to_owned();
    }

    // Byte offset at which character number `max_len` begins; if the string
    // has fewer characters than that, keep all of it.
    let cut = s
        .char_indices()
        .nth(max_len)
        .map_or(s.len(), |(offset, _)| offset);

    s[..cut].to_owned()
}
56
57/// Parses an RSS person string into a structured `Person`.
58///
59/// Recognizes common RSS author formats:
60/// - `email (Name)` → `Person { name: Some("Name"), email: Some("email") }`
61/// - `Name <email>` → `Person { name: Some("Name"), email: Some("email") }`
62/// - bare email (contains `@`) → `Person { email: Some(text) }`
63/// - plain string → `Person { name: Some(text) }`
64///
65/// # Examples
66///
67/// ```
68/// use feedparser_rs::util::text::parse_rss_person;
69///
70/// let p = parse_rss_person("editor@example.com (John Editor)");
71/// assert_eq!(p.name.as_deref(), Some("John Editor"));
72/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
73///
74/// let p = parse_rss_person("John Editor <editor@example.com>");
75/// assert_eq!(p.name.as_deref(), Some("John Editor"));
76/// assert_eq!(p.email.as_deref(), Some("editor@example.com"));
77///
78/// let p = parse_rss_person("just-a-name");
79/// assert_eq!(p.name.as_deref(), Some("just-a-name"));
80/// assert!(p.email.is_none());
81/// ```
82#[must_use]
83pub fn parse_rss_person(text: &str) -> Person {
84 let text = text.trim();
85
86 // Pattern: "email (Name)"
87 if let Some(paren_start) = text.find('(')
88 && text.ends_with(')')
89 {
90 let email_part = text[..paren_start].trim();
91 let name_part = text[paren_start + 1..text.len() - 1].trim();
92 if !email_part.is_empty() {
93 return Person {
94 name: if name_part.is_empty() {
95 None
96 } else {
97 Some(name_part.into())
98 },
99 email: Some(Email::new(email_part.to_string())),
100 uri: None,
101 };
102 }
103 }
104
105 // Pattern: "Name <email>"
106 if let Some(angle_start) = text.rfind('<')
107 && text.ends_with('>')
108 {
109 let name_part = text[..angle_start].trim();
110 let email_part = text[angle_start + 1..text.len() - 1].trim();
111 if !email_part.is_empty() {
112 return Person {
113 name: if name_part.is_empty() {
114 None
115 } else {
116 Some(name_part.into())
117 },
118 email: Some(Email::new(email_part.to_string())),
119 uri: None,
120 };
121 }
122 }
123
124 // Bare email or plain name
125 if text.contains('@') {
126 Person {
127 name: None,
128 email: Some(Email::new(text.to_string())),
129 uri: None,
130 }
131 } else {
132 Person::from_name(text)
133 }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and compares the extracted name and email with the
    /// expected values (`None` meaning the field must be absent).
    fn assert_person(input: &str, expected_name: Option<&str>, expected_email: Option<&str>) {
        let person = parse_rss_person(input);
        assert_eq!(person.name.as_deref(), expected_name);
        assert_eq!(person.email.as_deref(), expected_email);
    }

    // "email (Name)" yields both fields.
    #[test]
    fn test_parse_rss_person_email_paren_name() {
        assert_person(
            "editor@example.com (John Editor)",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    // "Name <email>" yields both fields.
    #[test]
    fn test_parse_rss_person_name_angle_email() {
        assert_person(
            "John Editor <editor@example.com>",
            Some("John Editor"),
            Some("editor@example.com"),
        );
    }

    // A lone address is stored as the email with no name.
    #[test]
    fn test_parse_rss_person_bare_email() {
        assert_person("editor@example.com", None, Some("editor@example.com"));
    }

    // Text without '@' is stored as the name with no email.
    #[test]
    fn test_parse_rss_person_plain_name() {
        assert_person("John Editor", Some("John Editor"), None);
    }

    // Empty parentheses leave the name unset but keep the email.
    #[test]
    fn test_parse_rss_person_empty_name_in_parens() {
        assert_person("editor@example.com ()", None, Some("editor@example.com"));
    }
}