Skip to main content

feedparser_rs/namespace/
threading.rs

//! Atom Threading Extensions (RFC 4685) namespace handler.
//!
//! Namespace URI: `http://purl.org/syndication/thread/1.0`
//! Prefix: `thr`
//!
//! Handles two elements:
//! - `thr:in-reply-to` — attribute-only element referencing the parent entry
//! - `thr:total` — text element carrying the total response count
9use crate::types::{Entry, InReplyTo, MimeType, SmallString, Url};
10
/// Namespace URI of the Atom Threading Extensions (conventionally bound to the `thr` prefix).
pub const THREADING_NAMESPACE: &str = "http://purl.org/syndication/thread/1.0";
13
/// Trim an attribute value and treat a blank result as absent.
///
/// Returns the trimmed slice, or `None` when nothing remains after trimming
/// (empty or whitespace-only input).
#[inline]
fn non_empty(s: &str) -> Option<&str> {
    Some(s.trim()).filter(|trimmed| !trimmed.is_empty())
}
24
25/// Build an `InReplyTo` from its four optional fields.
26///
27/// Returns `None` only if ALL fields are `None` (fully empty element).
28#[inline]
29fn build_in_reply_to(
30    ref_: Option<SmallString>,
31    href: Option<Url>,
32    type_: Option<MimeType>,
33    source: Option<Url>,
34) -> Option<InReplyTo> {
35    if ref_.is_none() && href.is_none() && type_.is_none() && source.is_none() {
36        return None;
37    }
38    Some(InReplyTo {
39        ref_,
40        href,
41        type_,
42        source,
43    })
44}
45
46/// Parse `thr:in-reply-to` from a quick-xml attribute iterator (Atom and RSS 1.0 parser paths)
47///
48/// Returns `None` only if ALL fields are `None` after normalization (fully empty element).
49/// Returns `Some(InReplyTo)` even if `ref` is missing, to tolerate malformed feeds.
50///
51/// # Arguments
52///
53/// * `attrs` - Iterator over quick-xml attributes (from `element.attributes().flatten()`)
54/// * `max_attr_len` - Maximum attribute value length for `DoS` protection
55pub fn parse_in_reply_to_from_attrs<'a>(
56    attrs: impl Iterator<Item = quick_xml::events::attributes::Attribute<'a>>,
57    max_attr_len: usize,
58) -> Option<InReplyTo> {
59    let mut ref_ = None;
60    let mut href = None;
61    let mut type_ = None;
62    let mut source = None;
63
64    for attr in attrs {
65        if attr.value.len() > max_attr_len {
66            continue;
67        }
68        let Ok(value) = attr.unescape_value() else {
69            continue;
70        };
71        match attr.key.as_ref() {
72            b"ref" => ref_ = non_empty(&value).map(|s| s.to_string().into()),
73            b"href" => href = non_empty(&value).map(|s| s.to_string().into()),
74            b"type" => type_ = non_empty(&value).map(|s| s.to_string().into()),
75            b"source" => source = non_empty(&value).map(|s| s.to_string().into()),
76            _ => {}
77        }
78    }
79
80    build_in_reply_to(ref_, href, type_, source)
81}
82
83/// Parse `thr:in-reply-to` from collected attributes (RSS 2.0 parser path)
84///
85/// Returns `None` only if ALL fields are `None` after normalization.
86///
87/// # Arguments
88///
89/// * `attrs` - Slice of collected `(key_bytes, value_string)` attribute pairs
90/// * `max_attr_len` - Maximum attribute value length for `DoS` protection
91pub fn parse_in_reply_to_from_collected(
92    attrs: &[(Vec<u8>, String)],
93    max_attr_len: usize,
94) -> Option<InReplyTo> {
95    let mut ref_ = None;
96    let mut href = None;
97    let mut type_ = None;
98    let mut source = None;
99
100    for (key, value) in attrs {
101        if value.len() > max_attr_len {
102            continue;
103        }
104        match key.as_slice() {
105            b"ref" => ref_ = non_empty(value).map(|s| s.to_string().into()),
106            b"href" => href = non_empty(value).map(|s| s.to_string().into()),
107            b"type" => type_ = non_empty(value).map(|s| s.to_string().into()),
108            b"source" => source = non_empty(value).map(|s| s.to_string().into()),
109            _ => {}
110        }
111    }
112
113    build_in_reply_to(ref_, href, type_, source)
114}
115
116/// Handle `thr:total` text content
117///
118/// Parses non-negative integer from text. Silently ignores non-numeric, negative,
119/// overflow, empty, and whitespace-only values (consistent with how `parse_duration`
120/// handles invalid input throughout the codebase).
121///
122/// # Arguments
123///
124/// * `text` - Text content of the `thr:total` element
125/// * `entry` - Entry to update with the parsed count
126pub fn handle_total(text: &str, entry: &mut Entry) {
127    entry.thr_total = text.trim().parse::<u32>().ok();
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `handle_total` against a fresh default entry and return the stored count.
    fn total_of(text: &str) -> Option<u32> {
        let mut entry = Entry::default();
        handle_total(text, &mut entry);
        entry.thr_total
    }

    /// Build one collected `(key_bytes, value_string)` attribute pair.
    fn attr(key: &[u8], value: &str) -> (Vec<u8>, String) {
        (key.to_vec(), value.to_owned())
    }

    #[test]
    fn test_handle_total_valid() {
        assert_eq!(total_of("15"), Some(15));
    }

    #[test]
    fn test_handle_total_with_whitespace() {
        assert_eq!(total_of("  42  "), Some(42));
    }

    #[test]
    fn test_handle_total_zero() {
        assert_eq!(total_of("0"), Some(0));
    }

    #[test]
    fn test_handle_total_non_numeric() {
        assert_eq!(total_of("abc"), None);
    }

    #[test]
    fn test_handle_total_negative() {
        assert_eq!(total_of("-5"), None);
    }

    #[test]
    fn test_handle_total_overflow() {
        assert_eq!(total_of("99999999999999"), None);
    }

    #[test]
    fn test_handle_total_empty() {
        assert_eq!(total_of(""), None);
    }

    #[test]
    fn test_handle_total_whitespace_only() {
        assert_eq!(total_of("   "), None);
    }

    #[test]
    fn test_parse_in_reply_to_from_collected_full() {
        let attrs = [
            attr(b"ref", "tag:example.com,2024:post/1"),
            attr(b"href", "https://example.com/post/1"),
            attr(b"type", "text/html"),
            attr(b"source", "https://example.com/feed.xml"),
        ];
        let result = parse_in_reply_to_from_collected(&attrs, 1024);
        assert!(result.is_some());
        let irt = result.unwrap();
        assert_eq!(irt.ref_.as_deref(), Some("tag:example.com,2024:post/1"));
        assert_eq!(irt.href.as_deref(), Some("https://example.com/post/1"));
        assert_eq!(irt.type_.as_deref(), Some("text/html"));
        assert_eq!(irt.source.as_deref(), Some("https://example.com/feed.xml"));
    }

    #[test]
    fn test_parse_in_reply_to_from_collected_empty_ref() {
        let attrs = [
            attr(b"ref", ""),
            attr(b"href", "https://example.com/post/1"),
        ];
        let result = parse_in_reply_to_from_collected(&attrs, 1024);
        assert!(result.is_some());
        let irt = result.unwrap();
        // A blank `ref` is normalized away while `href` survives
        assert!(irt.ref_.is_none());
        assert_eq!(irt.href.as_deref(), Some("https://example.com/post/1"));
    }

    #[test]
    fn test_parse_in_reply_to_from_collected_all_empty() {
        let attrs = [
            attr(b"ref", ""),
            attr(b"href", ""),
            attr(b"type", "  "),
            attr(b"source", ""),
        ];
        // Every field normalizes to None, so no InReplyTo is produced
        assert!(parse_in_reply_to_from_collected(&attrs, 1024).is_none());
    }

    #[test]
    fn test_parse_in_reply_to_from_collected_truncated_by_limit() {
        let attrs = [attr(b"ref", "tag:example.com,2024:post/1")];
        // The only value is longer than the limit, so it is skipped and nothing remains
        assert!(parse_in_reply_to_from_collected(&attrs, 5).is_none());
    }

    #[test]
    fn test_parse_in_reply_to_from_collected_only_ref() {
        let attrs = [attr(b"ref", "tag:example.com,2024:post/1")];
        let result = parse_in_reply_to_from_collected(&attrs, 1024);
        assert!(result.is_some());
        let irt = result.unwrap();
        assert_eq!(irt.ref_.as_deref(), Some("tag:example.com,2024:post/1"));
        assert!(irt.href.is_none());
        assert!(irt.type_.is_none());
        assert!(irt.source.is_none());
    }

    #[test]
    fn test_non_empty_normalization() {
        let cases = [
            ("", None),
            ("  ", None),
            ("hello", Some("hello")),
            ("  hello  ", Some("hello")),
        ];
        for (input, expected) in cases {
            assert_eq!(non_empty(input), expected);
        }
    }
}
264}