feedparser_rs/namespace/
cc.rs

1//! Creative Commons namespace support for license information
2//!
3//! Handles Creative Commons license metadata in RSS and Atom feeds.
4//! Supports both the modern `cc:license` (with `rdf:resource` attribute)
5//! and legacy `creativeCommons:license` text elements.
6//!
7//! # Supported Elements
8//!
9//! - `cc:license` (with `rdf:resource` attribute) - Modern CC namespace
10//! - `creativeCommons:license` (text element) - Legacy Userland namespace
11//!
12//! # Specification
13//!
14//! Creative Commons: <http://creativecommons.org/ns>
15//! Legacy: <http://backend.userland.com/creativeCommonsRssModule>
16
17use crate::Entry;
18use crate::limits::ParserLimits;
19use crate::types::generics::LimitedCollectionExt;
20use crate::types::{FeedMeta, Link};
21
/// Creative Commons namespace URI (modern).
///
/// This is the namespace behind the `cc:license` element, whose license URL
/// is carried in an `rdf:resource` attribute.
pub const CC: &str = "http://creativecommons.org/ns#";

/// Creative Commons legacy namespace URI (Userland RSS module).
///
/// This is the namespace behind the `creativeCommons:license` element, whose
/// license URL is carried as the element's text content.
pub const CREATIVE_COMMONS: &str = "http://backend.userland.com/creativeCommonsRssModule";
27
28/// Handle Creative Commons element at feed level
29///
30/// Converts CC license information to a link with `rel="license"`
31/// and adds it to the feed's links collection.
32///
33/// # Arguments
34///
35/// * `tag` - Element local name (e.g., "license")
36/// * `attrs` - Element attributes as (name, value) pairs
37/// * `text` - Element text content
38/// * `feed` - Feed metadata to update
39/// * `limits` - Parser limits for bounded collections
40///
41/// # Returns
42///
43/// `true` if element was recognized and handled, `false` otherwise
44pub fn handle_feed_element(
45    tag: &[u8],
46    attrs: &[(Vec<u8>, String)],
47    text: &str,
48    feed: &mut FeedMeta,
49    limits: &ParserLimits,
50) -> bool {
51    match tag {
52        b"license" => {
53            if let Some(license_url) = extract_license_url(attrs, text) {
54                feed.links.try_push_limited(
55                    Link {
56                        href: license_url.into(),
57                        rel: Some("license".into()),
58                        ..Default::default()
59                    },
60                    limits.max_links_per_feed,
61                );
62            }
63            true
64        }
65        _ => false,
66    }
67}
68
69/// Handle Creative Commons element at entry level
70///
71/// Converts CC license information to a link with `rel="license"`
72/// and adds it to the entry's links collection.
73///
74/// # Arguments
75///
76/// * `tag` - Element local name (e.g., "license")
77/// * `attrs` - Element attributes as (name, value) pairs
78/// * `text` - Element text content
79/// * `entry` - Entry to update
80/// * `limits` - Parser limits for bounded collections
81///
82/// # Returns
83///
84/// `true` if element was recognized and handled, `false` otherwise
85pub fn handle_entry_element(
86    tag: &[u8],
87    attrs: &[(Vec<u8>, String)],
88    text: &str,
89    entry: &mut Entry,
90    limits: &ParserLimits,
91) -> bool {
92    match tag {
93        b"license" => {
94            if let Some(license_url) = extract_license_url(attrs, text) {
95                entry.links.try_push_limited(
96                    Link {
97                        href: license_url.into(),
98                        rel: Some("license".into()),
99                        ..Default::default()
100                    },
101                    limits.max_links_per_entry,
102                );
103            }
104            true
105        }
106        _ => false,
107    }
108}
109
/// Extract the license URL from a Creative Commons license element.
///
/// Two sources are tried, in order:
/// 1. A `resource` attribute, either bare or with any prefix (for example
///    `rdf:resource`), as used by the modern `cc:license` format.
/// 2. The element's text content, as used by the legacy
///    `creativeCommons:license` format.
///
/// Both sources are trimmed of surrounding whitespace, and a source that is
/// empty after trimming is skipped. A whitespace-only `rdf:resource`
/// therefore does not produce a blank URL and does not hide a valid text
/// fallback.
///
/// # Arguments
///
/// * `attrs` - Element attributes as (name, value) pairs
/// * `text` - Element text content
///
/// # Returns
///
/// The trimmed license URL if one is found, `None` otherwise
fn extract_license_url(attrs: &[(Vec<u8>, String)], text: &str) -> Option<String> {
    // Modern format:
    // <cc:license rdf:resource="http://creativecommons.org/licenses/by/4.0/" />
    // The first matching attribute with a non-blank value wins.
    let from_attribute = attrs
        .iter()
        .filter(|(name, _)| name.as_slice() == b"resource" || name.ends_with(b":resource"))
        .map(|(_, value)| value.trim())
        .find(|value| !value.is_empty());

    // Legacy format:
    // <creativeCommons:license>http://creativecommons.org/licenses/by/4.0/</creativeCommons:license>
    from_attribute
        .or_else(|| Some(text.trim()).filter(|content| !content.is_empty()))
        .map(str::to_owned)
}
142
#[cfg(test)]
mod tests {
    use super::*;

    // License URLs shared by the tests below.
    const LICENSE_BY: &str = "http://creativecommons.org/licenses/by/4.0/";
    const LICENSE_BY_SA: &str = "http://creativecommons.org/licenses/by-sa/4.0/";
    const LICENSE_BY_NC: &str = "http://creativecommons.org/licenses/by-nc/4.0/";
    const LICENSE_BY_ND: &str = "http://creativecommons.org/licenses/by-nd/4.0/";

    /// Build an attribute list holding exactly one (name, value) pair.
    fn single_attr(name: &[u8], value: &str) -> Vec<(Vec<u8>, String)> {
        vec![(name.to_vec(), value.to_string())]
    }

    #[test]
    fn test_extract_license_url_from_attribute() {
        let attrs = single_attr(b"resource", LICENSE_BY);
        let found = extract_license_url(&attrs, "").unwrap();
        assert_eq!(found, LICENSE_BY);
    }

    #[test]
    fn test_extract_license_url_from_namespaced_attribute() {
        let attrs = single_attr(b"rdf:resource", LICENSE_BY_SA);
        let found = extract_license_url(&attrs, "").unwrap();
        assert_eq!(found, LICENSE_BY_SA);
    }

    #[test]
    fn test_extract_license_url_from_text() {
        let found = extract_license_url(&[], LICENSE_BY_NC).unwrap();
        assert_eq!(found, LICENSE_BY_NC);
    }

    #[test]
    fn test_extract_license_url_from_text_with_whitespace() {
        let padded = "  http://creativecommons.org/licenses/by-nd/4.0/  ";
        let found = extract_license_url(&[], padded).unwrap();
        assert_eq!(found, LICENSE_BY_ND);
    }

    #[test]
    fn test_extract_license_url_prefers_attribute() {
        // With both an attribute and text content, the attribute takes priority.
        let attrs = single_attr(b"rdf:resource", LICENSE_BY);
        let found = extract_license_url(&attrs, LICENSE_BY_SA).unwrap();
        assert_eq!(found, LICENSE_BY);
    }

    #[test]
    fn test_extract_license_url_empty() {
        for blank in ["", "   "] {
            assert!(extract_license_url(&[], blank).is_none());
        }
    }

    #[test]
    fn test_handle_feed_element_license() {
        let limits = ParserLimits::default();
        let mut feed = FeedMeta::default();
        let attrs = single_attr(b"rdf:resource", LICENSE_BY);

        assert!(handle_feed_element(
            b"license",
            &attrs,
            "",
            &mut feed,
            &limits
        ));

        assert_eq!(feed.links.len(), 1);
        assert_eq!(feed.links[0].href, LICENSE_BY);
        assert_eq!(feed.links[0].rel.as_deref(), Some("license"));
    }

    #[test]
    fn test_handle_entry_element_license() {
        let limits = ParserLimits::default();
        let mut entry = Entry::default();

        assert!(handle_entry_element(
            b"license",
            &[],
            LICENSE_BY_SA,
            &mut entry,
            &limits
        ));

        assert_eq!(entry.links.len(), 1);
        assert_eq!(entry.links[0].href, LICENSE_BY_SA);
        assert_eq!(entry.links[0].rel.as_deref(), Some("license"));
    }

    #[test]
    fn test_handle_feed_element_unknown() {
        let limits = ParserLimits::default();
        let mut feed = FeedMeta::default();

        assert!(!handle_feed_element(
            b"unknown",
            &[],
            "",
            &mut feed,
            &limits
        ));
    }

    #[test]
    fn test_handle_entry_element_unknown() {
        let limits = ParserLimits::default();
        let mut entry = Entry::default();

        assert!(!handle_entry_element(
            b"unknown",
            &[],
            "",
            &mut entry,
            &limits
        ));
    }

    #[test]
    fn test_multiple_licenses() {
        let limits = ParserLimits::default();
        let mut feed = FeedMeta::default();

        let first = single_attr(b"rdf:resource", LICENSE_BY);
        handle_feed_element(b"license", &first, "", &mut feed, &limits);

        let second = single_attr(b"rdf:resource", LICENSE_BY_SA);
        handle_feed_element(b"license", &second, "", &mut feed, &limits);

        assert_eq!(feed.links.len(), 2);
        assert_eq!(feed.links[0].rel.as_deref(), Some("license"));
        assert_eq!(feed.links[1].rel.as_deref(), Some("license"));
    }
}