Skip to main content

crispy_iptv_tools/
dedup.rs

1//! Playlist entry deduplication.
2//!
3//! Remove duplicate entries from a playlist based on configurable
4//! key strategies: URL, name, tvg_id, or a custom key function.
5
6use std::collections::HashSet;
7
8use crispy_iptv_types::PlaylistEntry;
9
10/// Strategy for determining duplicate entries.
11pub enum DeduplicateStrategy {
12    /// Same URL = duplicate.
13    ByUrl,
14    /// Same name = duplicate.
15    ByName,
16    /// Same tvg_id = duplicate.
17    ByTvgId,
18    /// Custom key function — entries with the same key are duplicates.
19    Custom(fn(&PlaylistEntry) -> String),
20}
21
22/// Deduplicate entries using the given strategy.
23///
24/// Preserves the first occurrence of each unique key. Order is preserved.
25pub fn deduplicate(
26    entries: &[PlaylistEntry],
27    strategy: &DeduplicateStrategy,
28) -> Vec<PlaylistEntry> {
29    let mut seen = HashSet::new();
30    entries
31        .iter()
32        .filter(|entry| {
33            let key = extract_key(entry, strategy);
34            // Skip entries with empty keys (no URL, no name, etc.).
35            if key.is_empty() {
36                return true;
37            }
38            seen.insert(key)
39        })
40        .cloned()
41        .collect()
42}
43
44/// Extract the deduplication key from an entry based on the strategy.
45fn extract_key(entry: &PlaylistEntry, strategy: &DeduplicateStrategy) -> String {
46    match strategy {
47        DeduplicateStrategy::ByUrl => entry.url.as_deref().unwrap_or("").to_lowercase(),
48        DeduplicateStrategy::ByName => entry.name.as_deref().unwrap_or("").to_lowercase(),
49        DeduplicateStrategy::ByTvgId => entry.tvg_id.as_deref().unwrap_or("").to_lowercase(),
50        DeduplicateStrategy::Custom(f) => f(entry),
51    }
52}
53
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an entry with the given name and URL; an empty `tvg_id`
    /// argument leaves the entry's `tvg_id` unset.
    fn make_entry(name: &str, url: &str, tvg_id: &str) -> PlaylistEntry {
        PlaylistEntry {
            name: Some(name.to_string()),
            url: Some(url.to_string()),
            tvg_id: if tvg_id.is_empty() {
                None
            } else {
                Some(tvg_id.to_string())
            },
            ..Default::default()
        }
    }

    /// Names of the given entries in order, so tests can check both which
    /// entries survived and that their order was preserved.
    fn names(entries: &[PlaylistEntry]) -> Vec<&str> {
        entries
            .iter()
            .map(|entry| entry.name.as_deref().unwrap_or(""))
            .collect()
    }

    #[test]
    fn dedup_by_url_removes_exact_dupes() {
        let entries = vec![
            make_entry("BBC One", "http://a.com/1", ""),
            make_entry("BBC One (copy)", "http://a.com/1", ""),
            make_entry("CNN", "http://a.com/2", ""),
        ];
        let result = deduplicate(&entries, &DeduplicateStrategy::ByUrl);
        assert_eq!(names(&result), ["BBC One", "CNN"]);
    }

    #[test]
    fn dedup_by_url_case_insensitive() {
        let entries = vec![
            make_entry("A", "HTTP://A.COM/1", ""),
            make_entry("B", "http://a.com/1", ""),
        ];
        let result = deduplicate(&entries, &DeduplicateStrategy::ByUrl);
        // The first occurrence wins, regardless of its casing.
        assert_eq!(names(&result), ["A"]);
    }

    #[test]
    fn dedup_by_name_removes_same_name() {
        let entries = vec![
            make_entry("BBC One", "http://a.com/1", ""),
            make_entry("BBC One", "http://a.com/2", ""),
            make_entry("CNN", "http://a.com/3", ""),
        ];
        let result = deduplicate(&entries, &DeduplicateStrategy::ByName);
        assert_eq!(names(&result), ["BBC One", "CNN"]);
        assert_eq!(result[0].url.as_deref(), Some("http://a.com/1"));
        assert_eq!(result[1].url.as_deref(), Some("http://a.com/3"));
    }

    #[test]
    fn dedup_by_name_case_insensitive() {
        let entries = vec![
            make_entry("BBC One", "http://a.com/1", ""),
            make_entry("bbc one", "http://a.com/2", ""),
        ];
        let result = deduplicate(&entries, &DeduplicateStrategy::ByName);
        assert_eq!(names(&result), ["BBC One"]);
    }

    #[test]
    fn dedup_by_tvg_id() {
        let entries = vec![
            make_entry("A", "http://a.com/1", "bbc.uk"),
            make_entry("B", "http://a.com/2", "bbc.uk"),
            make_entry("C", "http://a.com/3", "cnn.us"),
        ];
        let result = deduplicate(&entries, &DeduplicateStrategy::ByTvgId);
        assert_eq!(names(&result), ["A", "C"]);
    }

    #[test]
    fn dedup_by_tvg_id_keeps_entries_without_id() {
        let entries = vec![
            make_entry("A", "http://a.com/1", ""),
            make_entry("B", "http://a.com/2", "bbc.uk"),
            make_entry("C", "http://a.com/3", ""),
            make_entry("D", "http://a.com/4", "bbc.uk"),
        ];
        // Entries without a tvg_id have an empty key and are never dropped.
        let result = deduplicate(&entries, &DeduplicateStrategy::ByTvgId);
        assert_eq!(names(&result), ["A", "B", "C"]);
    }

    #[test]
    fn dedup_custom_key() {
        let entries = vec![
            make_entry("A", "http://a.com/1", ""),
            make_entry("B", "http://a.com/2", ""),
            make_entry("C", "http://b.com/3", ""),
        ];
        // Custom key: extract host from URL.
        let result = deduplicate(
            &entries,
            &DeduplicateStrategy::Custom(|e| {
                let url = e.url.as_deref().unwrap_or("");
                url::Url::parse(url)
                    .ok()
                    .and_then(|u| u.host_str().map(|h| h.to_string()))
                    .unwrap_or_default()
            }),
        );
        assert_eq!(names(&result), ["A", "C"]);
    }

    #[test]
    fn dedup_preserves_entries_with_empty_keys() {
        let entries = vec![make_entry("A", "", ""), make_entry("B", "", "")];
        // Both have empty URLs — both should be kept.
        let result = deduplicate(&entries, &DeduplicateStrategy::ByUrl);
        assert_eq!(names(&result), ["A", "B"]);
    }

    #[test]
    fn dedup_empty_input() {
        let result = deduplicate(&[], &DeduplicateStrategy::ByUrl);
        assert!(result.is_empty());
    }
}