metadata_filter/
rules.rs

//! Regex replacement rules for filtering text.
//!
//! This module defines the [`FilterRule`] type, which represents
//! a regex replacement rule, as well as several functions
//! that return lists of predefined filter rules.
//!
//! Creating a `FilterRule` compiles a [regular expression](https://docs.rs/regex/1/regex/struct.Regex.html),
//! which means it is potentially expensive to call these predefined
//! filter rule functions in a loop; call them once and reuse the returned rules.
10
11use std::borrow::Cow;
12use std::error::Error;
13
14use regex::Regex;
15
16/// Represents a regex replacement rule with a pattern and replacement text.
17#[derive(Debug, Clone)]
18pub struct FilterRule(Regex, String);
19
20impl FilterRule {
21    /// Create a new filter rule with a pattern and a replacement text.
22    /// The pattern follows the syntax from the [`regex`](https://docs.rs/regex/1) crate.
23    /// Returns an error if the regex could not be compiled.
24    pub fn new(pattern: &str, replacement: &str) -> Result<Self, Box<dyn Error>> {
25        Ok(Self(
26            Regex::new(&pattern)?,
27            replacement.to_string(),
28        ))
29    }
30
31    /// Apply the filter rule. Returns Cow::Owned if a replacement was done,
32    /// or Cow::Borrowed (referencing the original text) if nothing was changed.
33    pub fn apply<'t>(&self, text: &'t str) -> Cow<'t, str> {
34        self.0.replace(text, &self.1[..])
35    }
36}
37
// Generates a public function named `$fn_name` that compiles the given table of
// `(pattern, replacement)` pairs into a `Vec<FilterRule>`, preserving table order.
//
// The patterns are compiled on every call of the generated function, and the
// function panics if any pattern fails to compile.
macro_rules! filter_rules {
    ($(#[$attr:meta])* $fn_name:ident, $table:expr) => {
        $(#[$attr])*
        pub fn $fn_name() -> Vec<FilterRule> {
            let table = $table;
            let mut compiled = Vec::with_capacity(table.len());
            for &(pattern, replacement) in table.iter() {
                compiled.push(FilterRule::new(pattern, replacement).unwrap());
            }
            compiled
        }
    };
}
49
50filter_rules!(
51    /// Filter rules to remove YouTube suffixes and prefixes from a text.
52    youtube_track_filter_rules,
53    [
54        // Trim whitespaces
55        (r"^\s+", ""),
56        (r"\s+$", ""),
57        // **NEW**
58        (r"\*+\s?\S+\s?\*+$", ""),
59        // [whatever]
60        (r"\[[^\]]+\]", ""),
61        // (whatever version)
62        (r"(?i)\([^)]*version\)$", ""),
63        // video extensions
64        (r"(?i)\.(avi|wmv|mpg|mpeg|flv)$", ""),
65        // (LYRICs VIDEO)
66        (r"(?i)\(.*lyrics?\s*(video)?\)", ""),
67        // (Official Track Stream)
68        (r"(?i)\((of+icial\s*)?(track\s*)?stream\)", ""),
69        // (official)? (music)? video
70        (r"(?i)\((of+icial\s*)?(music\s*)?video\)", ""),
71        // (official)? (music)? audio
72        (r"(?i)\((of+icial\s*)?(music\s*)?audio\)", ""),
73        // (ALBUM TRACK)
74        (r"(?i)(album track\s*)", ""),
75        // (Cover Art)
76        (r"(?i)(cover art\s*)", ""),
77        // (official)
78        (r"(?i)\(\s*of+icial\s*\)", ""),
79        // (1999)
80        (r"(?i)\(\s*[0-9]{4}\s*\)", ""),
81        // HD (HQ)
82        (r"(HD|HQ)\s*$", ""),
83        // video clip officiel or video clip official
84        ("(?i)(vid[\u{00E9}e]o)?\\s?clip\\sof+ici[ae]l", ""),
85        // offizielles
86        (r"(?i)of+iziel+es\s*video", ""),
87        // video clip
88        ("(?i)vid[\u{00E9}e]o\\s?clip", ""),
89        // clip
90        (r"(?i)\sclip", ""),
91        // Full Album
92        (r"(?i)full\s*album", ""),
93        // (live)
94        (r"(?i)\(live.*?\)$", ""),
95        // | something
96        (r"(?i)\|.*$", ""),
97        // Artist - The new "Track title" featuring someone
98        (r#"^(|.*\s)"(.{5,})"(\s.*|)$"#, "$2"),
99        // 'Track title'
100        (r"^(|.*\s)'(.{5,})'(\s.*|)$", "$2"),
101        // (*01/01/1999*)
102        (r"(?i)\(.*[0-9]{1,2}/[0-9]{1,2}/[0-9]{2,4}.*\)", ""),
103        // Sub Español
104        (r"(?i)sub\s*español", ""),
105        // (Letra/Lyrics)
106        (r"(?i)\s\(Letra/Lyrics\)", ""),
107        // (Letra)
108        (r"(?i)\s\(Letra\)", ""),
109        // (En vivo)
110        (r"(?i)\s\(En\svivo\)", ""),
111    ]
112);
113
114filter_rules!(
115    /// Filter rules to remove leftovers after filtering text using
116    /// [`youtube_track_filter_rules`](fn.youtube_track_filter_rules.html).
117    trim_symbols_filter_rules,
118    [
119        // Leftovers after e.g. (official video)
120        (r"\(+\s*\)+", ""),
121        // trim starting white chars and dash
122        (r#"^[/,:;~\-\s"]+"#, ""),
123        // trim trailing white chars and dash
124        (r#"[/,:;~\-\s"]+$"#, ""),
125    ]
126);
127
128filter_rules!(
129    /// Filter rules to remove "Remastered..."-like strings from a text.
130    remastered_filter_rules,
131    [
132        // Here Comes The Sun - Remastered
133        (r"-\sRemastered$", ""),
134        // Hey Jude - Remastered 2015
135        (r"-\sRemastered\s\d+$", ""),
136        // Let It Be (Remastered 2009)
137        // Red Rain (Remaster 2012)
138        (r"\(Remaster(ed)?\s\d+\)$", ""),
139        // Pigs On The Wing (Part One) [2011 - Remaster]
140        (r"\[\d+\s-\sRemaster\]$", ""),
141        // Comfortably Numb (2011 - Remaster)
142        // Dancing Days (2012 Remaster)
143        (r"\(\d+(\s-)?\sRemaster\)$", ""),
144        // Outside The Wall - 2011 - Remaster
145        // China Grove - 2006 Remaster
146        (r"-\s\d+(\s-)?\sRemaster$", ""),
147        // Learning To Fly - 2001 Digital Remaster
148        (r"-\s\d+\s.+?\sRemaster$", ""),
149        // Your Possible Pasts - 2011 Remastered Version
150        (r"-\s\d+\sRemastered Version$", ""),
151        // Roll Over Beethoven (Live / Remastered)
152        (r"\(Live\s/\sRemastered\)$", ""),
153        // Ticket To Ride - Live / Remastered
154        (r"-\sLive\s/\sRemastered$", ""),
155        // Mothership (Remastered)
156        // How The West Was Won [Remastered]
157        (r"[(\[]Remastered[)\]]$", ""),
158        // A Well Respected Man (2014 Remastered Version)
159        // A Well Respected Man [2014 Remastered Version]
160        (r"[(\[]\d{4} Re[Mm]astered Version[)\]]$", ""),
161        // She Was Hot (2009 Re-Mastered Digital Version)
162        // She Was Hot (2009 Remastered Digital Version)
163        (r"[(\[]\d{4} Re-?[Mm]astered Digital Version[)\]]$", ""),
164        // In The Court Of The Crimson King (Expanded & Remastered Original Album Mix)
165        (r"\([^(]*Remaster[^)]*\)$", ""),
166    ]
167);
168
169filter_rules!(
170    /// Filter rules to remove "Live..."-like strings from a text.
171    live_filter_rules,
172    [
173        // Track - Live
174        (r"-\sLive?$", ""),
175        // Track - Live at
176        (r"-\sLive\s.+?$", ""),
177        // (Live) or [Live]
178        (r"(?i)[(\[]Live[)\]]", ""),
179    ]
180);
181
182filter_rules!(
183    /// Filter rules to remove "Explicit" and "Clean" from a text.
184    clean_explicit_filter_rules,
185    [
186        // (Explicit) or [Explicit]
187        (r"(?i)\s[(\[]Explicit[)\]]", ""),
188        // (Clean) or [Clean]
189        (r"(?i)\s[(\[]Clean[)\]]", ""),
190    ]
191);
192
193filter_rules!(
194    /// Filter rules to remove feature information from a text.
195    feature_filter_rules,
196    [
197        // [Feat. Artist] or (Feat. Artist)
198        (r"(?i)\s[(\[]feat. .+[)\]]", ""),
199    ]
200);
201
202filter_rules!(
203    /// Filter rules to normalize feature information to "Feat. Artist".
204    normalize_feature_filter_rules,
205    [
206        // [Feat. Artist] or (Feat. Artist) -> Feat. Artist
207        (r"(?i)\s[(\[](feat. .+)[)\]]", " $1"),
208    ]
209);
210
211filter_rules!(
212    /// Filter rules to remove version information (eg. "Album Version" or "Deluxe Edition")
213    /// from a text.
214    version_filter_rules,
215    [
216        // Love Will Come To You (Album Version)
217        (r"[(\[]Album Version[)\]]$", ""),
218        // I Melt With You (Rerecorded)
219        // When I Need You [Re-Recorded]
220        (r"[(\[]Re-?[Rr]ecorded[)\]]$", ""),
221        // Your Cheatin' Heart (Single Version)
222        (r"[(\[]Single Version[)\]]$", ""),
223        // All Over Now (Edit)
224        (r"[(\[]Edit[)\]]$", ""),
225        // (I Can't Get No) Satisfaction - Mono Version
226        (r"-\sMono Version$", ""),
227        // Ruby Tuesday - Stereo Version
228        (r"-\sStereo Version$", ""),
229        // Pure McCartney (Deluxe Edition)
230        (r"\(Deluxe Edition\)$", ""),
231        // 6 Foot 7 Foot (Explicit Version)
232        (r"(?i)[(\[]Explicit Version[)\]]", ""),
233    ]
234);
235
236filter_rules!(
237    /// Filter rules to normalize "- suffix" to "(suffix)" in a text.
238    suffix_filter_rules,
239    [
240        // "- X Remix" -> "(X Remix)" and similar
241        (
242            r"(?i)-\s(.+?)\s((Re)?mix|edit|dub|mix|vip|version)$",
243            "($1 $2)"
244        ),
245        (r"(?i)-\s(Remix|VIP)$", "($1)"),
246    ]
247);
248
249filter_rules!(
250    /// Filter rules to remove leading and trailing whitespace from a text.
251    trim_whitespace_filter_rules,
252    [(r"^\s+", ""), (r"\s+$", ""),]
253);
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Applies every rule in order, feeding each rule the output of the previous one.
    fn run_rules(text: &str, rules: &[FilterRule]) -> String {
        let mut current = text.to_string();
        for rule in rules {
            current = rule.apply(&current).into_owned();
        }
        current
    }

    /// Checks that each `(input, expected)` pair filters to `expected` under `rules`.
    /// The values are printed first so a failing case is easy to spot in test output.
    fn assert_filtered(cases: &[(&str, &str)], rules: &[FilterRule]) {
        for &(input, expected) in cases {
            let actual = run_rules(input, rules);
            println!("value: {:?}\nexpected: {:?}\nactual: {:?}\n-----", input, expected, actual);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn test_youtube_track_filter_rules() {
        let cases = [
            ("   whitespace prefix", "whitespace prefix"),
            ("whitespace suffix   ", "whitespace suffix"),
            ("Artist - Song Title **NEW**", "Artist - Song Title "),
            ("Artist - Song Title [something]", "Artist - Song Title "),
            ("Artist - Song Title (xyz version)", "Artist - Song Title "),
            ("Artist - Song Title.avi", "Artist - Song Title"),
            ("Artist - Song Title (lyric video)", "Artist - Song Title "),
            ("Artist - Song Title (official track stream)", "Artist - Song Title "),
            ("Artist - Song Title (official music video)", "Artist - Song Title "),
            ("Artist - Song Title (official audio)", "Artist - Song Title "),
            ("Artist - Song Title (Album Track)", "Artist - Song Title ()"),
            ("Artist - Song Title (Cover Art)", "Artist - Song Title ()"),
            ("Artist - Song Title (official)", "Artist - Song Title "),
            ("Artist - Song Title (1999)", "Artist - Song Title "),
            ("Artist - Song Title HD", "Artist - Song Title "),
            ("Artist - Song Title (vidéo clip official)", "Artist - Song Title ()"),
            ("Artist - Song Title offizielles video", "Artist - Song Title "),
            ("Artist - Song Title video clip", "Artist - Song Title "),
            ("Artist - Song Title clip", "Artist - Song Title"),
            ("Artist - Album Title Full Album", "Artist - Album Title "),
            ("Artist - Song Title (live)", "Artist - Song Title "),
            ("Artist - Song Title | something", "Artist - Song Title "),
            ("Artist - Song Title (01/01/1999)", "Artist - Song Title "),
            ("Artist - Song Title (sub español)", "Artist - Song Title ()"),
            ("Artist - Song Title (Letra/Lyrics)", "Artist - Song Title "),
            ("Artist - Song Title (Letra)", "Artist - Song Title"),
            ("Artist - Song Title (En vivo)", "Artist - Song Title"),
        ];

        let rules = youtube_track_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_trim_symbols_filter_rules() {
        let cases = [
            ("Artist - Song Title ()", "Artist - Song Title"),
            ("Artist - Song Title - ", "Artist - Song Title"),
            (" - Artist - Song Title", "Artist - Song Title"),
        ];

        let rules = trim_symbols_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_remastered_filter_rules() {
        let cases = [
            ("Here Comes The Sun - Remastered", "Here Comes The Sun "),
            ("Hey Jude - Remastered 2015", "Hey Jude "),
            ("Let It Be (Remastered 2009)", "Let It Be "),
            ("Red Rain (Remaster 2012)", "Red Rain "),
            (
                "Pigs On The Wing (Part One) [2011 - Remaster]",
                "Pigs On The Wing (Part One) ",
            ),
            ("Comfortably Numb (2011 - Remaster)", "Comfortably Numb "),
            ("Dancing Days (2012 Remaster)", "Dancing Days "),
            ("Outside The Wall - 2011 - Remaster", "Outside The Wall "),
            ("China Grove - 2006 Remaster", "China Grove "),
            ("Learning To Fly - 2001 Digital Remaster", "Learning To Fly "),
            (
                "Your Possible Pasts - 2011 Remastered Version",
                "Your Possible Pasts ",
            ),
            ("Roll Over Beethoven (Live / Remastered)", "Roll Over Beethoven "),
            ("Ticket To Ride - Live / Remastered", "Ticket To Ride "),
            ("Mothership (Remastered)", "Mothership "),
            ("How The West Was Won [Remastered]", "How The West Was Won "),
            (
                "A Well Respected Man (2014 Remastered Version)",
                "A Well Respected Man ",
            ),
            (
                "A Well Respected Man [2014 Remastered Version]",
                "A Well Respected Man ",
            ),
            ("She Was Hot (2009 Re-Mastered Digital Version)", "She Was Hot "),
            ("She Was Hot (2009 Remastered Digital Version)", "She Was Hot "),
            (
                "In The Court Of The Crimson King (Expanded & Remastered Original Album Mix)",
                "In The Court Of The Crimson King ",
            ),
        ];

        let rules = remastered_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_live_filter_rules() {
        let cases = [
            ("Song Title - Live", "Song Title "),
            ("Song Title - Live at Location", "Song Title "),
            ("Song Title (Live)", "Song Title "),
            ("Song Title [Live]", "Song Title "),
        ];

        let rules = live_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_clean_explicit_filter_rules() {
        let cases = [
            ("Song Title (Explicit)", "Song Title"),
            ("Song Title [Explicit]", "Song Title"),
            ("Song Title (Clean)", "Song Title"),
            ("Song Title [Clean]", "Song Title"),
        ];

        let rules = clean_explicit_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_feature_filter_rules() {
        let cases = [
            ("Song Title (Feat. Other Artist)", "Song Title"),
            ("Song Title [Feat. Other Artist]", "Song Title"),
        ];

        let rules = feature_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_normalize_feature_filter_rules() {
        let cases = [
            ("Song Title (Feat. Other Artist)", "Song Title Feat. Other Artist"),
            ("Song Title [Feat. Other Artist]", "Song Title Feat. Other Artist"),
        ];

        let rules = normalize_feature_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_version_filter_rules() {
        let cases = [
            ("Love Will Come To You (Album Version)", "Love Will Come To You "),
            ("I Melt With You (Rerecorded)", "I Melt With You "),
            ("When I Need You [Re-Recorded]", "When I Need You "),
            ("Your Cheatin' Heart (Single Version)", "Your Cheatin' Heart "),
            ("All Over Now (Edit)", "All Over Now "),
            (
                "(I Can't Get No) Satisfaction - Mono Version",
                "(I Can't Get No) Satisfaction ",
            ),
            ("Ruby Tuesday - Stereo Version", "Ruby Tuesday "),
            ("Pure McCartney (Deluxe Edition)", "Pure McCartney "),
            ("6 Foot 7 Foot (Explicit Version)", "6 Foot 7 Foot "),
        ];

        let rules = version_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_suffix_filter_rules() {
        let cases = [
            ("Song Title - X Remix", "Song Title (X Remix)"),
            ("Song Title - Y-Z Remix", "Song Title (Y-Z Remix)"),
            ("Song Title - Y-Z Abc Remix", "Song Title (Y-Z Abc Remix)"),
            ("Song Title - Abc Xyz Remix", "Song Title (Abc Xyz Remix)"),
            ("Song Title - Remix", "Song Title (Remix)"),
            ("Song Title - VIP", "Song Title (VIP)"),
        ];

        let rules = suffix_filter_rules();
        assert_filtered(&cases, &rules);
    }

    #[test]
    fn test_trim_whitespace_filter_rules() {
        let cases = [
            ("   Text   ", "Text"),
            ("   Text", "Text"),
            ("Text   ", "Text"),
        ];

        let rules = trim_whitespace_filter_rules();
        assert_filtered(&cases, &rules);
    }
}