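//! `myrient_filter/lib.rs`: lists directories and ROM files from the Myrient HTTP
//! index and filters them by region, exclusion patterns and revision.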

1use reqwest::Client;
2use scraper::{Html, Selector};
3use serde::{Deserialize, Serialize};
4use std::collections::BTreeMap;
5use url::Url;
/// Directory name `"No-Intro"`.
///
/// Not referenced elsewhere in this file; presumably the collection folder callers
/// pass as `subdir` — confirm against the call sites.
pub const NO_INTRO_DIR: &str = "No-Intro";

/// Root URL (with trailing slash) that every listing path is appended to.
pub const BASE_URL: &str = "https://myrient.erista.me/files/";
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct FilterOptions {
11    pub region_limit: bool,
12    pub region: String,
13    pub smart_filters: bool,
14    pub exclude_patterns: Vec<String>,
15    pub latest_revision: bool,
16}
17
/// A single downloadable ROM file from a directory listing.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Rom {
    /// Percent-decoded file name, e.g. `Game (USA).zip`.
    pub filename: String,
    /// URL the file can be fetched from.
    pub url: String,
}
23
24#[derive(Debug)]
25pub struct RomLister {
26    client: Client,
27    options: FilterOptions,
28}
29
/// A sub-directory entry found in a directory listing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HttpDirectory {
    /// Directory name as stored by whoever constructs the value.
    pub name: String,
}
34
35#[derive(Debug)]
36pub enum HttpListEntry {
37    Directory(HttpDirectory),
38    Rom(Rom),
39}
40
41impl HttpDirectory {
42    pub fn new(name: String) -> Self {
43        Self { name }
44    }
45}
46
47impl RomLister {
48    pub fn new(options: FilterOptions) -> Self {
49        Self {
50            client: Client::new(),
51            options,
52        }
53    }
54
55    /// List directories at the given path. If no path is provided, lists directories at the base URL
56    pub async fn list_directories(
57        &self,
58        subdir: Option<&str>,
59    ) -> Result<Vec<String>, Box<dyn std::error::Error>> {
60        let entries = self.list(subdir).await?;
61        Ok(entries
62            .into_iter()
63            .filter_map(|entry| match entry {
64                HttpListEntry::Directory(dir) => Some(dir.name),
65                _ => None,
66            })
67            .collect())
68    }
69
70    pub async fn list_rom_urls(
71        &self,
72        system: &str,
73        subdir: &str,
74    ) -> Result<Vec<String>, Box<dyn std::error::Error>> {
75        let path = format!("{}/{}", subdir, system.replace(" ", "%20"));
76        let entries = self.list(Some(&path)).await?;
77        Ok(entries
78            .into_iter()
79            .filter_map(|entry| match entry {
80                HttpListEntry::Rom(rom) => Some(rom.url),
81                _ => None,
82            })
83            .collect())
84    }
85
86    pub async fn list_roms(
87        &self,
88        system: &str,
89        subdir: &str,
90    ) -> Result<Vec<Rom>, Box<dyn std::error::Error>> {
91        let urls = self.list_rom_urls(system, subdir).await?;
92
93        if !self.options.latest_revision {
94            // If not filtering for latest revision, just return all ROMs
95            return Ok(urls
96                .into_iter()
97                .map(|url| {
98                    let url_obj = Url::parse(&url).unwrap();
99                    let path = url_obj.path();
100                    let decoded = urlencoding::decode(path).expect("UTF-8");
101                    let path = std::path::Path::new(decoded.as_ref());
102                    let filename = path.file_name().unwrap().to_string_lossy().to_string();
103                    Rom { filename, url }
104                })
105                .collect());
106        }
107
108        // Group ROMs by base name using BTreeMap for automatic sorting
109        let mut rom_groups: BTreeMap<String, Vec<Rom>> = BTreeMap::new();
110
111        for url in urls {
112            let url_obj = Url::parse(&url).unwrap();
113            let path = url_obj.path();
114            let decoded = urlencoding::decode(path).expect("UTF-8");
115            let path = std::path::Path::new(decoded.as_ref());
116            let filename = path.file_name().unwrap().to_string_lossy().to_string();
117
118            let (base_name, _revision) = Self::get_base_name_and_revision(&filename);
119            let rom = Rom { filename, url };
120
121            rom_groups.entry(base_name).or_default().push(rom);
122        }
123
124        // For each group, keep only the latest revision
125        let mut final_roms = Vec::new();
126        for roms in rom_groups.values() {
127            if roms.len() == 1 {
128                final_roms.push(roms[0].clone());
129            } else {
130                let latest = roms.iter().max_by_key(|rom| {
131                    let (_, revision) = Self::get_base_name_and_revision(&rom.filename);
132                    revision.unwrap_or(-1)
133                });
134                if let Some(rom) = latest {
135                    final_roms.push(rom.clone());
136                }
137            }
138        }
139
140        Ok(final_roms)
141    }
142
143    pub async fn list(
144        &self,
145        path: Option<&str>,
146    ) -> Result<Vec<HttpListEntry>, Box<dyn std::error::Error>> {
147        let url = match path {
148            Some(p) => format!("{}{}/", BASE_URL, p),
149            None => BASE_URL.to_string(),
150        };
151        println!("Fetching entries from: {}", url);
152
153        let response = self.client.get(&url).send().await?.text().await?;
154        let document = Html::parse_document(&response);
155        let selector = Selector::parse("tbody > tr > td.link > a").unwrap();
156
157        let entries: Vec<HttpListEntry> = document
158            .select(&selector)
159            .skip(1) // Skip parent directory link
160            .filter_map(|link| {
161                let href = link.value().attr("href")?;
162                let decoded = urlencoding::decode(href).ok()?;
163
164                if href.ends_with('/') {
165                    // Directory entry
166                    let name = decoded.trim_end_matches('/').to_string();
167                    Some(HttpListEntry::Directory(HttpDirectory::new(name)))
168                } else if self.is_valid_file(href) {
169                    // ROM entry
170                    let url = if !href.starts_with("http") {
171                        format!("{}{}", url, href)
172                    } else {
173                        href.to_string()
174                    };
175                    Some(HttpListEntry::Rom(Rom {
176                        filename: decoded.into_owned(),
177                        url,
178                    }))
179                } else {
180                    None
181                }
182            })
183            .collect();
184
185        Ok(entries)
186    }
187
188    fn is_valid_file(&self, href: &str) -> bool {
189        let file_name = urlencoding::decode(href.split('/').last().unwrap_or(""))
190            .unwrap_or_default()
191            .into_owned();
192
193        // Helper function to extract terms in parentheses
194        fn get_terms_in_parentheses(filename: &str) -> Vec<String> {
195            let mut terms = Vec::new();
196            let mut current_term = String::new();
197            let mut in_parentheses = false;
198
199            for c in filename.chars() {
200                match c {
201                    '(' => {
202                        in_parentheses = true;
203                        current_term.clear();
204                    }
205                    ')' => {
206                        if in_parentheses {
207                            terms.push(current_term.clone());
208                            in_parentheses = false;
209                        }
210                    }
211                    _ if in_parentheses => {
212                        current_term.push(c);
213                    }
214                    _ => {}
215                }
216            }
217            terms
218        }
219
220        // Get all terms in parentheses
221        let terms = get_terms_in_parentheses(&file_name);
222
223        // Check region first
224        if self.options.region_limit {
225            let regions = [&self.options.region, "World"];
226            if !terms.iter().any(|term| regions.contains(&term.as_str())) {
227                return false;
228            }
229        }
230
231        // Check excluded patterns
232        if terms.iter().any(|term| {
233            self.options
234                .exclude_patterns
235                .iter()
236                .any(|pattern| term.contains(pattern))
237        }) {
238            return false;
239        }
240
241        // Check smart filters last
242        if self.options.smart_filters {
243            let excluded_keywords = [
244                "Beta",
245                "Alpha",
246                "Proto",
247                "Virtual Console",
248                "Aftermarket",
249                "Unl",
250                "Sample",
251                "Promo",
252                "Demo",
253                "Kiosk",
254                // Exclude Arcade releases, some console games for some reason have an alternate Arcade ROM
255                // Such as the Addams Family (1992) for SNES
256                "Arcade",
257            ];
258            if terms
259                .iter()
260                .any(|term| excluded_keywords.contains(&term.as_str()))
261            {
262                return false;
263            }
264        }
265
266        true
267    }
268
269    fn get_base_name_and_revision(filename: &str) -> (String, Option<i32>) {
270        // Match everything up to the last sequence of metadata parentheses
271        // Uses negative lookahead to ensure we don't stop at parentheses that are part of the name
272        let re = regex::Regex::new(
273            r"^(.*?)(?:\s*\([^)]*(?:Rev\s*\d+|USA|Europe|World|Japan)[^)]*\))*(?:\s*\(Rev\s*(\d+)\))?(?:\s*\([^)]*\))*(?:\..*)?$"
274        ).unwrap();
275
276        if let Some(caps) = re.captures(filename) {
277            let base_name = caps.get(1).map_or("", |m| m.as_str()).trim().to_string();
278            let revision = caps
279                .get(2)
280                .and_then(|m| m.as_str().parse::<i32>().ok())
281                .or_else(|| {
282                    // Fallback: look for revision number in any parentheses
283                    let rev_re = regex::Regex::new(r"\(Rev\s*(\d+)\)").unwrap();
284                    rev_re
285                        .captures(filename)
286                        .and_then(|caps| caps.get(1))
287                        .and_then(|m| m.as_str().parse::<i32>().ok())
288                });
289            (base_name, revision)
290        } else {
291            // If the regex fails completely, return the whole filename
292            // This should rarely happen given the pattern
293            (filename.to_string(), None)
294        }
295    }
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a lister with region limiting, smart filters and latest-revision
    /// selection all enabled.
    fn lister(region: &str, exclude_patterns: &[&str]) -> RomLister {
        RomLister::new(FilterOptions {
            region_limit: true,
            region: region.to_string(),
            smart_filters: true,
            exclude_patterns: exclude_patterns.iter().map(|p| p.to_string()).collect(),
            latest_revision: true,
        })
    }

    /// Checks every `(file name, (base name, revision))` pair.
    fn check_base_and_revision(cases: &[(&str, (&str, Option<i32>))]) {
        for &(input, expected) in cases {
            let (base, rev) = RomLister::get_base_name_and_revision(input);
            assert_eq!(
                (base.as_str(), rev),
                expected,
                "Failed for input: {}",
                input
            );
        }
    }

    /// Asserts that each name in `accepted` passes the filters and each name in
    /// `rejected` does not.
    fn check_validity(rom_lister: &RomLister, accepted: &[&str], rejected: &[&str]) {
        for name in accepted {
            assert!(rom_lister.is_valid_file(name), "expected valid: {}", name);
        }
        for name in rejected {
            assert!(!rom_lister.is_valid_file(name), "expected invalid: {}", name);
        }
    }

    #[test]
    fn test_is_valid_file() {
        let rom_lister = lister("Europe", &["Beta", "Rev B"]);

        check_validity(
            &rom_lister,
            &[
                "Super Game (Europe).zip",
                "Super Game (World).zip",
                // Should pass as Beta is not in parentheses
                "Beta Game (Europe).zip",
            ],
            &[
                "Super Game (USA).zip",
                "Super Game (Beta).zip",
                "Super Game (Rev B).zip",
            ],
        );
    }

    #[test]
    fn test_get_base_name_and_revision() {
        check_base_and_revision(&[
            ("Super Game (USA).zip", ("Super Game", None)),
            ("Super Game (Rev 2) (USA).zip", ("Super Game", Some(2))),
            ("Super Game (Rev 1).zip", ("Super Game", Some(1))),
            ("Game (Rev 12) (USA).zip", ("Game", Some(12))),
        ]);
    }

    #[test]
    fn test_get_base_name_and_revision_complex() {
        check_base_and_revision(&[
            ("Game (World) (Legacy Game Collection).zip", ("Game", None)),
            (
                "Game (Legacy Collection) (US) (Rev 1).zip",
                ("Game", Some(1)),
            ),
            (
                "Game (Rev 2) (Legacy Collection) (World).zip",
                ("Game", Some(2)),
            ),
            (
                "Game (World) (Rev 1) (Legacy Collection).zip",
                ("Game", Some(1)),
            ),
            // Edge cases
            (
                "Game with (Parentheses) in Name (World) (Rev 3).zip",
                ("Game with (Parentheses) in Name", Some(3)),
            ),
            (
                "Game (Collection Edition) (Rev 1) (US) (Reprint).zip",
                ("Game", Some(1)),
            ),
        ]);
    }

    #[test]
    fn test_is_valid_file_exclusions() {
        let rom_lister = lister("USA", &["Rental", "Alt"]);

        let accepted = [
            // Region filtering
            "Game (USA).zip",
            "Game (World).zip",
            // These should pass
            "Game (Rev 2) (USA).zip",
            "Game with Beta in Title (USA).zip", // Beta not in parentheses
            "Alternative Game (USA).zip",        // Alt not in parentheses
            "Game (World) (Rev 1).zip",
        ];

        let rejected = [
            // Region filtering
            "Game (Europe).zip",
            "Game (Japan).zip",
            // Smart filters
            "Game (USA) (Beta).zip",
            "Game (USA) (Proto).zip",
            "Game (USA) (Sample).zip",
            "Game (USA) (Demo).zip",
            "Game (USA) (Kiosk).zip",
            "Game (USA) (Unl).zip",
            // Custom exclude patterns
            "Game (USA) (Rental Version).zip",
            "Game (USA) (Alt Version).zip",
            // Complex combinations
            "Game (Beta) (USA) (Rev 1).zip", // Smart filter should catch this
            "Game (Rental) (World) (Rev 2).zip", // Custom pattern should catch this
            "Game (Europe) (Rev 1) (Demo).zip", // Region and smart filter both invalid
        ];

        check_validity(&rom_lister, &accepted, &rejected);
    }
}