1use std::collections::{BTreeMap, HashSet};
8use std::path::Path;
9
10use serde::Deserialize;
11
12use crate::error::{Error, Result};
13use crate::site::{Engine, Site};
14
/// JSON text of the default registry, embedded into the binary at compile time
/// from `../data/sites.json` (path is relative to this source file).
const EMBEDDED_REGISTRY: &str = include_str!("../data/sites.json");

/// JSON text of the additional "WMN" registry, embedded at compile time from
/// `../data/sites_wmn.json`. It is only consumed by
/// `Registry::default_embedded_with_wmn`, which merges it on top of the default
/// registry.
const EMBEDDED_WMN_REGISTRY: &str = include_str!("../data/sites_wmn.json");
24
/// A set of site definitions together with the named engines those sites may
/// reference.
///
/// The type is deserialized from JSON with serde. The `engines` key is optional
/// (it falls back to an empty map); the `sites` key has no default and must be
/// present in the input.
#[derive(Debug, Clone, Deserialize)]
pub struct Registry {
    /// Engines keyed by name. A site's `engine` field is looked up in this map
    /// when engines are resolved.
    #[serde(default)]
    engines: BTreeMap<String, Engine>,
    /// All sites held by the registry, in the order they were deserialized or
    /// merged in.
    sites: Vec<Site>,
}
39
40impl Registry {
41 pub fn default_embedded() -> Result<Self> {
43 Self::from_json_str(EMBEDDED_REGISTRY)
44 }
45
46 pub fn default_embedded_with_wmn() -> Result<Self> {
61 let mut base = Self::default_embedded()?;
62 let wmn: Self = serde_json::from_str(EMBEDDED_WMN_REGISTRY)?;
63 let existing: HashSet<String> = base.sites.iter().map(|s| s.name.to_lowercase()).collect();
64 for (name, engine) in wmn.engines {
65 base.engines.entry(name).or_insert(engine);
66 }
67 for site in wmn.sites {
68 if !existing.contains(&site.name.to_lowercase()) {
69 base.sites.push(site);
70 }
71 }
72 base.resolve_engines()?;
73 base.validate()?;
74 Ok(base)
75 }
76
77 pub fn from_json_str(json: &str) -> Result<Self> {
82 let mut registry: Self = serde_json::from_str(json)?;
83 registry.resolve_engines()?;
84 registry.validate()?;
85 Ok(registry)
86 }
87
88 pub fn engines(&self) -> &BTreeMap<String, Engine> {
93 &self.engines
94 }
95
96 fn resolve_engines(&mut self) -> Result<()> {
105 for (name, engine) in &self.engines {
106 engine.validate(name)?;
107 }
108 for site in &mut self.sites {
109 let Some(name) = &site.engine else {
110 continue;
111 };
112 let Some(engine) = self.engines.get(name) else {
113 return Err(Error::InvalidSite {
114 reason: format!(
115 "site {:?}: references engine {name:?} which is not defined",
116 site.name
117 ),
118 });
119 };
120 engine.merge_into(site);
121 }
122 Ok(())
123 }
124
125 pub fn load_from_path(path: impl AsRef<Path>) -> Result<Self> {
127 let bytes = std::fs::read(path)?;
128 let json = std::str::from_utf8(&bytes).map_err(|e| Error::InvalidSite {
129 reason: format!("registry file is not valid UTF-8: {e}"),
130 })?;
131 Self::from_json_str(json)
132 }
133
134 pub fn sites(&self) -> &[Site] {
136 &self.sites
137 }
138
139 pub fn len(&self) -> usize {
141 self.sites.len()
142 }
143
144 pub fn is_empty(&self) -> bool {
147 self.sites.is_empty()
148 }
149
150 pub fn filter(
169 &self,
170 include: &[String],
171 exclude: &[String],
172 tags: &[String],
173 exclude_tags: &[String],
174 include_nsfw: bool,
175 ) -> Vec<Site> {
176 let include: Vec<String> = include.iter().map(|s| s.to_lowercase()).collect();
177 let exclude: Vec<String> = exclude.iter().map(|s| s.to_lowercase()).collect();
178 let want_tags: Vec<String> = tags.iter().map(|s| s.to_lowercase()).collect();
179 let mut drop_tags: Vec<String> = exclude_tags.iter().map(|s| s.to_lowercase()).collect();
180
181 let nsfw_tag = "nsfw".to_owned();
184 let asking_for_nsfw = want_tags.contains(&nsfw_tag);
185 if !include_nsfw && !asking_for_nsfw && !drop_tags.contains(&nsfw_tag) {
186 drop_tags.push(nsfw_tag);
187 }
188
189 self.sites
190 .iter()
191 .filter(|site| {
192 if site.disabled {
197 return false;
198 }
199 let name = site.name.to_lowercase();
200 let included = include.is_empty() || include.iter().any(|i| name.contains(i));
201 let excluded = exclude.iter().any(|x| name.contains(x));
202 let lower_tags: Vec<String> = site.tags.iter().map(|t| t.to_lowercase()).collect();
203 let tagged =
204 want_tags.is_empty() || lower_tags.iter().any(|t| want_tags.contains(t));
205 let tag_excluded = lower_tags.iter().any(|t| drop_tags.contains(t));
206 included && !excluded && tagged && !tag_excluded
207 })
208 .cloned()
209 .collect()
210 }
211
212 pub fn tag_counts(&self) -> Vec<(String, usize)> {
215 let mut counts: std::collections::BTreeMap<String, usize> =
216 std::collections::BTreeMap::new();
217 for site in &self.sites {
218 for tag in &site.tags {
219 *counts.entry(tag.clone()).or_insert(0) += 1;
220 }
221 }
222 counts.into_iter().collect()
223 }
224
225 fn validate(&self) -> Result<()> {
226 if self.sites.is_empty() {
227 return Err(Error::InvalidSite {
228 reason: "registry has no sites".into(),
229 });
230 }
231 for site in &self.sites {
232 site.validate()?;
233 }
234 let mut seen: HashSet<String> = HashSet::new();
235 for site in &self.sites {
236 let key = site.name.to_lowercase();
237 if !seen.insert(key) {
238 return Err(Error::InvalidSite {
239 reason: format!("duplicate site name: {:?}", site.name),
240 });
241 }
242 }
243 Ok(())
244 }
245}
246
// Unit tests for `Registry`: loading (embedded, string, file), validation
// failures, name/tag/NSFW filtering, tag counting, and engine inheritance.
#[cfg(test)]
mod tests {
    use super::*;

    // The embedded default registry must load and contain a sizeable number of
    // sites, including a few well-known names.
    #[test]
    fn embedded_registry_loads_and_validates() {
        let registry = Registry::default_embedded().expect("embedded registry must load");
        assert!(
            registry.len() >= 100,
            "imported registry should have ≥100 sites, got {}",
            registry.len()
        );
        let names: Vec<&str> = registry.sites().iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"GitHub"));
        assert!(names.contains(&"Reddit"));
        assert!(names.contains(&"Telegram"));
    }

    // The WMN-merged registry must be strictly larger than the base registry,
    // keep every base site, and contain at least one site tagged `source:wmn`.
    #[test]
    fn wmn_embedded_registry_loads_and_supersets_default() {
        let base = Registry::default_embedded().unwrap();
        let merged = Registry::default_embedded_with_wmn().expect("WMN-merged registry must load");
        assert!(
            merged.len() > base.len(),
            "WMN merge must add sites: base={} merged={}",
            base.len(),
            merged.len()
        );
        // Names are compared lowercased, matching how the merge de-duplicates.
        let merged_names: HashSet<String> = merged
            .sites()
            .iter()
            .map(|s| s.name.to_lowercase())
            .collect();
        for s in base.sites() {
            assert!(
                merged_names.contains(&s.name.to_lowercase()),
                "merge dropped base-tranche site {:?}",
                s.name
            );
        }
        let has_wmn_tag = merged
            .sites()
            .iter()
            .any(|s| s.tags.iter().any(|t| t == "source:wmn"));
        assert!(has_wmn_tag, "no site carries the source:wmn tag");
    }

    // A registry with an empty `sites` array is rejected by validation.
    #[test]
    fn rejects_empty_registry() {
        let err = Registry::from_json_str(r#"{ "sites": [] }"#).unwrap_err();
        assert!(matches!(err, Error::InvalidSite { .. }));
    }

    // Site names that differ only by case count as duplicates.
    #[test]
    fn rejects_duplicate_site_names() {
        let json = r#"{
            "sites": [
                { "name": "GitHub", "url": "https://github.com/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] },
                { "name": "github", "url": "https://github.com/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] }
            ]
        }"#;
        let err = Registry::from_json_str(json).unwrap_err();
        assert!(matches!(err, Error::InvalidSite { .. }));
        assert!(err.to_string().contains("duplicate"));
    }

    // A site that fails its own validation makes the whole load fail.
    // NOTE(review): presumably rejected because the URL has no `{username}`
    // placeholder (every accepted fixture in this module has one) — confirm
    // against `Site::validate`.
    #[test]
    fn rejects_invalid_site_definition() {
        let json = r#"{
            "sites": [
                { "name": "Bad", "url": "https://example.com/",
                  "signals": [{ "kind": "status_found", "codes": [200] }] }
            ]
        }"#;
        assert!(Registry::from_json_str(json).is_err());
    }

    // Syntactically broken JSON surfaces as the `Error::Json` variant.
    #[test]
    fn rejects_malformed_json() {
        let err = Registry::from_json_str("{").unwrap_err();
        assert!(matches!(err, Error::Json(_)));
    }

    // Include terms match as case-insensitive substrings of the site name.
    #[test]
    fn filter_include_is_case_insensitive_substring() {
        let registry = Registry::default_embedded().unwrap();
        // Lowercase "github" must match the site stored as "GitHub", and only it.
        let only_github = registry.filter(&["github".into()], &[], &[], &[], false);
        assert_eq!(only_github.len(), 1);
        assert_eq!(only_github[0].name, "GitHub");

        // A single common letter is a substring of many site names.
        let many = registry.filter(&["e".into()], &[], &[], &[], false);
        assert!(many.len() > 1);
    }

    // Excluding "github" removes exactly one site from the full set.
    #[test]
    fn filter_exclude_drops_matches() {
        let registry = Registry::default_embedded().unwrap();
        // `include_nsfw` is true so the default NSFW exclusion does not remove
        // further sites. The exact count below relies on the embedded data
        // having no disabled sites and only one name containing "github".
        let without_github = registry.filter(&[], &["github".into()], &[], &[], true);
        assert!(without_github.iter().all(|s| s.name != "GitHub"));
        assert_eq!(without_github.len(), registry.len() - 1);
    }

    // Include and exclude apply together: "git" keeps GitHub, "lab" removes GitLab.
    #[test]
    fn filter_include_and_exclude_compose() {
        let registry = Registry::default_embedded().unwrap();
        let filtered = registry.filter(&["git".into()], &["lab".into()], &[], &[], false);
        let names: Vec<&str> = filtered.iter().map(|s| s.name.as_str()).collect();
        assert!(names.contains(&"GitHub"));
        assert!(!names.contains(&"GitLab"));
    }

    // An include term that matches no site name yields an empty result.
    #[test]
    fn filter_with_no_matches_returns_empty() {
        let registry = Registry::default_embedded().unwrap();
        let filtered = registry.filter(&["does-not-exist-xyz".into()], &[], &[], &[], false);
        assert!(filtered.is_empty());
    }

    // Disabled sites stay in the registry but never come out of `filter`.
    #[test]
    fn disabled_sites_are_skipped_by_filter() {
        let json = r#"{
            "sites": [
                { "name": "Alive", "url": "https://alive.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] },
                { "name": "Parked", "url": "https://parked.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "disabled": true }
            ]
        }"#;
        let registry = Registry::from_json_str(json).unwrap();
        // Both sites are loaded and visible through `sites()`.
        assert_eq!(registry.sites().len(), 2);
        // Only the enabled one survives an otherwise unrestricted filter.
        let scanned = registry.filter(&[], &[], &[], &[], false);
        let names: Vec<&str> = scanned.iter().map(|s| s.name.as_str()).collect();
        assert_eq!(names, vec!["Alive"]);
    }

    // The optional `source` field of a site is preserved by deserialization.
    #[test]
    fn source_field_round_trips() {
        let json = r#"{
            "sites": [
                { "name": "Nitter", "url": "https://nitter.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "source": "Twitter" }
            ]
        }"#;
        let registry = Registry::from_json_str(json).unwrap();
        assert_eq!(registry.sites()[0].source.as_deref(), Some("Twitter"));
    }

    // Fixture: one site with two tags, one with a single tag, one untagged.
    fn tagged_registry() -> Registry {
        let json = r#"{
            "sites": [
                { "name": "Soc", "url": "https://soc.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["social", "region:ru"] },
                { "name": "Dev", "url": "https://dev.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["dev"] },
                { "name": "Plain", "url": "https://plain.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] }
            ]
        }"#;
        Registry::from_json_str(json).unwrap()
    }

    // Requesting a tag keeps only sites carrying it; untagged sites drop out.
    #[test]
    fn tag_filter_keeps_only_matching_tags_and_drops_untagged() {
        let r = tagged_registry();
        let social = r.filter(&[], &[], &["social".into()], &[], false);
        let names: Vec<&str> = social.iter().map(|s| s.name.as_str()).collect();
        assert_eq!(names, ["Soc"], "tag filter should keep only tagged matches");
    }

    // Several requested tags are OR-ed, and tag case is ignored ("DEV" == "dev").
    // The result keeps registry order.
    #[test]
    fn tag_filter_is_or_within_requested_tags_and_case_insensitive() {
        let r = tagged_registry();
        let either = r.filter(&[], &[], &["DEV".into(), "social".into()], &[], false);
        let names: Vec<&str> = either.iter().map(|s| s.name.as_str()).collect();
        assert_eq!(names, ["Soc", "Dev"]);
    }

    // With no tag filter at all, untagged sites are returned too.
    #[test]
    fn no_tag_filter_includes_untagged_sites() {
        let r = tagged_registry();
        assert_eq!(r.filter(&[], &[], &[], &[], false).len(), 3);
    }

    // An excluded tag removes the sites carrying it and leaves the rest.
    #[test]
    fn exclude_tag_drops_matching_sites() {
        let r = tagged_registry();
        let kept = r.filter(&[], &[], &[], &["social".into()], false);
        let names: Vec<&str> = kept.iter().map(|s| s.name.as_str()).collect();
        assert_eq!(names, ["Dev", "Plain"], "{names:?}");
    }

    // Fixture: one ordinary site and one site tagged `nsfw`.
    fn nsfw_registry() -> Registry {
        let json = r#"{
            "sites": [
                { "name": "Family", "url": "https://family.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["social"] },
                { "name": "Adult", "url": "https://adult.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["nsfw"] }
            ]
        }"#;
        Registry::from_json_str(json).unwrap()
    }

    // Without the flag and without asking for the tag, `nsfw` sites are dropped.
    #[test]
    fn nsfw_sites_excluded_by_default() {
        let r = nsfw_registry();
        let kept = r.filter(&[], &[], &[], &[], false);
        let names: Vec<&str> = kept.iter().map(|s| s.name.as_str()).collect();
        assert_eq!(names, ["Family"], "nsfw site must be excluded by default");
    }

    // `include_nsfw = true` disables the default exclusion.
    #[test]
    fn nsfw_sites_included_when_flag_set() {
        let r = nsfw_registry();
        let kept = r.filter(&[], &[], &[], &[], true);
        assert_eq!(kept.len(), 2, "both sites present with include_nsfw=true");
    }

    // Requesting the `nsfw` tag explicitly also disables the default exclusion,
    // even though `include_nsfw` is false.
    #[test]
    fn nsfw_sites_included_when_tag_asked_for_explicitly() {
        let r = nsfw_registry();
        let kept = r.filter(&[], &[], &["nsfw".into()], &[], false);
        let names: Vec<&str> = kept.iter().map(|s| s.name.as_str()).collect();
        assert_eq!(names, ["Adult"]);
    }

    // `tag_counts` returns one entry per tag, ordered by tag name.
    #[test]
    fn tag_counts_are_sorted_with_per_tag_totals() {
        let r = tagged_registry();
        assert_eq!(
            r.tag_counts(),
            vec![
                ("dev".to_owned(), 1),
                ("region:ru".to_owned(), 1),
                ("social".to_owned(), 1),
            ]
        );
    }

    // A site with no signals of its own receives the signals of its engine.
    #[test]
    fn engine_inheritance_fills_empty_site_signals() {
        let json = r#"{
            "engines": {
                "Discourse": {
                    "signals": [
                        { "kind": "status_found", "codes": [200] },
                        { "kind": "body_absent", "text": "Oops! That page doesn't exist" }
                    ]
                }
            },
            "sites": [
                { "name": "Mozilla Forum", "url": "https://discourse.mozilla.org/u/{username}",
                  "engine": "Discourse" }
            ]
        }"#;
        let r = Registry::from_json_str(json).unwrap();
        let site = &r.sites()[0];
        assert_eq!(site.signals.len(), 2);
        // The engine reference stays on the site and the engine stays in the registry.
        assert_eq!(site.engine.as_deref(), Some("Discourse"));
        assert!(r.engines().contains_key("Discourse"));
    }

    // When the site defines signals itself, the engine's signals are not added:
    // the site keeps exactly its own two.
    #[test]
    fn site_overrides_engine_signals_on_conflict() {
        let json = r#"{
            "engines": {
                "Discourse": {
                    "signals": [{ "kind": "status_found", "codes": [200] }]
                }
            },
            "sites": [
                { "name": "Custom", "url": "https://example.com/{username}",
                  "engine": "Discourse",
                  "signals": [
                      { "kind": "status_found", "codes": [200] },
                      { "kind": "status_not_found", "codes": [404] }
                  ] }
            ]
        }"#;
        let r = Registry::from_json_str(json).unwrap();
        assert_eq!(r.sites()[0].signals.len(), 2);
    }

    // Request headers merge per key: engine-only keys are inherited, and a key
    // set by both takes the site's value.
    #[test]
    fn engine_headers_merge_with_site_headers_per_key() {
        let json = r#"{
            "engines": {
                "Foo": {
                    "signals": [{ "kind": "status_found", "codes": [200] }],
                    "request_headers": {
                        "X-Engine": "engine-value",
                        "User-Agent": "engine-ua"
                    }
                }
            },
            "sites": [
                { "name": "S", "url": "https://example.com/{username}",
                  "engine": "Foo",
                  "request_headers": { "User-Agent": "site-ua" } }
            ]
        }"#;
        let r = Registry::from_json_str(json).unwrap();
        let h = &r.sites()[0].request_headers;
        assert_eq!(h.get("X-Engine").map(String::as_str), Some("engine-value"));
        assert_eq!(h.get("User-Agent").map(String::as_str), Some("site-ua"));
    }

    // Referencing an engine that is not defined fails the load with a message
    // naming the missing engine.
    #[test]
    fn missing_engine_reference_fails_load() {
        let json = r#"{
            "engines": {},
            "sites": [
                { "name": "Mock", "url": "https://example.com/{username}",
                  "engine": "DoesNotExist" }
            ]
        }"#;
        let err = Registry::from_json_str(json).unwrap_err();
        assert!(
            err.to_string()
                .contains("references engine \"DoesNotExist\""),
            "expected missing-engine error, got: {err}"
        );
    }

    // A site without its own `regex_check` inherits the engine's.
    #[test]
    fn engine_regex_check_inherited_when_site_has_none() {
        let json = r#"{
            "engines": {
                "Bounded": {
                    "signals": [{ "kind": "status_found", "codes": [200] }],
                    "regex_check": "^[a-z]{3,16}$"
                }
            },
            "sites": [
                { "name": "S", "url": "https://example.com/{username}",
                  "engine": "Bounded" }
            ]
        }"#;
        let r = Registry::from_json_str(json).unwrap();
        assert_eq!(r.sites()[0].regex_check.as_deref(), Some("^[a-z]{3,16}$"));
    }

    // `load_from_path` reads a registry written to a temporary file.
    #[test]
    fn load_from_path_round_trips_via_tempfile() {
        // The process id is part of the file name so concurrent test processes
        // do not write to the same path.
        let mut path = std::env::temp_dir();
        path.push(format!("adler-test-registry-{}.json", std::process::id()));
        std::fs::write(
            &path,
            r#"{
                "sites": [
                    { "name": "Mock", "url": "https://example.com/{username}",
                      "signals": [{ "kind": "status_found", "codes": [200] }] }
                ]
            }"#,
        )
        .unwrap();
        let result = Registry::load_from_path(&path);
        // Remove the file before unwrapping so a failed load does not leave it behind.
        let _ = std::fs::remove_file(&path);
        let registry = result.unwrap();
        assert_eq!(registry.len(), 1);
        assert_eq!(registry.sites()[0].name, "Mock");
    }
}