1use std::collections::{BTreeMap, HashSet};
8use std::path::Path;
9
10use serde::Deserialize;
11
12use crate::error::{Error, Result};
13use crate::site::{Engine, Site};
14
15const EMBEDDED_REGISTRY: &str = include_str!("../data/sites.json");
16
17const EMBEDDED_WMN_REGISTRY: &str = include_str!("../data/sites_wmn.json");
24
25#[derive(Debug, Clone, Deserialize)]
34pub struct Registry {
35 #[serde(default)]
36 engines: BTreeMap<String, Engine>,
37 sites: Vec<Site>,
38}
39
40impl Registry {
41 pub fn default_embedded() -> Result<Self> {
43 Self::from_json_str(EMBEDDED_REGISTRY)
44 }
45
46 pub fn default_embedded_with_wmn() -> Result<Self> {
61 let mut base = Self::default_embedded()?;
62 let wmn: Self = serde_json::from_str(EMBEDDED_WMN_REGISTRY)?;
63 let existing: HashSet<String> = base.sites.iter().map(|s| s.name.to_lowercase()).collect();
64 for (name, engine) in wmn.engines {
65 base.engines.entry(name).or_insert(engine);
66 }
67 for site in wmn.sites {
68 if !existing.contains(&site.name.to_lowercase()) {
69 base.sites.push(site);
70 }
71 }
72 base.resolve_engines()?;
73 base.validate()?;
74 Ok(base)
75 }
76
77 pub fn from_json_str(json: &str) -> Result<Self> {
82 let mut registry: Self = serde_json::from_str(json)?;
83 registry.resolve_engines()?;
84 registry.validate()?;
85 Ok(registry)
86 }
87
88 pub fn engines(&self) -> &BTreeMap<String, Engine> {
93 &self.engines
94 }
95
96 fn resolve_engines(&mut self) -> Result<()> {
105 for (name, engine) in &self.engines {
106 engine.validate(name)?;
107 }
108 for site in &mut self.sites {
109 let Some(name) = &site.engine else {
110 continue;
111 };
112 let Some(engine) = self.engines.get(name) else {
113 return Err(Error::InvalidSite {
114 reason: format!(
115 "site {:?}: references engine {name:?} which is not defined",
116 site.name
117 ),
118 });
119 };
120 engine.merge_into(site);
121 }
122 Ok(())
123 }
124
125 pub fn load_from_path(path: impl AsRef<Path>) -> Result<Self> {
127 let bytes = std::fs::read(path)?;
128 let json = std::str::from_utf8(&bytes).map_err(|e| Error::InvalidSite {
129 reason: format!("registry file is not valid UTF-8: {e}"),
130 })?;
131 Self::from_json_str(json)
132 }
133
134 pub fn sites(&self) -> &[Site] {
136 &self.sites
137 }
138
139 pub fn len(&self) -> usize {
141 self.sites.len()
142 }
143
144 pub fn is_empty(&self) -> bool {
147 self.sites.is_empty()
148 }
149
150 pub fn filter(
169 &self,
170 include: &[String],
171 exclude: &[String],
172 tags: &[String],
173 exclude_tags: &[String],
174 include_nsfw: bool,
175 ) -> Vec<Site> {
176 let include: Vec<String> = include.iter().map(|s| s.to_lowercase()).collect();
177 let exclude: Vec<String> = exclude.iter().map(|s| s.to_lowercase()).collect();
178 let want_tags: Vec<String> = tags.iter().map(|s| s.to_lowercase()).collect();
179 let mut drop_tags: Vec<String> = exclude_tags.iter().map(|s| s.to_lowercase()).collect();
180
181 let nsfw_tag = "nsfw".to_owned();
184 let asking_for_nsfw = want_tags.contains(&nsfw_tag);
185 if !include_nsfw && !asking_for_nsfw && !drop_tags.contains(&nsfw_tag) {
186 drop_tags.push(nsfw_tag);
187 }
188
189 self.sites
190 .iter()
191 .filter(|site| {
192 let name = site.name.to_lowercase();
193 let included = include.is_empty() || include.iter().any(|i| name.contains(i));
194 let excluded = exclude.iter().any(|x| name.contains(x));
195 let lower_tags: Vec<String> = site.tags.iter().map(|t| t.to_lowercase()).collect();
196 let tagged =
197 want_tags.is_empty() || lower_tags.iter().any(|t| want_tags.contains(t));
198 let tag_excluded = lower_tags.iter().any(|t| drop_tags.contains(t));
199 included && !excluded && tagged && !tag_excluded
200 })
201 .cloned()
202 .collect()
203 }
204
205 pub fn tag_counts(&self) -> Vec<(String, usize)> {
208 let mut counts: std::collections::BTreeMap<String, usize> =
209 std::collections::BTreeMap::new();
210 for site in &self.sites {
211 for tag in &site.tags {
212 *counts.entry(tag.clone()).or_insert(0) += 1;
213 }
214 }
215 counts.into_iter().collect()
216 }
217
218 fn validate(&self) -> Result<()> {
219 if self.sites.is_empty() {
220 return Err(Error::InvalidSite {
221 reason: "registry has no sites".into(),
222 });
223 }
224 for site in &self.sites {
225 site.validate()?;
226 }
227 let mut seen: HashSet<String> = HashSet::new();
228 for site in &self.sites {
229 let key = site.name.to_lowercase();
230 if !seen.insert(key) {
231 return Err(Error::InvalidSite {
232 reason: format!("duplicate site name: {:?}", site.name),
233 });
234 }
235 }
236 Ok(())
237 }
238}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty filter list, for the `filter` arguments a test does not exercise.
    const NO_FILTER: &[String] = &[];

    /// Collects the site names in order.
    fn names_of(sites: &[Site]) -> Vec<&str> {
        sites.iter().map(|site| site.name.as_str()).collect()
    }

    /// Turns string literals into the owned strings `filter` expects.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_owned()).collect()
    }

    // ---- embedded registries -------------------------------------------

    #[test]
    fn embedded_registry_loads_and_validates() {
        let registry = Registry::default_embedded().expect("embedded registry must load");
        let total = registry.len();
        assert!(
            total >= 100,
            "imported registry should have ≥100 sites, got {}",
            total
        );
        // Spot-check a few well-known entries.
        let names = names_of(registry.sites());
        assert!(names.contains(&"GitHub"));
        assert!(names.contains(&"Reddit"));
        assert!(names.contains(&"Telegram"));
    }

    #[test]
    fn wmn_embedded_registry_loads_and_supersets_default() {
        let base = Registry::default_embedded().unwrap();
        let merged = Registry::default_embedded_with_wmn().expect("WMN-merged registry must load");
        let (base_len, merged_len) = (base.len(), merged.len());
        assert!(
            merged_len > base_len,
            "WMN merge must add sites: base={} merged={}",
            base_len,
            merged_len
        );

        // Every base site must survive the merge.
        let merged_names: HashSet<String> = merged
            .sites()
            .iter()
            .map(|site| site.name.to_lowercase())
            .collect();
        for site in base.sites() {
            let key = site.name.to_lowercase();
            assert!(
                merged_names.contains(&key),
                "merge dropped base-tranche site {:?}",
                site.name
            );
        }

        // At least one merged site must carry the WMN provenance tag.
        let has_wmn_tag = merged
            .sites()
            .iter()
            .flat_map(|site| site.tags.iter())
            .any(|tag| tag == "source:wmn");
        assert!(has_wmn_tag, "no site carries the source:wmn tag");
    }

    // ---- load-time rejection -------------------------------------------

    #[test]
    fn rejects_empty_registry() {
        let outcome = Registry::from_json_str(r#"{ "sites": [] }"#);
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::InvalidSite { .. }));
    }

    #[test]
    fn rejects_duplicate_site_names() {
        // Names differ only by case, which still counts as a duplicate.
        let json = r#"{
            "sites": [
                { "name": "GitHub", "url": "https://github.com/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] },
                { "name": "github", "url": "https://github.com/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] }
            ]
        }"#;
        let err = Registry::from_json_str(json).unwrap_err();
        assert!(matches!(err, Error::InvalidSite { .. }));
        let message = err.to_string();
        assert!(message.contains("duplicate"));
    }

    #[test]
    fn rejects_invalid_site_definition() {
        let json = r#"{
            "sites": [
                { "name": "Bad", "url": "https://example.com/",
                  "signals": [{ "kind": "status_found", "codes": [200] }] }
            ]
        }"#;
        let outcome = Registry::from_json_str(json);
        assert!(outcome.is_err());
    }

    #[test]
    fn rejects_malformed_json() {
        let err = Registry::from_json_str("{").unwrap_err();
        assert!(matches!(err, Error::Json(_)));
    }

    // ---- name filters ----------------------------------------------------

    #[test]
    fn filter_include_is_case_insensitive_substring() {
        let registry = Registry::default_embedded().unwrap();

        let only_github =
            registry.filter(&strings(&["github"]), NO_FILTER, NO_FILTER, NO_FILTER, false);
        assert_eq!(names_of(&only_github), ["GitHub"]);

        let many = registry.filter(&strings(&["e"]), NO_FILTER, NO_FILTER, NO_FILTER, false);
        assert!(many.len() > 1);
    }

    #[test]
    fn filter_exclude_drops_matches() {
        let registry = Registry::default_embedded().unwrap();
        let without_github =
            registry.filter(NO_FILTER, &strings(&["github"]), NO_FILTER, NO_FILTER, true);
        assert!(!names_of(&without_github).contains(&"GitHub"));
        assert_eq!(without_github.len(), registry.len() - 1);
    }

    #[test]
    fn filter_include_and_exclude_compose() {
        let registry = Registry::default_embedded().unwrap();
        let filtered = registry.filter(
            &strings(&["git"]),
            &strings(&["lab"]),
            NO_FILTER,
            NO_FILTER,
            false,
        );
        let names = names_of(&filtered);
        assert!(names.contains(&"GitHub"));
        assert!(!names.contains(&"GitLab"));
    }

    #[test]
    fn filter_with_no_matches_returns_empty() {
        let registry = Registry::default_embedded().unwrap();
        let filtered = registry.filter(
            &strings(&["does-not-exist-xyz"]),
            NO_FILTER,
            NO_FILTER,
            NO_FILTER,
            false,
        );
        assert!(filtered.is_empty());
    }

    // ---- tag filters -----------------------------------------------------

    /// Three sites: one with two tags, one with a single tag, one untagged.
    fn tagged_registry() -> Registry {
        let json = r#"{
            "sites": [
                { "name": "Soc", "url": "https://soc.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["social", "region:ru"] },
                { "name": "Dev", "url": "https://dev.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["dev"] },
                { "name": "Plain", "url": "https://plain.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] }
            ]
        }"#;
        Registry::from_json_str(json).unwrap()
    }

    #[test]
    fn tag_filter_keeps_only_matching_tags_and_drops_untagged() {
        let registry = tagged_registry();
        let social = registry.filter(NO_FILTER, NO_FILTER, &strings(&["social"]), NO_FILTER, false);
        assert_eq!(
            names_of(&social),
            ["Soc"],
            "tag filter should keep only tagged matches"
        );
    }

    #[test]
    fn tag_filter_is_or_within_requested_tags_and_case_insensitive() {
        let registry = tagged_registry();
        let wanted = strings(&["DEV", "social"]);
        let either = registry.filter(NO_FILTER, NO_FILTER, &wanted, NO_FILTER, false);
        assert_eq!(names_of(&either), ["Soc", "Dev"]);
    }

    #[test]
    fn no_tag_filter_includes_untagged_sites() {
        let registry = tagged_registry();
        let everything = registry.filter(NO_FILTER, NO_FILTER, NO_FILTER, NO_FILTER, false);
        assert_eq!(everything.len(), 3);
    }

    #[test]
    fn exclude_tag_drops_matching_sites() {
        let registry = tagged_registry();
        let kept = registry.filter(NO_FILTER, NO_FILTER, NO_FILTER, &strings(&["social"]), false);
        let names = names_of(&kept);
        assert_eq!(names, ["Dev", "Plain"], "{names:?}");
    }

    // ---- nsfw handling ---------------------------------------------------

    /// Two sites: one tagged `social`, one tagged `nsfw`.
    fn nsfw_registry() -> Registry {
        let json = r#"{
            "sites": [
                { "name": "Family", "url": "https://family.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["social"] },
                { "name": "Adult", "url": "https://adult.example/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }],
                  "tags": ["nsfw"] }
            ]
        }"#;
        Registry::from_json_str(json).unwrap()
    }

    #[test]
    fn nsfw_sites_excluded_by_default() {
        let registry = nsfw_registry();
        let kept = registry.filter(NO_FILTER, NO_FILTER, NO_FILTER, NO_FILTER, false);
        assert_eq!(
            names_of(&kept),
            ["Family"],
            "nsfw site must be excluded by default"
        );
    }

    #[test]
    fn nsfw_sites_included_when_flag_set() {
        let registry = nsfw_registry();
        let kept = registry.filter(NO_FILTER, NO_FILTER, NO_FILTER, NO_FILTER, true);
        assert_eq!(kept.len(), 2, "both sites present with include_nsfw=true");
    }

    #[test]
    fn nsfw_sites_included_when_tag_asked_for_explicitly() {
        let registry = nsfw_registry();
        // Requesting the tag overrides the default exclusion even without the flag.
        let kept = registry.filter(NO_FILTER, NO_FILTER, &strings(&["nsfw"]), NO_FILTER, false);
        assert_eq!(names_of(&kept), ["Adult"]);
    }

    // ---- tag counts ------------------------------------------------------

    #[test]
    fn tag_counts_are_sorted_with_per_tag_totals() {
        let registry = tagged_registry();
        let expected: Vec<(String, usize)> = [("dev", 1), ("region:ru", 1), ("social", 1)]
            .into_iter()
            .map(|(tag, count)| (tag.to_owned(), count))
            .collect();
        assert_eq!(registry.tag_counts(), expected);
    }

    // ---- engine inheritance ---------------------------------------------

    #[test]
    fn engine_inheritance_fills_empty_site_signals() {
        let json = r#"{
            "engines": {
                "Discourse": {
                    "signals": [
                        { "kind": "status_found", "codes": [200] },
                        { "kind": "body_absent", "text": "Oops! That page doesn't exist" }
                    ]
                }
            },
            "sites": [
                { "name": "Mozilla Forum", "url": "https://discourse.mozilla.org/u/{username}",
                  "engine": "Discourse" }
            ]
        }"#;
        let registry = Registry::from_json_str(json).unwrap();
        let site = &registry.sites()[0];
        assert_eq!(site.signals.len(), 2);
        assert_eq!(site.engine.as_deref(), Some("Discourse"));
        assert!(registry.engines().contains_key("Discourse"));
    }

    #[test]
    fn site_overrides_engine_signals_on_conflict() {
        let json = r#"{
            "engines": {
                "Discourse": {
                    "signals": [{ "kind": "status_found", "codes": [200] }]
                }
            },
            "sites": [
                { "name": "Custom", "url": "https://example.com/{username}",
                  "engine": "Discourse",
                  "signals": [
                      { "kind": "status_found", "codes": [200] },
                      { "kind": "status_not_found", "codes": [404] }
                  ] }
            ]
        }"#;
        let registry = Registry::from_json_str(json).unwrap();
        let site = &registry.sites()[0];
        assert_eq!(site.signals.len(), 2);
    }

    #[test]
    fn engine_headers_merge_with_site_headers_per_key() {
        let json = r#"{
            "engines": {
                "Foo": {
                    "signals": [{ "kind": "status_found", "codes": [200] }],
                    "request_headers": {
                        "X-Engine": "engine-value",
                        "User-Agent": "engine-ua"
                    }
                }
            },
            "sites": [
                { "name": "S", "url": "https://example.com/{username}",
                  "engine": "Foo",
                  "request_headers": { "User-Agent": "site-ua" } }
            ]
        }"#;
        let registry = Registry::from_json_str(json).unwrap();
        let headers = &registry.sites()[0].request_headers;
        let header = |key: &str| headers.get(key).map(String::as_str);
        assert_eq!(header("X-Engine"), Some("engine-value"));
        assert_eq!(header("User-Agent"), Some("site-ua"));
    }

    #[test]
    fn missing_engine_reference_fails_load() {
        let json = r#"{
            "engines": {},
            "sites": [
                { "name": "Mock", "url": "https://example.com/{username}",
                  "engine": "DoesNotExist" }
            ]
        }"#;
        let err = Registry::from_json_str(json).unwrap_err();
        let mentions_engine = err
            .to_string()
            .contains("references engine \"DoesNotExist\"");
        assert!(
            mentions_engine,
            "expected missing-engine error, got: {err}"
        );
    }

    #[test]
    fn engine_regex_check_inherited_when_site_has_none() {
        let json = r#"{
            "engines": {
                "Bounded": {
                    "signals": [{ "kind": "status_found", "codes": [200] }],
                    "regex_check": "^[a-z]{3,16}$"
                }
            },
            "sites": [
                { "name": "S", "url": "https://example.com/{username}",
                  "engine": "Bounded" }
            ]
        }"#;
        let registry = Registry::from_json_str(json).unwrap();
        let site = &registry.sites()[0];
        assert_eq!(site.regex_check.as_deref(), Some("^[a-z]{3,16}$"));
    }

    // ---- loading from disk -------------------------------------------------

    #[test]
    fn load_from_path_round_trips_via_tempfile() {
        let file_name = format!("adler-test-registry-{}.json", std::process::id());
        let path = std::env::temp_dir().join(file_name);
        let contents = r#"{
            "sites": [
                { "name": "Mock", "url": "https://example.com/{username}",
                  "signals": [{ "kind": "status_found", "codes": [200] }] }
            ]
        }"#;
        std::fs::write(&path, contents).unwrap();

        // Clean up before unwrapping so a failed load does not leave the file behind.
        let result = Registry::load_from_path(&path);
        let _ = std::fs::remove_file(&path);

        let registry = result.unwrap();
        assert_eq!(registry.len(), 1);
        assert_eq!(registry.sites()[0].name, "Mock");
    }
}