1use regex::Regex;
11use scraper::{Html, Selector, ElementRef};
12use serde::{Deserialize, Serialize};
13use lazy_static::lazy_static;
14use std::collections::HashSet;
15
16use crate::types::ParserResult;
17
18#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
24pub struct ContactInfo {
25 pub emails: Vec<Email>,
27 pub phones: Vec<Phone>,
29 pub addresses: Vec<Address>,
31 pub social_links: Vec<SocialLink>,
33 pub contact_page: Option<String>,
35 pub business_name: Option<String>,
37}
38
39impl ContactInfo {
40 pub fn new() -> Self {
41 Self::default()
42 }
43
44 pub fn has_contact_info(&self) -> bool {
46 !self.emails.is_empty() ||
47 !self.phones.is_empty() ||
48 !self.addresses.is_empty() ||
49 !self.social_links.is_empty()
50 }
51
52 pub fn primary_email(&self) -> Option<&Email> {
54 self.emails.iter().find(|e| !e.is_generic)
55 .or_else(|| self.emails.first())
56 }
57
58 pub fn primary_phone(&self) -> Option<&Phone> {
60 self.phones.first()
61 }
62
63 pub fn email_addresses(&self) -> Vec<&str> {
65 self.emails.iter().map(|e| e.address.as_str()).collect()
66 }
67}
68
69#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
71pub struct Email {
72 pub address: String,
74 pub label: Option<String>,
76 pub is_generic: bool,
78 pub source: EmailSource,
80}
81
82impl Email {
83 pub fn new(address: String) -> Self {
84 let is_generic = Self::check_is_generic(&address);
85 Self {
86 address,
87 label: None,
88 is_generic,
89 source: EmailSource::Text,
90 }
91 }
92
93 fn check_is_generic(address: &str) -> bool {
94 let local = address.split('@').next().unwrap_or("").to_lowercase();
95 let generic_prefixes = [
96 "info", "contact", "hello", "hi", "support", "help",
97 "sales", "admin", "webmaster", "noreply", "no-reply",
98 "mail", "email", "enquiries", "enquiry", "general",
99 ];
100 generic_prefixes.iter().any(|p| local == *p || local.starts_with(&format!("{}.", p)))
101 }
102}
103
104#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
106pub enum EmailSource {
107 MailtoLink,
109 #[default]
111 Text,
112 StructuredData,
114 MetaTag,
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
120pub struct Phone {
121 pub raw: String,
123 pub normalized: String,
125 pub international: Option<String>,
127 pub phone_type: PhoneType,
129 pub label: Option<String>,
131}
132
133impl Phone {
134 pub fn new(raw: String) -> Self {
135 let normalized = Self::normalize(&raw);
136 Self {
137 raw,
138 normalized,
139 international: None,
140 phone_type: PhoneType::Unknown,
141 label: None,
142 }
143 }
144
145 fn normalize(raw: &str) -> String {
146 raw.chars().filter(|c| c.is_ascii_digit() || *c == '+').collect()
147 }
148}
149
150#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
152pub enum PhoneType {
153 Main,
155 Mobile,
157 Fax,
159 TollFree,
161 #[default]
163 Unknown,
164}
165
166#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
168pub struct Address {
169 pub full_text: String,
171 pub street: Option<String>,
173 pub city: Option<String>,
175 pub state: Option<String>,
177 pub postal_code: Option<String>,
179 pub country: Option<String>,
181 pub coordinates: Option<Coordinates>,
183}
184
185#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
187pub struct Coordinates {
188 pub latitude: f64,
189 pub longitude: f64,
190}
191
192#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
194pub struct SocialLink {
195 pub platform: SocialPlatform,
197 pub url: String,
199 pub username: Option<String>,
201}
202
203#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
205pub enum SocialPlatform {
206 Facebook,
207 Twitter,
208 Instagram,
209 LinkedIn,
210 YouTube,
211 TikTok,
212 Pinterest,
213 GitHub,
214 Reddit,
215 Discord,
216 Telegram,
217 WhatsApp,
218 Snapchat,
219 Tumblr,
220 Medium,
221 Twitch,
222 Vimeo,
223 Flickr,
224 Other,
225}
226
227lazy_static! {
232 static ref EMAIL_PATTERN: Regex = Regex::new(
234 r"(?i)[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}"
235 ).unwrap();
236
237 static ref PHONE_PATTERN: Regex = Regex::new(
239 r"(?x)
240 (?:\+?1[-.\s]?)? # Optional country code
241 (?:\(?\d{3}\)?[-.\s]?) # Area code
242 \d{3}[-.\s]? # Exchange
243 \d{4} # Subscriber
244 |
245 \+\d{1,3}[-.\s]?\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9} # International
246 "
247 ).unwrap();
248
249 static ref TOLL_FREE_PATTERN: Regex = Regex::new(
251 r"(?i)1?[-.\s]?(?:800|888|877|866|855|844|833)[-.\s]?\d{3}[-.\s]?\d{4}"
252 ).unwrap();
253
254 static ref US_ZIP_PATTERN: Regex = Regex::new(r"\b\d{5}(?:-\d{4})?\b").unwrap();
256 static ref UK_POSTAL_PATTERN: Regex = Regex::new(
257 r"(?i)\b[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}\b"
258 ).unwrap();
259 static ref CA_POSTAL_PATTERN: Regex = Regex::new(
260 r"(?i)\b[A-Z]\d[A-Z]\s*\d[A-Z]\d\b"
261 ).unwrap();
262}
263
264pub fn extract_contact_info(document: &Html) -> ParserResult<ContactInfo> {
270 let mut info = ContactInfo::new();
271
272 info.emails = extract_emails(document);
274
275 info.phones = extract_phones(document);
277
278 info.addresses = extract_addresses(document);
280
281 info.social_links = extract_social_links(document);
283
284 info.contact_page = find_contact_page(document);
286
287 info.business_name = extract_business_name(document);
289
290 Ok(info)
291}
292
293pub fn extract_emails(document: &Html) -> Vec<Email> {
295 let mut emails = Vec::new();
296 let mut seen = HashSet::new();
297
298 if let Ok(sel) = Selector::parse("a[href^='mailto:']") {
300 for el in document.select(&sel) {
301 if let Some(href) = el.value().attr("href") {
302 let addr = href.trim_start_matches("mailto:")
303 .split('?').next()
304 .unwrap_or("")
305 .to_lowercase();
306
307 if EMAIL_PATTERN.is_match(&addr) && !seen.contains(&addr) {
308 seen.insert(addr.clone());
309 let mut email = Email::new(addr);
310 email.source = EmailSource::MailtoLink;
311 email.label = extract_email_label(&el);
312 emails.push(email);
313 }
314 }
315 }
316 }
317
318 let text = document.root_element().text().collect::<String>();
320 for caps in EMAIL_PATTERN.find_iter(&text) {
321 let addr = caps.as_str().to_lowercase();
322 if !seen.contains(&addr) && is_valid_email(&addr) {
323 seen.insert(addr.clone());
324 emails.push(Email::new(addr));
325 }
326 }
327
328 if let Ok(sel) = Selector::parse("[itemprop='email'], [property='email']") {
330 for el in document.select(&sel) {
331 let content = el.value().attr("content")
332 .or_else(|| el.value().attr("href"))
333 .map(|s| s.trim_start_matches("mailto:").to_lowercase())
334 .or_else(|| Some(el.text().collect::<String>().trim().to_lowercase()));
335
336 if let Some(addr) = content {
337 if EMAIL_PATTERN.is_match(&addr) && !seen.contains(&addr) {
338 seen.insert(addr.clone());
339 let mut email = Email::new(addr);
340 email.source = EmailSource::StructuredData;
341 emails.push(email);
342 }
343 }
344 }
345 }
346
347 emails
348}
349
/// Returns `false` for regex matches that are really image file names (for example the
/// retina asset `logo@2x.png`), and `true` otherwise.
///
/// The extension comparison ignores ASCII case, so the result does not depend on the
/// caller having lower-cased `email` first. This is only a false-positive filter; it does
/// not validate the address syntax.
fn is_valid_email(email: &str) -> bool {
    const IMAGE_SUFFIXES: [&str; 6] = [".png", ".jpg", ".jpeg", ".gif", ".webp", ".svg"];

    // Compare on bytes so the tail slice can never split a multi-byte character.
    let bytes = email.as_bytes();
    !IMAGE_SUFFIXES.iter().any(|suffix| {
        bytes.len() >= suffix.len()
            && bytes[bytes.len() - suffix.len()..].eq_ignore_ascii_case(suffix.as_bytes())
    })
}
356
357fn extract_email_label(element: &ElementRef) -> Option<String> {
359 let text = element.text().collect::<String>().trim().to_lowercase();
360
361 let labels = ["support", "sales", "info", "contact", "help", "billing", "press", "careers", "jobs"];
362 for label in labels {
363 if text.contains(label) {
364 return Some(label.to_string());
365 }
366 }
367
368 None
369}
370
371pub fn extract_phones(document: &Html) -> Vec<Phone> {
373 let mut phones = Vec::new();
374 let mut seen = HashSet::new();
375
376 if let Ok(sel) = Selector::parse("a[href^='tel:']") {
378 for el in document.select(&sel) {
379 if let Some(href) = el.value().attr("href") {
380 let raw = href.trim_start_matches("tel:").to_string();
381 let normalized = Phone::new(raw.clone()).normalized.clone();
382
383 if normalized.len() >= 10 && !seen.contains(&normalized) {
384 seen.insert(normalized);
385 let mut phone = Phone::new(raw);
386 phone.label = extract_phone_label(&el);
387 phone.phone_type = detect_phone_type(&phone);
388 phones.push(phone);
389 }
390 }
391 }
392 }
393
394 let text = document.root_element().text().collect::<String>();
396 for caps in PHONE_PATTERN.find_iter(&text) {
397 let raw = caps.as_str().to_string();
398 let normalized = Phone::new(raw.clone()).normalized.clone();
399
400 if normalized.len() >= 10 && !seen.contains(&normalized) {
401 seen.insert(normalized);
402 let mut phone = Phone::new(raw);
403 phone.phone_type = detect_phone_type(&phone);
404 phones.push(phone);
405 }
406 }
407
408 if let Ok(sel) = Selector::parse("[itemprop='telephone'], [property='telephone']") {
410 for el in document.select(&sel) {
411 let content = el.value().attr("content")
412 .map(|s| s.to_string())
413 .or_else(|| Some(el.text().collect::<String>().trim().to_string()));
414
415 if let Some(raw) = content {
416 let normalized = Phone::new(raw.clone()).normalized.clone();
417 if normalized.len() >= 10 && !seen.contains(&normalized) {
418 seen.insert(normalized);
419 phones.push(Phone::new(raw));
420 }
421 }
422 }
423 }
424
425 phones
426}
427
428fn extract_phone_label(element: &ElementRef) -> Option<String> {
430 let text = element.text().collect::<String>().to_lowercase();
431
432 if text.contains("fax") {
433 Some("fax".to_string())
434 } else if text.contains("mobile") || text.contains("cell") {
435 Some("mobile".to_string())
436 } else if text.contains("main") || text.contains("office") {
437 Some("main".to_string())
438 } else {
439 None
440 }
441}
442
443fn detect_phone_type(phone: &Phone) -> PhoneType {
445 if TOLL_FREE_PATTERN.is_match(&phone.raw) {
446 PhoneType::TollFree
447 } else if phone.label.as_ref().map(|l| l.contains("fax")).unwrap_or(false) {
448 PhoneType::Fax
449 } else if phone.label.as_ref().map(|l| l.contains("mobile") || l.contains("cell")).unwrap_or(false) {
450 PhoneType::Mobile
451 } else {
452 PhoneType::Unknown
453 }
454}
455
456pub fn extract_addresses(document: &Html) -> Vec<Address> {
458 let mut addresses = Vec::new();
459
460 if let Ok(sel) = Selector::parse("[itemtype*='PostalAddress'], [itemprop='address']") {
462 for el in document.select(&sel) {
463 if let Some(addr) = extract_structured_address(&el) {
464 addresses.push(addr);
465 }
466 }
467 }
468
469 if let Ok(sel) = Selector::parse("address") {
471 for el in document.select(&sel) {
472 let text = el.text().collect::<String>().trim().to_string();
473 if !text.is_empty() && text.len() > 10 {
474 let mut addr = Address {
475 full_text: text,
476 ..Default::default()
477 };
478 extract_address_components(&mut addr);
479 addresses.push(addr);
480 }
481 }
482 }
483
484 addresses.dedup_by(|a, b| a.full_text == b.full_text);
486
487 addresses
488}
489
490fn extract_structured_address(element: &ElementRef) -> Option<Address> {
492 let mut addr = Address::default();
493
494 let selectors = [
495 ("streetAddress", "street"),
496 ("addressLocality", "city"),
497 ("addressRegion", "state"),
498 ("postalCode", "postal_code"),
499 ("addressCountry", "country"),
500 ];
501
502 for (itemprop, field) in selectors {
503 if let Ok(sel) = Selector::parse(&format!("[itemprop='{}']", itemprop)) {
504 if let Some(el) = element.select(&sel).next() {
505 let value = el.value().attr("content")
506 .map(|s| s.to_string())
507 .or_else(|| Some(el.text().collect::<String>().trim().to_string()));
508
509 match field {
510 "street" => addr.street = value,
511 "city" => addr.city = value,
512 "state" => addr.state = value,
513 "postal_code" => addr.postal_code = value,
514 "country" => addr.country = value,
515 _ => {}
516 }
517 }
518 }
519 }
520
521 let parts: Vec<&str> = [
523 addr.street.as_deref(),
524 addr.city.as_deref(),
525 addr.state.as_deref(),
526 addr.postal_code.as_deref(),
527 addr.country.as_deref(),
528 ].into_iter().flatten().collect();
529
530 if parts.is_empty() {
531 return None;
532 }
533
534 addr.full_text = parts.join(", ");
535 Some(addr)
536}
537
538fn extract_address_components(addr: &mut Address) {
540 if let Some(caps) = US_ZIP_PATTERN.find(&addr.full_text) {
542 addr.postal_code = Some(caps.as_str().to_string());
543 } else if let Some(caps) = UK_POSTAL_PATTERN.find(&addr.full_text) {
544 addr.postal_code = Some(caps.as_str().to_string());
545 } else if let Some(caps) = CA_POSTAL_PATTERN.find(&addr.full_text) {
546 addr.postal_code = Some(caps.as_str().to_string());
547 }
548}
549
550pub fn extract_social_links(document: &Html) -> Vec<SocialLink> {
552 let mut links = Vec::new();
553 let mut seen = HashSet::new();
554
555 if let Ok(sel) = Selector::parse("a[href]") {
556 for el in document.select(&sel) {
557 if let Some(href) = el.value().attr("href") {
558 if let Some(platform) = detect_social_platform(href) {
559 let url = href.to_string();
560 if !seen.contains(&url) {
561 seen.insert(url.clone());
562 links.push(SocialLink {
563 platform,
564 url: url.clone(),
565 username: extract_social_username(&url, platform),
566 });
567 }
568 }
569 }
570 }
571 }
572
573 links
574}
575
576fn detect_social_platform(url: &str) -> Option<SocialPlatform> {
578 let url_lower = url.to_lowercase();
579
580 let platforms = [
581 ("facebook.com", SocialPlatform::Facebook),
582 ("fb.com", SocialPlatform::Facebook),
583 ("twitter.com", SocialPlatform::Twitter),
584 ("x.com", SocialPlatform::Twitter),
585 ("instagram.com", SocialPlatform::Instagram),
586 ("linkedin.com", SocialPlatform::LinkedIn),
587 ("youtube.com", SocialPlatform::YouTube),
588 ("youtu.be", SocialPlatform::YouTube),
589 ("tiktok.com", SocialPlatform::TikTok),
590 ("pinterest.com", SocialPlatform::Pinterest),
591 ("github.com", SocialPlatform::GitHub),
592 ("reddit.com", SocialPlatform::Reddit),
593 ("discord.gg", SocialPlatform::Discord),
594 ("discord.com", SocialPlatform::Discord),
595 ("t.me", SocialPlatform::Telegram),
596 ("telegram.me", SocialPlatform::Telegram),
597 ("wa.me", SocialPlatform::WhatsApp),
598 ("whatsapp.com", SocialPlatform::WhatsApp),
599 ("snapchat.com", SocialPlatform::Snapchat),
600 ("tumblr.com", SocialPlatform::Tumblr),
601 ("medium.com", SocialPlatform::Medium),
602 ("twitch.tv", SocialPlatform::Twitch),
603 ("vimeo.com", SocialPlatform::Vimeo),
604 ("flickr.com", SocialPlatform::Flickr),
605 ];
606
607 for (domain, platform) in platforms {
608 if url_lower.contains(domain) {
609 return Some(platform);
610 }
611 }
612
613 None
614}
615
616fn extract_social_username(url: &str, platform: SocialPlatform) -> Option<String> {
618 let url_lower = url.to_lowercase();
619
620 let cleaned = url_lower
622 .trim_start_matches("https://")
623 .trim_start_matches("http://")
624 .trim_start_matches("www.");
625
626 let parts: Vec<&str> = cleaned.split('/').collect();
628
629 match platform {
630 SocialPlatform::Twitter | SocialPlatform::Instagram |
631 SocialPlatform::GitHub | SocialPlatform::TikTok => {
632 parts.get(1).filter(|s| !s.is_empty() && !s.starts_with('?')).map(|s| s.to_string())
634 }
635 SocialPlatform::Facebook => {
636 parts.get(1).filter(|s| !s.is_empty() && **s != "pages" && !s.starts_with('?')).map(|s| s.to_string())
638 }
639 SocialPlatform::YouTube => {
640 if let Some(p) = parts.get(1) {
642 if *p == "c" || *p == "channel" || *p == "user" {
643 return parts.get(2).map(|s| s.to_string());
644 }
645 if p.starts_with('@') {
646 return Some(p.to_string());
647 }
648 }
649 None
650 }
651 _ => None,
652 }
653}
654
655fn find_contact_page(document: &Html) -> Option<String> {
657 let contact_patterns = ["contact", "kontakt", "contacto", "contato"];
658
659 if let Ok(sel) = Selector::parse("a[href]") {
660 for el in document.select(&sel) {
661 if let Some(href) = el.value().attr("href") {
662 let href_lower = href.to_lowercase();
663 let text_lower = el.text().collect::<String>().to_lowercase();
664
665 for pattern in contact_patterns {
666 if href_lower.contains(pattern) || text_lower.contains(pattern) {
667 return Some(href.to_string());
668 }
669 }
670 }
671 }
672 }
673
674 None
675}
676
677fn extract_business_name(document: &Html) -> Option<String> {
679 if let Ok(sel) = Selector::parse("[itemprop='name'], [property='og:site_name']") {
681 if let Some(el) = document.select(&sel).next() {
682 let text_content = el.text().collect::<String>();
683 let name = el.value().attr("content")
684 .or(Some(text_content.as_str()))
685 .map(|s| s.trim().to_string());
686
687 if let Some(ref n) = name {
688 if !n.is_empty() && n.len() < 100 {
689 return name;
690 }
691 }
692 }
693 }
694
695 None
696}
697
698pub fn has_contact_info(document: &Html) -> bool {
704 extract_contact_info(document)
705 .map(|c| c.has_contact_info())
706 .unwrap_or(false)
707}
708
709pub fn get_emails(document: &Html) -> Vec<String> {
711 extract_emails(document)
712 .into_iter()
713 .map(|e| e.address)
714 .collect()
715}
716
717pub fn get_phones(document: &Html) -> Vec<String> {
719 extract_phones(document)
720 .into_iter()
721 .map(|p| p.raw)
722 .collect()
723}
724
725pub fn get_social_links(document: &Html) -> Vec<String> {
727 extract_social_links(document)
728 .into_iter()
729 .map(|s| s.url)
730 .collect()
731}
732
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an HTML snippet into a document for the extractors under test.
    fn doc_from(markup: &str) -> Html {
        Html::parse_document(markup)
    }

    /// A `mailto:` link yields exactly one generic, link-sourced email.
    #[test]
    fn test_extract_mailto_email() {
        let doc = doc_from(
            r#"
            <a href="mailto:contact@example.com">Contact Us</a>
        "#,
        );
        let found = extract_emails(&doc);

        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert_eq!(only.address, "contact@example.com");
        assert_eq!(only.source, EmailSource::MailtoLink);
        assert!(only.is_generic);
    }

    /// An address in running text is picked up and recognised as personal.
    #[test]
    fn test_extract_text_email() {
        let doc = doc_from(
            r#"
            <p>Email us at john.doe@company.org for inquiries.</p>
        "#,
        );
        let found = extract_emails(&doc);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].address, "john.doe@company.org");
        assert!(!found[0].is_generic);
    }

    /// A `tel:` link is normalised to digits plus the leading `+`.
    #[test]
    fn test_extract_phone_tel() {
        let doc = doc_from(
            r#"
            <a href="tel:+1-555-123-4567">Call Us</a>
        "#,
        );
        let found = extract_phones(&doc);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].normalized, "+15551234567");
    }

    /// A 1-800 number in text is classified as toll-free.
    #[test]
    fn test_extract_toll_free() {
        let doc = doc_from(
            r#"
            <p>Call us at 1-800-555-1234</p>
        "#,
        );
        let found = extract_phones(&doc);

        assert!(!found.is_empty());
        assert_eq!(found[0].phone_type, PhoneType::TollFree);
    }

    /// Each recognised platform link is reported once.
    #[test]
    fn test_extract_social_links() {
        let doc = doc_from(
            r#"
            <a href="https://twitter.com/example">Twitter</a>
            <a href="https://facebook.com/example">Facebook</a>
            <a href="https://github.com/example">GitHub</a>
        "#,
        );
        let found = extract_social_links(&doc);
        let has = |wanted: SocialPlatform| found.iter().any(|link| link.platform == wanted);

        assert_eq!(found.len(), 3);
        assert!(has(SocialPlatform::Twitter));
        assert!(has(SocialPlatform::Facebook));
        assert!(has(SocialPlatform::GitHub));
    }

    /// The first path segment is the username for Twitter and GitHub.
    #[test]
    fn test_extract_social_username() {
        let twitter = extract_social_username("https://twitter.com/johndoe", SocialPlatform::Twitter);
        assert_eq!(twitter, Some("johndoe".to_string()));

        let github = extract_social_username("https://github.com/rust-lang", SocialPlatform::GitHub);
        assert_eq!(github, Some("rust-lang".to_string()));
    }

    /// Microdata inside a `PostalAddress` scope is split into its components.
    #[test]
    fn test_extract_structured_address() {
        let doc = doc_from(
            r#"
            <div itemscope itemtype="https://schema.org/PostalAddress">
                <span itemprop="streetAddress">123 Main St</span>
                <span itemprop="addressLocality">Springfield</span>
                <span itemprop="addressRegion">IL</span>
                <span itemprop="postalCode">62701</span>
                <span itemprop="addressCountry">USA</span>
            </div>
        "#,
        );
        let found = extract_addresses(&doc);

        assert_eq!(found.len(), 1);
        let address = &found[0];
        assert_eq!(address.street, Some("123 Main St".to_string()));
        assert_eq!(address.city, Some("Springfield".to_string()));
        assert_eq!(address.postal_code, Some("62701".to_string()));
    }

    /// The navigation link containing "contact" is reported, not the one before it.
    #[test]
    fn test_find_contact_page() {
        let doc = doc_from(
            r#"
            <nav>
                <a href="/about">About</a>
                <a href="/contact">Contact Us</a>
            </nav>
        "#,
        );

        assert_eq!(find_contact_page(&doc), Some("/contact".to_string()));
    }

    /// Shared mailbox names are generic; a personal name is not.
    #[test]
    fn test_email_is_generic() {
        for shared in ["info@example.com", "contact@example.com", "support@example.com"] {
            assert!(Email::new(shared.to_string()).is_generic);
        }
        assert!(!Email::new("john.doe@example.com".to_string()).is_generic);
    }

    /// A personal address is preferred over a generic one listed earlier.
    #[test]
    fn test_primary_email() {
        let info = ContactInfo {
            emails: vec![
                Email::new("info@example.com".to_string()),
                Email::new("john@example.com".to_string()),
            ],
            ..ContactInfo::new()
        };

        let primary = info.primary_email().unwrap();
        assert_eq!(primary.address, "john@example.com");
    }

    /// A page with a `mailto:` link has contact info; a plain paragraph does not.
    #[test]
    fn test_has_contact_info() {
        let with_email = doc_from(
            r#"
            <a href="mailto:test@example.com">Email</a>
        "#,
        );
        assert!(has_contact_info(&with_email));

        let without = doc_from("<html><body><p>No contact info</p></body></html>");
        assert!(!has_contact_info(&without));
    }

    /// Links to x.com are attributed to Twitter.
    #[test]
    fn test_x_twitter_detection() {
        let doc = doc_from(
            r#"
            <a href="https://x.com/example">X (Twitter)</a>
        "#,
        );
        let found = extract_social_links(&doc);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].platform, SocialPlatform::Twitter);
    }

    /// YouTube handles keep their `@`; `/c/<name>` yields the name.
    #[test]
    fn test_youtube_username_extraction() {
        let handle = extract_social_username("https://youtube.com/@handle", SocialPlatform::YouTube);
        assert_eq!(handle, Some("@handle".to_string()));

        let custom =
            extract_social_username("https://youtube.com/c/channelname", SocialPlatform::YouTube);
        assert_eq!(custom, Some("channelname".to_string()));
    }
}