1use regex::Regex;
2use reqwest::Client;
3use scraper::{Html, Selector};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::{Duration, Instant};
7
/// Third-party tracking / marketing tools, each paired with HTML substring
/// fingerprints used to detect the tool's presence on a page (matched
/// case-insensitively by `analyze_analytics`).
const TRACKING_TOOLS: &[(&str, &[&str])] = &[
    (
        "Google Tag Manager",
        &["googletagmanager.com/gtm.js", "dataLayer"],
    ),
    (
        "Google Ads",
        &["googleads.g.doubleclick.net", "googlesyndication.com"],
    ),
    ("Facebook Pixel", &["connect.facebook.net", "fbq("]),
    (
        "LinkedIn Insight",
        &["snap.licdn.com", "_linkedin_partner_id"],
    ),
    ("TikTok Pixel", &["analytics.tiktok.com", "ttq."]),
    ("Hotjar", &["static.hotjar.com", "hjid"]),
    ("Mixpanel", &["cdn.mxpnl.com", "mixpanel.init"]),
    ("Segment", &["cdn.segment.com", "analytics.load"]),
    ("Intercom", &["widget.intercom.io"]),
    ("Zendesk", &["static.zdassets.com"]),
    ("Crisp", &["client.crisp.chat"]),
];
32
/// Well-known files probed at the site root to gauge SEO hygiene.
const SEO_RESOURCES: &[&str] = &["robots.txt", "sitemap.xml", "humans.txt", "ads.txt"];
35
/// Aggregated result of a full SEO audit — one field per analysis section.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeoAnalysisResult {
    pub domain: String,
    pub basic_seo: BasicSeoResult,
    pub content_analysis: ContentAnalysisResult,
    pub technical_seo: TechnicalSeoResult,
    pub social_media: SocialMediaResult,
    // Tool name -> "Found" / "Not Found".
    pub analytics: HashMap<String, String>,
    pub performance: PerformanceResult,
    pub mobile_accessibility: MobileAccessibilityResult,
    // File name (e.g. "robots.txt") -> "Found" / "Not Found".
    pub seo_resources: HashMap<String, String>,
    pub schema_markup: SchemaMarkupResult,
    pub link_analysis: LinkAnalysisResult,
    pub image_seo: ImageSeoResult,
    pub page_speed_factors: PageSpeedResult,
    pub seo_score: SeoScoreResult,
}
55
/// `<title>` text, its length, and a verdict ("Good" / "Too short" /
/// "Too long" / "Missing") against SEO length guidelines.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TitleAnalysis {
    pub text: String,
    pub length: usize,
    pub status: String,
}
62
/// Meta description text, its length, and a length verdict
/// ("Good" / "Too short" / "Too long" / "Missing").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetaDescAnalysis {
    pub text: String,
    pub length: usize,
    pub status: String,
}
69
/// Core on-page meta signals. Absent tags are reported with the sentinel
/// string "Not Found" (or "Not specified" / "Unknown" where noted).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BasicSeoResult {
    pub title: TitleAnalysis,
    pub meta_description: MetaDescAnalysis,
    pub meta_keywords: String,
    pub canonical_url: String,
    pub meta_robots: String,
    pub viewport: String,
    // From the root element's `lang` attribute; "Not specified" when absent.
    pub language: String,
    // From <meta charset> or Content-Type; "Unknown" when undetectable.
    pub charset: String,
}
81
/// Per-level heading stats: total count plus up to the first three texts
/// (each truncated to 100 characters).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingInfo {
    pub count: usize,
    pub texts: Vec<String>,
}
87
/// One entry of the top-keyword report: the (lowercased) word, how often it
/// occurs, and its density as a formatted percentage string.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeywordInfo {
    pub word: String,
    pub count: usize,
    pub density: String,
}
94
/// Textual-content metrics: heading structure, word/paragraph counts,
/// text-to-HTML ratio and keyword density.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentAnalysisResult {
    // Keyed "H1".."H6"; only levels that appear on the page are present.
    pub headings: HashMap<String, HeadingInfo>,
    pub heading_issues: Vec<String>,
    pub word_count: usize,
    // "Good" (>= 300 words) or "Too short".
    pub word_count_status: String,
    pub paragraphs: usize,
    pub text_to_html_ratio: String,
    pub top_keywords: Vec<KeywordInfo>,
}
105
/// Technical page facts derived from the HTTP response and parsed HTML.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnicalSeoResult {
    pub page_size_bytes: usize,
    pub http_status: u16,
    // 0 or 1: whether the final URL differed from the requested one.
    pub redirects: usize,
    pub internal_links: usize,
    pub external_links: usize,
    // JSON-LD scripts + elements with an `itemtype` attribute.
    pub structured_data_count: usize,
    pub has_breadcrumbs: bool,
}
116
/// Open Graph and Twitter Card meta tags; missing keys map to "Not Found".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SocialMediaResult {
    pub open_graph: HashMap<String, String>,
    pub twitter_cards: HashMap<String, String>,
}
122
/// Response-level performance signals from a single homepage fetch.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceResult {
    // Rounded to 2 decimal places.
    pub load_time_secs: f64,
    // "Excellent" (< 1s), "Good" (< 3s) or "Poor".
    pub load_time_status: String,
    pub content_size_kb: f64,
    // Content-Encoding header value, or "None".
    pub compression: String,
    pub server: String,
    pub cache_control: String,
    pub etag: bool,
}
133
/// Image alt-attribute coverage statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AltAttributeResult {
    pub total_images: usize,
    pub images_with_alt: usize,
    pub missing_alt: usize,
    // Formatted percentage, e.g. "87.5%"; "0%" when there are no images.
    pub alt_coverage: String,
}
141
/// Mobile-readiness and accessibility indicators.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MobileAccessibilityResult {
    pub viewport_present: bool,
    // True when the viewport contains "width=device-width".
    pub mobile_friendly: bool,
    pub alt_attributes: AltAttributeResult,
    // Number of elements carrying an aria-label attribute.
    pub aria_labels: usize,
}
149
/// Structured-data (schema.org) usage summary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaMarkupResult {
    pub json_ld_count: usize,
    // Deduplicated "@type" values found in JSON-LD or raw HTML.
    pub json_ld_types: Vec<String>,
    pub microdata_items: usize,
    pub total_structured_data: usize,
}
157
/// Anchor-tag breakdown for the page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LinkAnalysisResult {
    pub total_links: usize,
    pub internal_links: usize,
    pub external_links: usize,
    pub nofollow_links: usize,
}
165
/// Image SEO metrics: lazy loading, alt text and title attribute usage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageSeoResult {
    pub total_images: usize,
    pub lazy_loaded: usize,
    pub with_alt_text: usize,
    pub with_title: usize,
    // Percentage of (lazy + alt) over (2 * total images); "0%" when empty.
    pub optimization_score: String,
}
174
/// Static page-speed factors counted from the markup plus the
/// Content-Encoding response header.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageSpeedResult {
    pub css_files: usize,
    pub js_files: usize,
    pub inline_styles: usize,
    pub inline_scripts: usize,
    pub compression: String,
}
183
/// Final weighted score (out of `max_score`) with a letter grade A+..F.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeoScoreResult {
    pub score: u32,
    pub max_score: u32,
    pub percentage: String,
    pub grade: String,
}
191
/// Runs the complete SEO audit for `domain`: fetches the homepage, probes
/// well-known SEO resource files, parses the HTML once and derives every
/// sub-report from the document, raw HTML text and response headers.
///
/// `domain` may be a bare host ("example.com") or a full URL; when no
/// scheme is present, https is assumed. If `progress_tx` is provided,
/// coarse progress updates (5% -> 80%) are emitted; send failures are
/// deliberately ignored (`let _ =`) so a dropped receiver cannot abort
/// the scan.
pub async fn analyze_advanced_seo(
    domain: &str,
    progress_tx: Option<tokio::sync::mpsc::Sender<crate::ScanProgress>>,
) -> Result<SeoAnalysisResult, Box<dyn std::error::Error + Send + Sync>> {
    // Normalize the target into a fetchable URL.
    let url = if domain.starts_with("http") {
        domain.to_string()
    } else {
        format!("https://{}", domain)
    };

    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 5.0, message: "Fetching homepage HTML...".into(), status: "Info".into() }).await; }

    // NOTE(review): invalid TLS certificates are accepted so scans of
    // misconfigured sites still complete — confirm this is intended.
    let client = Client::builder()
        .timeout(Duration::from_secs(20))
        .danger_accept_invalid_certs(true)
        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
        .build()?;

    // Time the full request/response round trip for the performance report.
    let start = Instant::now();
    let resp = client.get(&url).send().await?;
    let load_time = start.elapsed().as_secs_f64();

    let status_code = resp.status().as_u16();
    // Redirect heuristic: final URL differs from the requested one.
    // NOTE(review): reqwest normalizes URLs (e.g. appends a trailing "/"),
    // so this can report a redirect where none occurred — verify.
    let redirects = resp.url().to_string() != url; let headers = resp.headers().clone();
    let content_bytes = resp.bytes().await?;
    let content_size = content_bytes.len();
    // Lossy decode: non-UTF-8 bytes become U+FFFD instead of failing.
    let html_text = String::from_utf8_lossy(&content_bytes).to_string();
    // Scheme/www-stripped host used to classify links as internal/external.
    let base_domain = domain
        .replace("https://", "")
        .replace("http://", "")
        .replace("www.", "");

    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 20.0, message: "HTML fetched. Searching for SEO resources (sitemap, robots)...".into(), status: "Success".into() }).await; }

    let seo_resources = check_seo_resources(&client, domain).await;

    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 40.0, message: "Parsing HTML document...".into(), status: "Info".into() }).await; }

    // Parse once; all section analyzers below share this document.
    let document = Html::parse_document(&html_text);

    let basic_seo = analyze_basic_seo(&document);

    let content_analysis = analyze_content(&document);

    let technical_seo = analyze_technical(
        &document,
        status_code,
        content_size,
        redirects as usize,
        &base_domain,
    );

    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 60.0, message: "Analyzing Social Media & Analytics...".into(), status: "Info".into() }).await; }

    let social_media = analyze_social_tags(&document);

    let analytics = analyze_analytics(&html_text);

    if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 80.0, message: "Calculating SEO Core Web Factors...".into(), status: "Info".into() }).await; }

    let performance = analyze_performance(&headers, load_time, content_size);

    let mobile_accessibility = analyze_mobile(&document);

    let schema_markup = analyze_schema(&document, &html_text);

    let link_analysis = analyze_links(&document, &base_domain);

    let image_seo = analyze_images(&document);

    let page_speed_factors = analyze_speed_factors(&document, &headers);

    // Weighted roll-up of the sections computed above.
    let seo_score = calculate_seo_score(
        &basic_seo,
        &content_analysis,
        &seo_resources,
        &schema_markup,
        &performance,
        &mobile_accessibility,
    );

    Ok(SeoAnalysisResult {
        domain: domain.to_string(),
        basic_seo,
        content_analysis,
        technical_seo,
        social_media,
        analytics,
        performance,
        mobile_accessibility,
        seo_resources,
        schema_markup,
        link_analysis,
        image_seo,
        page_speed_factors,
        seo_score,
    })
}
306
307fn analyze_basic_seo(doc: &Html) -> BasicSeoResult {
310 let title_sel = Selector::parse("title").unwrap();
311 let title_text = doc
312 .select(&title_sel)
313 .next()
314 .map(|el| el.text().collect::<String>().trim().to_string())
315 .unwrap_or_default();
316
317 let title_len = title_text.len();
318 let title_status = if title_text.is_empty() {
319 "Missing"
320 } else if title_len < 30 {
321 "Too short"
322 } else if title_len > 60 {
323 "Too long"
324 } else {
325 "Good"
326 };
327
328 let desc = get_meta_content(doc, "name", "description");
329 let desc_len = if desc == "Not Found" { 0 } else { desc.len() };
330 let desc_status = if desc == "Not Found" {
331 "Missing"
332 } else if desc_len < 120 {
333 "Too short"
334 } else if desc_len > 160 {
335 "Too long"
336 } else {
337 "Good"
338 };
339
340 BasicSeoResult {
341 title: TitleAnalysis {
342 text: if title_text.is_empty() {
343 "Missing".into()
344 } else {
345 title_text
346 },
347 length: title_len,
348 status: title_status.into(),
349 },
350 meta_description: MetaDescAnalysis {
351 text: desc.clone(),
352 length: desc_len,
353 status: desc_status.into(),
354 },
355 meta_keywords: get_meta_content(doc, "name", "keywords"),
356 canonical_url: get_link_href(doc, "canonical"),
357 meta_robots: get_meta_content(doc, "name", "robots"),
358 viewport: get_meta_content(doc, "name", "viewport"),
359 language: doc
360 .root_element()
361 .value()
362 .attr("lang")
363 .unwrap_or("Not specified")
364 .to_string(),
365 charset: get_charset(doc),
366 }
367}
368
369fn get_meta_content(doc: &Html, attr: &str, value: &str) -> String {
370 let selector_str = format!("meta[{}=\"{}\"]", attr, value);
371 if let Ok(sel) = Selector::parse(&selector_str) {
372 if let Some(el) = doc.select(&sel).next() {
373 if let Some(content) = el.value().attr("content") {
374 return content.trim().to_string();
375 }
376 }
377 }
378 "Not Found".into()
379}
380
381fn get_link_href(doc: &Html, rel: &str) -> String {
382 let selector_str = format!("link[rel=\"{}\"]", rel);
383 if let Ok(sel) = Selector::parse(&selector_str) {
384 if let Some(el) = doc.select(&sel).next() {
385 if let Some(href) = el.value().attr("href") {
386 return href.trim().to_string();
387 }
388 }
389 }
390 "Not Found".into()
391}
392
393fn get_charset(doc: &Html) -> String {
394 if let Ok(sel) = Selector::parse("meta[charset]") {
395 if let Some(el) = doc.select(&sel).next() {
396 if let Some(cs) = el.value().attr("charset") {
397 return cs.to_string();
398 }
399 }
400 }
401 if let Ok(sel) = Selector::parse("meta[http-equiv=\"Content-Type\"]") {
402 if let Some(el) = doc.select(&sel).next() {
403 if let Some(content) = el.value().attr("content") {
404 if let Some(cs) = Regex::new(r"charset=([^;]+)")
405 .ok()
406 .and_then(|r| r.captures(content))
407 {
408 return cs.get(1).unwrap().as_str().to_string();
409 }
410 }
411 }
412 }
413 "Unknown".into()
414}
415
416fn analyze_content(doc: &Html) -> ContentAnalysisResult {
419 let mut headings = HashMap::new();
420 let mut hierarchy: Vec<(u8, String)> = Vec::new();
421
422 let h_selectors = [
423 (1u8, Selector::parse("h1").unwrap()),
424 (2, Selector::parse("h2").unwrap()),
425 (3, Selector::parse("h3").unwrap()),
426 (4, Selector::parse("h4").unwrap()),
427 (5, Selector::parse("h5").unwrap()),
428 (6, Selector::parse("h6").unwrap()),
429 ];
430
431 for (i, sel) in &h_selectors {
432 let elements: Vec<_> = doc.select(sel).collect();
433 if !elements.is_empty() {
434 let texts: Vec<String> = elements
435 .iter()
436 .take(3)
437 .map(|e| {
438 let t = e.text().collect::<String>();
439 t.trim().chars().take(100).collect()
440 })
441 .collect();
442 headings.insert(
443 format!("H{}", i),
444 HeadingInfo {
445 count: elements.len(),
446 texts,
447 },
448 );
449 for e in &elements {
450 let t = e.text().collect::<String>().trim().to_string();
451 hierarchy.push((*i, t));
452 }
453 }
454 }
455
456 let heading_issues = check_heading_issues(&hierarchy);
457
458 let text = doc.root_element().text().collect::<String>();
459 let words: Vec<&str> = text.split_whitespace().collect();
460 let word_count = words.len();
461
462 let p_sel = Selector::parse("p").unwrap();
463 let paragraphs = doc.select(&p_sel).count();
464
465 let html_len = doc.html().len();
466 let text_len = text.len();
467 let ratio = if html_len > 0 {
468 (text_len as f64 / html_len as f64) * 100.0
469 } else {
470 0.0
471 };
472
473 let top_keywords = analyze_keyword_density(&words);
474
475 ContentAnalysisResult {
476 headings,
477 heading_issues,
478 word_count,
479 word_count_status: if word_count >= 300 {
480 "Good"
481 } else {
482 "Too short"
483 }
484 .into(),
485 paragraphs,
486 text_to_html_ratio: format!("{:.1}%", ratio),
487 top_keywords,
488 }
489}
490
/// Flags structural heading problems in a (level, text) sequence:
/// no headings at all, a missing or duplicated H1, and jumps that skip a
/// level (e.g. H2 followed by H4).
fn check_heading_issues(hierarchy: &[(u8, String)]) -> Vec<String> {
    if hierarchy.is_empty() {
        return vec!["No headings found".into()];
    }

    let mut issues = Vec::new();

    let h1_count = hierarchy.iter().filter(|&&(level, _)| level == 1).count();
    match h1_count {
        0 => issues.push("Missing H1 tag".into()),
        1 => {}
        n => issues.push(format!("Multiple H1 tags ({})", n)),
    }

    // Compare consecutive headings; a jump of more than one level down the
    // hierarchy is reported.
    for pair in hierarchy.windows(2) {
        let (prev, cur) = (pair[0].0, pair[1].0);
        if cur > prev + 1 {
            issues.push(format!(
                "Skipped heading level (from H{} to H{})",
                prev, cur
            ));
        }
    }
    issues
}
517
518fn analyze_keyword_density(words: &[&str]) -> Vec<KeywordInfo> {
519 let total = words.len();
520 if total == 0 {
521 return vec![];
522 }
523
524 let mut freq: HashMap<String, usize> = HashMap::new();
525 for &w in words {
526 let lower = w.to_lowercase();
527 if lower.len() > 3 {
528 *freq.entry(lower).or_insert(0) += 1;
529 }
530 }
531
532 let mut sorted: Vec<_> = freq.into_iter().collect();
533 sorted.sort_by(|a, b| b.1.cmp(&a.1));
534
535 sorted
536 .into_iter()
537 .take(5)
538 .map(|(word, count)| KeywordInfo {
539 word,
540 count,
541 density: format!("{:.2}%", (count as f64 / total as f64) * 100.0),
542 })
543 .collect()
544}
545
546fn analyze_technical(
549 doc: &Html,
550 status: u16,
551 size: usize,
552 redirects: usize,
553 base_domain: &str,
554) -> TechnicalSeoResult {
555 let link_sel = Selector::parse("a[href]").unwrap();
556 let mut internal = 0;
557 let mut external = 0;
558
559 for el in doc.select(&link_sel) {
560 if let Some(href) = el.value().attr("href") {
561 if href.starts_with("http") && !href.contains(base_domain) {
562 external += 1;
563 } else if !href.starts_with("mailto:")
564 && !href.starts_with("tel:")
565 && !href.starts_with('#')
566 {
567 internal += 1;
568 }
569 }
570 }
571
572 let json_ld = Selector::parse("script[type=\"application/ld+json\"]")
573 .ok()
574 .map(|s| doc.select(&s).count())
575 .unwrap_or(0);
576 let microdata = Selector::parse("[itemtype]")
577 .ok()
578 .map(|s| doc.select(&s).count())
579 .unwrap_or(0);
580
581 let breadcrumb = Selector::parse("[typeof=\"BreadcrumbList\"]")
582 .ok()
583 .map(|s| doc.select(&s).next().is_some())
584 .unwrap_or(false)
585 || doc.html().to_lowercase().contains("breadcrumb");
586
587 TechnicalSeoResult {
588 page_size_bytes: size,
589 http_status: status,
590 redirects,
591 internal_links: internal,
592 external_links: external,
593 structured_data_count: json_ld + microdata,
594 has_breadcrumbs: breadcrumb,
595 }
596}
597
598fn analyze_social_tags(doc: &Html) -> SocialMediaResult {
601 let og_keys = [
602 "og:title",
603 "og:description",
604 "og:image",
605 "og:url",
606 "og:type",
607 "og:site_name",
608 ];
609 let tw_keys = [
610 "twitter:card",
611 "twitter:title",
612 "twitter:description",
613 "twitter:image",
614 "twitter:site",
615 ];
616
617 let mut og = HashMap::new();
618 for key in &og_keys {
619 og.insert(key.to_string(), get_meta_content(doc, "property", key));
620 }
621
622 let mut tw = HashMap::new();
623 for key in &tw_keys {
624 tw.insert(key.to_string(), get_meta_content(doc, "name", key));
625 }
626
627 SocialMediaResult {
628 open_graph: og,
629 twitter_cards: tw,
630 }
631}
632
633fn analyze_analytics(html: &str) -> HashMap<String, String> {
636 let mut results = HashMap::new();
637
638 let has_ga4 = Regex::new(r#"gtag\(['"]config['"],\s*['"]G-[A-Z0-9]+['"]\)"#)
640 .ok()
641 .map(|r| r.is_match(html))
642 .unwrap_or(false);
643 let has_ua = Regex::new(r#"gtag\(['"]config['"],\s*['"]UA-[0-9-]+['"]\)"#)
644 .ok()
645 .map(|r| r.is_match(html))
646 .unwrap_or(false);
647 results.insert(
648 "Google Analytics GA4".into(),
649 if has_ga4 { "Found" } else { "Not Found" }.into(),
650 );
651 results.insert(
652 "Google Analytics UA".into(),
653 if has_ua { "Found" } else { "Not Found" }.into(),
654 );
655
656 let lower = html.to_lowercase();
658 for &(name, patterns) in TRACKING_TOOLS {
659 let found = patterns.iter().any(|p| lower.contains(&p.to_lowercase()));
660 results.insert(
661 name.to_string(),
662 if found { "Found" } else { "Not Found" }.into(),
663 );
664 }
665
666 results
667}
668
669fn analyze_performance(
672 headers: &reqwest::header::HeaderMap,
673 load_time: f64,
674 size: usize,
675) -> PerformanceResult {
676 let status = if load_time < 1.0 {
677 "Excellent"
678 } else if load_time < 3.0 {
679 "Good"
680 } else {
681 "Poor"
682 };
683
684 PerformanceResult {
685 load_time_secs: (load_time * 100.0).round() / 100.0,
686 load_time_status: status.into(),
687 content_size_kb: (size as f64 / 1024.0 * 100.0).round() / 100.0,
688 compression: headers
689 .get("content-encoding")
690 .and_then(|v| v.to_str().ok())
691 .unwrap_or("None")
692 .into(),
693 server: headers
694 .get("server")
695 .and_then(|v| v.to_str().ok())
696 .unwrap_or("Unknown")
697 .into(),
698 cache_control: headers
699 .get("cache-control")
700 .and_then(|v| v.to_str().ok())
701 .unwrap_or("Not Set")
702 .into(),
703 etag: headers.contains_key("etag"),
704 }
705}
706
707fn analyze_mobile(doc: &Html) -> MobileAccessibilityResult {
710 let viewport_content = get_meta_content(doc, "name", "viewport");
711 let has_viewport = viewport_content != "Not Found";
712 let mobile_friendly = viewport_content.contains("width=device-width");
713
714 let img_sel = Selector::parse("img").unwrap();
715 let images: Vec<_> = doc.select(&img_sel).collect();
716 let total = images.len();
717 let with_alt = images
718 .iter()
719 .filter(|i| i.value().attr("alt").is_some())
720 .count();
721
722 let aria_sel = Selector::parse("[aria-label]").unwrap();
723 let aria_count = doc.select(&aria_sel).count();
724
725 MobileAccessibilityResult {
726 viewport_present: has_viewport,
727 mobile_friendly,
728 alt_attributes: AltAttributeResult {
729 total_images: total,
730 images_with_alt: with_alt,
731 missing_alt: total - with_alt,
732 alt_coverage: if total > 0 {
733 format!("{:.1}%", (with_alt as f64 / total as f64) * 100.0)
734 } else {
735 "0%".into()
736 },
737 },
738 aria_labels: aria_count,
739 }
740}
741
742async fn check_seo_resources(client: &Client, domain: &str) -> HashMap<String, String> {
745 let mut results = HashMap::new();
746 for &file in SEO_RESOURCES {
747 let url = format!("https://{}/{}", domain, file);
748 let found = match client.get(&url).send().await {
749 Ok(r) if r.status().is_success() => "Found",
750 _ => "Not Found",
751 };
752 results.insert(file.to_string(), found.into());
753 }
754 results
755}
756
757fn analyze_schema(doc: &Html, html: &str) -> SchemaMarkupResult {
760 let json_ld_sel = Selector::parse("script[type=\"application/ld+json\"]").unwrap();
761 let json_lds: Vec<_> = doc.select(&json_ld_sel).collect();
762 let json_ld_count = json_lds.len();
763
764 let mut types = Vec::new();
765 for script in &json_lds {
766 let text = script.text().collect::<String>();
767 if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) {
768 extract_types(&val, &mut types);
769 }
770 }
771
772 let microdata = Selector::parse("[itemtype]")
773 .ok()
774 .map(|s| doc.select(&s).count())
775 .unwrap_or(0);
776
777 let additional = Regex::new(r#""@type"\s*:\s*"([^"]+)""#)
779 .ok()
780 .map(|r| {
781 r.captures_iter(html)
782 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
783 .collect::<Vec<_>>()
784 })
785 .unwrap_or_default();
786
787 for t in additional {
788 if !types.contains(&t) {
789 types.push(t);
790 }
791 }
792
793 SchemaMarkupResult {
794 json_ld_count,
795 json_ld_types: types,
796 microdata_items: microdata,
797 total_structured_data: json_ld_count + microdata,
798 }
799}
800
801fn extract_types(val: &serde_json::Value, types: &mut Vec<String>) {
802 match val {
803 serde_json::Value::Object(map) => {
804 if let Some(t) = map.get("@type").and_then(|v| v.as_str()) {
805 types.push(t.to_string());
806 }
807 for (_, v) in map {
808 extract_types(v, types);
809 }
810 }
811 serde_json::Value::Array(arr) => {
812 for v in arr {
813 extract_types(v, types);
814 }
815 }
816 _ => {}
817 }
818}
819
820fn analyze_links(doc: &Html, base_domain: &str) -> LinkAnalysisResult {
823 let link_sel = Selector::parse("a[href]").unwrap();
824 let mut internal = 0;
825 let mut external = 0;
826 let mut nofollow = 0;
827 let mut total = 0;
828
829 for el in doc.select(&link_sel) {
830 total += 1;
831 if let Some(href) = el.value().attr("href") {
832 if href.starts_with("http") && !href.contains(base_domain) {
833 external += 1;
834 } else if !href.starts_with("mailto:")
835 && !href.starts_with("tel:")
836 && !href.starts_with('#')
837 {
838 internal += 1;
839 }
840 }
841 if let Some(rel) = el.value().attr("rel") {
842 if rel.contains("nofollow") {
843 nofollow += 1;
844 }
845 }
846 }
847
848 LinkAnalysisResult {
849 total_links: total,
850 internal_links: internal,
851 external_links: external,
852 nofollow_links: nofollow,
853 }
854}
855
856fn analyze_images(doc: &Html) -> ImageSeoResult {
859 let img_sel = Selector::parse("img").unwrap();
860 let images: Vec<_> = doc.select(&img_sel).collect();
861 let total = images.len();
862 let lazy = images
863 .iter()
864 .filter(|i| i.value().attr("loading") == Some("lazy"))
865 .count();
866 let alt = images
867 .iter()
868 .filter(|i| i.value().attr("alt").is_some())
869 .count();
870 let title = images
871 .iter()
872 .filter(|i| i.value().attr("title").is_some())
873 .count();
874
875 let opt_score = if total > 0 {
876 format!("{:.1}%", ((lazy + alt) as f64 / (total * 2) as f64) * 100.0)
877 } else {
878 "0%".into()
879 };
880
881 ImageSeoResult {
882 total_images: total,
883 lazy_loaded: lazy,
884 with_alt_text: alt,
885 with_title: title,
886 optimization_score: opt_score,
887 }
888}
889
890fn analyze_speed_factors(doc: &Html, headers: &reqwest::header::HeaderMap) -> PageSpeedResult {
893 let css_sel = Selector::parse("link[rel=\"stylesheet\"]").unwrap();
894 let js_sel = Selector::parse("script[src]").unwrap();
895 let style_sel = Selector::parse("style").unwrap();
896 let inline_js_sel = Selector::parse("script:not([src])").unwrap();
897
898 PageSpeedResult {
899 css_files: doc.select(&css_sel).count(),
900 js_files: doc.select(&js_sel).count(),
901 inline_styles: doc.select(&style_sel).count(),
902 inline_scripts: doc.select(&inline_js_sel).count(),
903 compression: headers
904 .get("content-encoding")
905 .and_then(|v| v.to_str().ok())
906 .unwrap_or("None")
907 .into(),
908 }
909}
910
911fn calculate_seo_score(
914 basic: &BasicSeoResult,
915 content: &ContentAnalysisResult,
916 resources: &HashMap<String, String>,
917 schema: &SchemaMarkupResult,
918 perf: &PerformanceResult,
919 mobile: &MobileAccessibilityResult,
920) -> SeoScoreResult {
921 let mut score: u32 = 0;
922
923 if basic.title.status == "Good" {
925 score += 10;
926 }
927 if basic.meta_description.status == "Good" {
928 score += 10;
929 }
930 if basic.canonical_url != "Not Found" {
931 score += 5;
932 }
933 if basic.viewport != "Not Found" {
934 score += 5;
935 }
936
937 if content.word_count_status == "Good" {
939 score += 10;
940 }
941 if content.headings.contains_key("H1") {
942 score += 10;
943 }
944
945 if resources.get("robots.txt").map(|s| s.as_str()) == Some("Found") {
947 score += 5;
948 }
949 if resources.get("sitemap.xml").map(|s| s.as_str()) == Some("Found") {
950 score += 5;
951 }
952 if schema.total_structured_data > 0 {
953 score += 10;
954 }
955
956 match perf.load_time_status.as_str() {
958 "Excellent" | "Good" => score += 15,
959 _ => {}
960 }
961
962 score += 5; if mobile.mobile_friendly {
967 score += 5;
968 }
969
970 let max_score = 100u32;
971 let pct = (score as f64 / max_score as f64) * 100.0;
972 let grade = if pct >= 90.0 {
973 "A+"
974 } else if pct >= 80.0 {
975 "A"
976 } else if pct >= 70.0 {
977 "B"
978 } else if pct >= 60.0 {
979 "C"
980 } else if pct >= 50.0 {
981 "D"
982 } else {
983 "F"
984 };
985
986 SeoScoreResult {
987 score,
988 max_score,
989 percentage: format!("{:.1}%", pct),
990 grade: grade.into(),
991 }
992}