1use regex::Regex;
2use reqwest::Client;
3use scraper::{Html, Selector};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::{Duration, Instant};
7
/// Marketing/analytics tools detected by case-insensitive substring matching:
/// each entry is (display name, fingerprint patterns searched for in the raw
/// page HTML by `analyze_analytics`).
const TRACKING_TOOLS: &[(&str, &[&str])] = &[
    (
        "Google Tag Manager",
        &["googletagmanager.com/gtm.js", "dataLayer"],
    ),
    (
        "Google Ads",
        &["googleads.g.doubleclick.net", "googlesyndication.com"],
    ),
    ("Facebook Pixel", &["connect.facebook.net", "fbq("]),
    (
        "LinkedIn Insight",
        &["snap.licdn.com", "_linkedin_partner_id"],
    ),
    ("TikTok Pixel", &["analytics.tiktok.com", "ttq."]),
    ("Hotjar", &["static.hotjar.com", "hjid"]),
    ("Mixpanel", &["cdn.mxpnl.com", "mixpanel.init"]),
    ("Segment", &["cdn.segment.com", "analytics.load"]),
    ("Intercom", &["widget.intercom.io"]),
    ("Zendesk", &["static.zdassets.com"]),
    ("Crisp", &["client.crisp.chat"]),
];

/// Well-known root-level files probed over HTTPS by `check_seo_resources`
/// to gauge basic SEO hygiene.
const SEO_RESOURCES: &[&str] = &["robots.txt", "sitemap.xml", "humans.txt", "ads.txt"];
35
/// Top-level report produced by [`analyze_advanced_seo`]: one field per
/// analysis category, all serializable for downstream reporting.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeoAnalysisResult {
    pub domain: String,
    pub basic_seo: BasicSeoResult,
    pub content_analysis: ContentAnalysisResult,
    pub technical_seo: TechnicalSeoResult,
    pub social_media: SocialMediaResult,
    // Tool name -> "Found" / "Not Found".
    pub analytics: HashMap<String, String>,
    pub performance: PerformanceResult,
    pub mobile_accessibility: MobileAccessibilityResult,
    // Resource file name (e.g. "robots.txt") -> "Found" / "Not Found".
    pub seo_resources: HashMap<String, String>,
    pub schema_markup: SchemaMarkupResult,
    pub link_analysis: LinkAnalysisResult,
    pub image_seo: ImageSeoResult,
    pub page_speed_factors: PageSpeedResult,
    pub seo_score: SeoScoreResult,
}
55
/// `<title>` tag evaluation: the text, its length, and a status of
/// "Good", "Too short", "Too long", or "Missing".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TitleAnalysis {
    pub text: String,
    pub length: usize,
    pub status: String,
}

/// Meta-description evaluation, mirroring [`TitleAnalysis`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetaDescAnalysis {
    pub text: String,
    pub length: usize,
    pub status: String,
}

/// Core on-page tags; string fields hold "Not Found" (or a similar
/// fallback such as "Not specified"/"Unknown") when the tag is absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BasicSeoResult {
    pub title: TitleAnalysis,
    pub meta_description: MetaDescAnalysis,
    pub meta_keywords: String,
    pub canonical_url: String,
    pub meta_robots: String,
    pub viewport: String,
    pub language: String,
    pub charset: String,
}
81
/// Per-level heading summary: total count plus up to three sample texts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingInfo {
    pub count: usize,
    pub texts: Vec<String>,
}

/// One entry in the keyword-density top list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeywordInfo {
    pub word: String,
    pub count: usize,
    // Percentage string, e.g. "2.15%".
    pub density: String,
}

/// Textual content metrics: heading structure (keyed "H1".."H6"),
/// detected outline issues, word/paragraph counts, text-to-HTML ratio,
/// and the most frequent keywords.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentAnalysisResult {
    pub headings: HashMap<String, HeadingInfo>,
    pub heading_issues: Vec<String>,
    pub word_count: usize,
    pub word_count_status: String,
    pub paragraphs: usize,
    pub text_to_html_ratio: String,
    pub top_keywords: Vec<KeywordInfo>,
}
105
/// Technical crawl facts: response status/size, link counts, and
/// structured-data indicators.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnicalSeoResult {
    pub page_size_bytes: usize,
    pub http_status: u16,
    // NOTE(review): currently 0 or 1 — derived from whether the final URL
    // differed from the requested one, not an actual redirect hop count.
    pub redirects: usize,
    pub internal_links: usize,
    pub external_links: usize,
    pub structured_data_count: usize,
    pub has_breadcrumbs: bool,
}

/// Social sharing metadata: Open Graph and Twitter Card tags keyed by
/// property name, with "Not Found" for absent tags.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SocialMediaResult {
    pub open_graph: HashMap<String, String>,
    pub twitter_cards: HashMap<String, String>,
}

/// Fetch performance and caching-related headers for the homepage request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceResult {
    pub load_time_secs: f64,
    pub load_time_status: String,
    pub content_size_kb: f64,
    pub compression: String,
    pub server: String,
    pub cache_control: String,
    pub etag: bool,
}
133
/// Alt-attribute coverage across all `<img>` elements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AltAttributeResult {
    pub total_images: usize,
    pub images_with_alt: usize,
    pub missing_alt: usize,
    // Percentage string, e.g. "87.5%" ("0%" when there are no images).
    pub alt_coverage: String,
}

/// Mobile readiness and basic accessibility signals.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MobileAccessibilityResult {
    pub viewport_present: bool,
    pub mobile_friendly: bool,
    pub alt_attributes: AltAttributeResult,
    pub aria_labels: usize,
}

/// Structured-data inventory: JSON-LD blocks and their `@type` values,
/// plus the microdata item count.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaMarkupResult {
    pub json_ld_count: usize,
    pub json_ld_types: Vec<String>,
    pub microdata_items: usize,
    pub total_structured_data: usize,
}

/// Anchor-tag breakdown for the page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LinkAnalysisResult {
    pub total_links: usize,
    pub internal_links: usize,
    pub external_links: usize,
    pub nofollow_links: usize,
}
165
/// Image optimization signals (lazy loading, alt/title attributes).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageSeoResult {
    pub total_images: usize,
    pub lazy_loaded: usize,
    pub with_alt_text: usize,
    pub with_title: usize,
    pub optimization_score: String,
}

/// Counts of render-blocking resource candidates plus the response
/// compression scheme.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageSpeedResult {
    pub css_files: usize,
    pub js_files: usize,
    pub inline_styles: usize,
    pub inline_scripts: usize,
    pub compression: String,
}

/// Final weighted score (out of `max_score`) with percentage string and
/// letter grade ("A+" .. "F").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeoScoreResult {
    pub score: u32,
    pub max_score: u32,
    pub percentage: String,
    pub grade: String,
}
191
/// Runs the full SEO audit for `domain`: fetches the homepage, probes the
/// standard SEO resource files, parses the HTML once, and feeds the document
/// through every `analyze_*` helper in this module.
///
/// `progress_tx` optionally receives coarse progress updates; sends use
/// `try_send` and ignore failure, so a full or closed channel never stalls
/// the scan. HTTP client build/request errors propagate to the caller.
pub async fn analyze_advanced_seo(
    domain: &str,
    progress_tx: Option<tokio::sync::mpsc::Sender<crate::ScanProgress>>,
) -> Result<SeoAnalysisResult, Box<dyn std::error::Error + Send + Sync>> {
    // Prepend https:// unless the caller already supplied a scheme.
    let url = if domain.starts_with("http") {
        domain.to_string()
    } else {
        format!("https://{}", domain)
    };

    if let Some(t) = &progress_tx { let _ = t.try_send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 5.0, message: "Fetching homepage HTML...".into(), status: "Info".into() }); }

    // Invalid TLS certs are accepted deliberately: this is a scanner and we
    // still want to analyze sites with broken certificates.
    let client = Client::builder()
        .timeout(Duration::from_secs(20))
        .danger_accept_invalid_certs(true)
        .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
        .build()?;

    // Wall-clock load time for the single homepage request.
    let start = Instant::now();
    let resp = client.get(&url).send().await?;
    let load_time = start.elapsed().as_secs_f64();

    let status_code = resp.status().as_u16();
    // NOTE(review): `redirects` is a bool (final URL differs from requested)
    // later cast to usize, so the reported count is at most 1.
    let redirects = resp.url().to_string() != url; let headers = resp.headers().clone();
    let content_bytes = resp.bytes().await?;
    let content_size = content_bytes.len();
    // Lossy decode: non-UTF-8 bytes become replacement chars rather than
    // failing the whole scan.
    let html_text = String::from_utf8_lossy(&content_bytes).to_string();
    // Bare host used to classify links as internal vs external.
    let base_domain = domain
        .replace("https://", "")
        .replace("http://", "")
        .replace("www.", "");

    if let Some(t) = &progress_tx { let _ = t.try_send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 20.0, message: "HTML fetched. Searching for SEO resources (sitemap, robots)...".into(), status: "Success".into() }); }

    let seo_resources = check_seo_resources(&client, domain).await;

    if let Some(t) = &progress_tx { let _ = t.try_send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 40.0, message: "Parsing HTML document...".into(), status: "Info".into() }); }

    // Parse once; every analyzer below shares this document.
    let document = Html::parse_document(&html_text);

    let basic_seo = analyze_basic_seo(&document);

    let content_analysis = analyze_content(&document);

    let technical_seo = analyze_technical(
        &document,
        status_code,
        content_size,
        redirects as usize,
        &base_domain,
    );

    if let Some(t) = &progress_tx { let _ = t.try_send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 60.0, message: "Analyzing Social Media & Analytics...".into(), status: "Info".into() }); }

    let social_media = analyze_social_tags(&document);

    // Analytics detection works on the raw HTML string (regex/substring).
    let analytics = analyze_analytics(&html_text);

    if let Some(t) = &progress_tx { let _ = t.try_send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 80.0, message: "Calculating SEO Core Web Factors...".into(), status: "Info".into() }); }

    let performance = analyze_performance(&headers, load_time, content_size);

    let mobile_accessibility = analyze_mobile(&document);

    let schema_markup = analyze_schema(&document, &html_text);

    let link_analysis = analyze_links(&document, &base_domain);

    let image_seo = analyze_images(&document);

    let page_speed_factors = analyze_speed_factors(&document, &headers);

    // Final weighted score uses a subset of the results gathered above.
    let seo_score = calculate_seo_score(
        &basic_seo,
        &content_analysis,
        &seo_resources,
        &schema_markup,
        &performance,
        &mobile_accessibility,
    );

    if let Some(t) = &progress_tx { let _ = t.try_send(crate::ScanProgress { module: "SEO Analysis".into(), percentage: 100.0, message: "SEO Analysis successfully completed.".into(), status: "Success".into() }); }

    Ok(SeoAnalysisResult {
        domain: domain.to_string(),
        basic_seo,
        content_analysis,
        technical_seo,
        social_media,
        analytics,
        performance,
        mobile_accessibility,
        seo_resources,
        schema_markup,
        link_analysis,
        image_seo,
        page_speed_factors,
        seo_score,
    })
}
308
309fn analyze_basic_seo(doc: &Html) -> BasicSeoResult {
312 let title_sel = Selector::parse("title").unwrap();
313 let title_text = doc
314 .select(&title_sel)
315 .next()
316 .map(|el| el.text().collect::<String>().trim().to_string())
317 .unwrap_or_default();
318
319 let title_len = title_text.len();
320 let title_status = if title_text.is_empty() {
321 "Missing"
322 } else if title_len < 30 {
323 "Too short"
324 } else if title_len > 60 {
325 "Too long"
326 } else {
327 "Good"
328 };
329
330 let desc = get_meta_content(doc, "name", "description");
331 let desc_len = if desc == "Not Found" { 0 } else { desc.len() };
332 let desc_status = if desc == "Not Found" {
333 "Missing"
334 } else if desc_len < 120 {
335 "Too short"
336 } else if desc_len > 160 {
337 "Too long"
338 } else {
339 "Good"
340 };
341
342 BasicSeoResult {
343 title: TitleAnalysis {
344 text: if title_text.is_empty() {
345 "Missing".into()
346 } else {
347 title_text
348 },
349 length: title_len,
350 status: title_status.into(),
351 },
352 meta_description: MetaDescAnalysis {
353 text: desc.clone(),
354 length: desc_len,
355 status: desc_status.into(),
356 },
357 meta_keywords: get_meta_content(doc, "name", "keywords"),
358 canonical_url: get_link_href(doc, "canonical"),
359 meta_robots: get_meta_content(doc, "name", "robots"),
360 viewport: get_meta_content(doc, "name", "viewport"),
361 language: doc
362 .root_element()
363 .value()
364 .attr("lang")
365 .unwrap_or("Not specified")
366 .to_string(),
367 charset: get_charset(doc),
368 }
369}
370
371fn get_meta_content(doc: &Html, attr: &str, value: &str) -> String {
372 let selector_str = format!("meta[{}=\"{}\"]", attr, value);
373 if let Ok(sel) = Selector::parse(&selector_str) {
374 if let Some(el) = doc.select(&sel).next() {
375 if let Some(content) = el.value().attr("content") {
376 return content.trim().to_string();
377 }
378 }
379 }
380 "Not Found".into()
381}
382
383fn get_link_href(doc: &Html, rel: &str) -> String {
384 let selector_str = format!("link[rel=\"{}\"]", rel);
385 if let Ok(sel) = Selector::parse(&selector_str) {
386 if let Some(el) = doc.select(&sel).next() {
387 if let Some(href) = el.value().attr("href") {
388 return href.trim().to_string();
389 }
390 }
391 }
392 "Not Found".into()
393}
394
395fn get_charset(doc: &Html) -> String {
396 if let Ok(sel) = Selector::parse("meta[charset]") {
397 if let Some(el) = doc.select(&sel).next() {
398 if let Some(cs) = el.value().attr("charset") {
399 return cs.to_string();
400 }
401 }
402 }
403 if let Ok(sel) = Selector::parse("meta[http-equiv=\"Content-Type\"]") {
404 if let Some(el) = doc.select(&sel).next() {
405 if let Some(content) = el.value().attr("content") {
406 if let Some(cs) = Regex::new(r"charset=([^;]+)")
407 .ok()
408 .and_then(|r| r.captures(content))
409 {
410 return cs.get(1).unwrap().as_str().to_string();
411 }
412 }
413 }
414 }
415 "Unknown".into()
416}
417
/// Analyzes textual content: heading structure (H1–H6 with up to three
/// 100-char samples per level), outline issues, word/paragraph counts,
/// text-to-HTML ratio, and keyword density.
fn analyze_content(doc: &Html) -> ContentAnalysisResult {
    // Per-level summary keyed "H1".."H6", plus a flat (level, text) list
    // consumed by the hierarchy check below.
    let mut headings = HashMap::new();
    let mut hierarchy: Vec<(u8, String)> = Vec::new();

    let h_selectors = [
        (1u8, Selector::parse("h1").unwrap()),
        (2, Selector::parse("h2").unwrap()),
        (3, Selector::parse("h3").unwrap()),
        (4, Selector::parse("h4").unwrap()),
        (5, Selector::parse("h5").unwrap()),
        (6, Selector::parse("h6").unwrap()),
    ];

    for (i, sel) in &h_selectors {
        let elements: Vec<_> = doc.select(sel).collect();
        if !elements.is_empty() {
            // Keep only the first three headings per level, truncated to
            // 100 characters, as samples for the report.
            let texts: Vec<String> = elements
                .iter()
                .take(3)
                .map(|e| {
                    let t = e.text().collect::<String>();
                    t.trim().chars().take(100).collect()
                })
                .collect();
            headings.insert(
                format!("H{}", i),
                HeadingInfo {
                    count: elements.len(),
                    texts,
                },
            );
            // NOTE(review): `hierarchy` is filled level by level (all H1s,
            // then all H2s, …), not in document order; the skipped-level
            // check below therefore tests level coverage rather than the
            // true in-page heading sequence — confirm this is intended.
            for e in &elements {
                let t = e.text().collect::<String>().trim().to_string();
                hierarchy.push((*i, t));
            }
        }
    }

    let heading_issues = check_heading_issues(&hierarchy);

    // NOTE(review): root_element().text() includes <script>/<style> text
    // nodes, so word_count and the text-to-HTML ratio may overcount
    // visible content — verify this matches the intended metric.
    let text = doc.root_element().text().collect::<String>();
    let words: Vec<&str> = text.split_whitespace().collect();
    let word_count = words.len();

    let p_sel = Selector::parse("p").unwrap();
    let paragraphs = doc.select(&p_sel).count();

    // Ratio of extracted text bytes to serialized HTML bytes.
    let html_len = doc.html().len();
    let text_len = text.len();
    let ratio = if html_len > 0 {
        (text_len as f64 / html_len as f64) * 100.0
    } else {
        0.0
    };

    let top_keywords = analyze_keyword_density(&words);

    ContentAnalysisResult {
        headings,
        heading_issues,
        word_count,
        // 300 words is a common minimum-content heuristic.
        word_count_status: if word_count >= 300 {
            "Good"
        } else {
            "Too short"
        }
        .into(),
        paragraphs,
        text_to_html_ratio: format!("{:.1}%", ratio),
        top_keywords,
    }
}
492
/// Flags structural problems in the heading outline: no headings at all,
/// a missing or duplicated H1, and any jump that skips a level (e.g. an
/// H2 followed directly by an H4).
fn check_heading_issues(hierarchy: &[(u8, String)]) -> Vec<String> {
    if hierarchy.is_empty() {
        return vec!["No headings found".into()];
    }

    let mut issues = Vec::new();

    // Exactly one H1 is the expectation; report both extremes.
    let h1_count = hierarchy.iter().filter(|(level, _)| *level == 1).count();
    match h1_count {
        0 => issues.push("Missing H1 tag".into()),
        1 => {}
        n => issues.push(format!("Multiple H1 tags ({})", n)),
    }

    // Walk consecutive pairs; a jump of more than one level downward in
    // the outline (e.g. H1 -> H3) is flagged.
    let mut previous = 0u8;
    for &(level, _) in hierarchy {
        if previous > 0 && level > previous + 1 {
            issues.push(format!(
                "Skipped heading level (from H{} to H{})",
                previous, level
            ));
        }
        previous = level;
    }

    issues
}
519
520fn analyze_keyword_density(words: &[&str]) -> Vec<KeywordInfo> {
521 let total = words.len();
522 if total == 0 {
523 return vec![];
524 }
525
526 let mut freq: HashMap<String, usize> = HashMap::new();
527 for &w in words {
528 let lower = w.to_lowercase();
529 if lower.len() > 3 {
530 *freq.entry(lower).or_insert(0) += 1;
531 }
532 }
533
534 let mut sorted: Vec<_> = freq.into_iter().collect();
535 sorted.sort_by(|a, b| b.1.cmp(&a.1));
536
537 sorted
538 .into_iter()
539 .take(5)
540 .map(|(word, count)| KeywordInfo {
541 word,
542 count,
543 density: format!("{:.2}%", (count as f64 / total as f64) * 100.0),
544 })
545 .collect()
546}
547
548fn analyze_technical(
551 doc: &Html,
552 status: u16,
553 size: usize,
554 redirects: usize,
555 base_domain: &str,
556) -> TechnicalSeoResult {
557 let link_sel = Selector::parse("a[href]").unwrap();
558 let mut internal = 0;
559 let mut external = 0;
560
561 for el in doc.select(&link_sel) {
562 if let Some(href) = el.value().attr("href") {
563 if href.starts_with("http") && !href.contains(base_domain) {
564 external += 1;
565 } else if !href.starts_with("mailto:")
566 && !href.starts_with("tel:")
567 && !href.starts_with('#')
568 {
569 internal += 1;
570 }
571 }
572 }
573
574 let json_ld = Selector::parse("script[type=\"application/ld+json\"]")
575 .ok()
576 .map(|s| doc.select(&s).count())
577 .unwrap_or(0);
578 let microdata = Selector::parse("[itemtype]")
579 .ok()
580 .map(|s| doc.select(&s).count())
581 .unwrap_or(0);
582
583 let breadcrumb = Selector::parse("[typeof=\"BreadcrumbList\"]")
584 .ok()
585 .map(|s| doc.select(&s).next().is_some())
586 .unwrap_or(false)
587 || doc.html().to_lowercase().contains("breadcrumb");
588
589 TechnicalSeoResult {
590 page_size_bytes: size,
591 http_status: status,
592 redirects,
593 internal_links: internal,
594 external_links: external,
595 structured_data_count: json_ld + microdata,
596 has_breadcrumbs: breadcrumb,
597 }
598}
599
600fn analyze_social_tags(doc: &Html) -> SocialMediaResult {
603 let og_keys = [
604 "og:title",
605 "og:description",
606 "og:image",
607 "og:url",
608 "og:type",
609 "og:site_name",
610 ];
611 let tw_keys = [
612 "twitter:card",
613 "twitter:title",
614 "twitter:description",
615 "twitter:image",
616 "twitter:site",
617 ];
618
619 let mut og = HashMap::new();
620 for key in &og_keys {
621 og.insert(key.to_string(), get_meta_content(doc, "property", key));
622 }
623
624 let mut tw = HashMap::new();
625 for key in &tw_keys {
626 tw.insert(key.to_string(), get_meta_content(doc, "name", key));
627 }
628
629 SocialMediaResult {
630 open_graph: og,
631 twitter_cards: tw,
632 }
633}
634
635fn analyze_analytics(html: &str) -> HashMap<String, String> {
638 let mut results = HashMap::new();
639
640 let has_ga4 = Regex::new(r#"gtag\(['"]config['"],\s*['"]G-[A-Z0-9]+['"]\)"#)
642 .ok()
643 .map(|r| r.is_match(html))
644 .unwrap_or(false);
645 let has_ua = Regex::new(r#"gtag\(['"]config['"],\s*['"]UA-[0-9-]+['"]\)"#)
646 .ok()
647 .map(|r| r.is_match(html))
648 .unwrap_or(false);
649 results.insert(
650 "Google Analytics GA4".into(),
651 if has_ga4 { "Found" } else { "Not Found" }.into(),
652 );
653 results.insert(
654 "Google Analytics UA".into(),
655 if has_ua { "Found" } else { "Not Found" }.into(),
656 );
657
658 let lower = html.to_lowercase();
660 for &(name, patterns) in TRACKING_TOOLS {
661 let found = patterns.iter().any(|p| lower.contains(&p.to_lowercase()));
662 results.insert(
663 name.to_string(),
664 if found { "Found" } else { "Not Found" }.into(),
665 );
666 }
667
668 results
669}
670
671fn analyze_performance(
674 headers: &reqwest::header::HeaderMap,
675 load_time: f64,
676 size: usize,
677) -> PerformanceResult {
678 let status = if load_time < 1.0 {
679 "Excellent"
680 } else if load_time < 3.0 {
681 "Good"
682 } else {
683 "Poor"
684 };
685
686 PerformanceResult {
687 load_time_secs: (load_time * 100.0).round() / 100.0,
688 load_time_status: status.into(),
689 content_size_kb: (size as f64 / 1024.0 * 100.0).round() / 100.0,
690 compression: headers
691 .get("content-encoding")
692 .and_then(|v| v.to_str().ok())
693 .unwrap_or("None")
694 .into(),
695 server: headers
696 .get("server")
697 .and_then(|v| v.to_str().ok())
698 .unwrap_or("Unknown")
699 .into(),
700 cache_control: headers
701 .get("cache-control")
702 .and_then(|v| v.to_str().ok())
703 .unwrap_or("Not Set")
704 .into(),
705 etag: headers.contains_key("etag"),
706 }
707}
708
709fn analyze_mobile(doc: &Html) -> MobileAccessibilityResult {
712 let viewport_content = get_meta_content(doc, "name", "viewport");
713 let has_viewport = viewport_content != "Not Found";
714 let mobile_friendly = viewport_content.contains("width=device-width");
715
716 let img_sel = Selector::parse("img").unwrap();
717 let images: Vec<_> = doc.select(&img_sel).collect();
718 let total = images.len();
719 let with_alt = images
720 .iter()
721 .filter(|i| i.value().attr("alt").is_some())
722 .count();
723
724 let aria_sel = Selector::parse("[aria-label]").unwrap();
725 let aria_count = doc.select(&aria_sel).count();
726
727 MobileAccessibilityResult {
728 viewport_present: has_viewport,
729 mobile_friendly,
730 alt_attributes: AltAttributeResult {
731 total_images: total,
732 images_with_alt: with_alt,
733 missing_alt: total - with_alt,
734 alt_coverage: if total > 0 {
735 format!("{:.1}%", (with_alt as f64 / total as f64) * 100.0)
736 } else {
737 "0%".into()
738 },
739 },
740 aria_labels: aria_count,
741 }
742}
743
744async fn check_seo_resources(client: &Client, domain: &str) -> HashMap<String, String> {
747 let mut results = HashMap::new();
748 for &file in SEO_RESOURCES {
749 let url = format!("https://{}/{}", domain, file);
750 let found = match client.get(&url).send().await {
751 Ok(r) if r.status().is_success() => "Found",
752 _ => "Not Found",
753 };
754 results.insert(file.to_string(), found.into());
755 }
756 results
757}
758
759fn analyze_schema(doc: &Html, html: &str) -> SchemaMarkupResult {
762 let json_ld_sel = Selector::parse("script[type=\"application/ld+json\"]").unwrap();
763 let json_lds: Vec<_> = doc.select(&json_ld_sel).collect();
764 let json_ld_count = json_lds.len();
765
766 let mut types = Vec::new();
767 for script in &json_lds {
768 let text = script.text().collect::<String>();
769 if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) {
770 extract_types(&val, &mut types);
771 }
772 }
773
774 let microdata = Selector::parse("[itemtype]")
775 .ok()
776 .map(|s| doc.select(&s).count())
777 .unwrap_or(0);
778
779 let additional = Regex::new(r#""@type"\s*:\s*"([^"]+)""#)
781 .ok()
782 .map(|r| {
783 r.captures_iter(html)
784 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
785 .collect::<Vec<_>>()
786 })
787 .unwrap_or_default();
788
789 for t in additional {
790 if !types.contains(&t) {
791 types.push(t);
792 }
793 }
794
795 SchemaMarkupResult {
796 json_ld_count,
797 json_ld_types: types,
798 microdata_items: microdata,
799 total_structured_data: json_ld_count + microdata,
800 }
801}
802
803fn extract_types(val: &serde_json::Value, types: &mut Vec<String>) {
804 match val {
805 serde_json::Value::Object(map) => {
806 if let Some(t) = map.get("@type").and_then(|v| v.as_str()) {
807 types.push(t.to_string());
808 }
809 for (_, v) in map {
810 extract_types(v, types);
811 }
812 }
813 serde_json::Value::Array(arr) => {
814 for v in arr {
815 extract_types(v, types);
816 }
817 }
818 _ => {}
819 }
820}
821
822fn analyze_links(doc: &Html, base_domain: &str) -> LinkAnalysisResult {
825 let link_sel = Selector::parse("a[href]").unwrap();
826 let mut internal = 0;
827 let mut external = 0;
828 let mut nofollow = 0;
829 let mut total = 0;
830
831 for el in doc.select(&link_sel) {
832 total += 1;
833 if let Some(href) = el.value().attr("href") {
834 if href.starts_with("http") && !href.contains(base_domain) {
835 external += 1;
836 } else if !href.starts_with("mailto:")
837 && !href.starts_with("tel:")
838 && !href.starts_with('#')
839 {
840 internal += 1;
841 }
842 }
843 if let Some(rel) = el.value().attr("rel") {
844 if rel.contains("nofollow") {
845 nofollow += 1;
846 }
847 }
848 }
849
850 LinkAnalysisResult {
851 total_links: total,
852 internal_links: internal,
853 external_links: external,
854 nofollow_links: nofollow,
855 }
856}
857
858fn analyze_images(doc: &Html) -> ImageSeoResult {
861 let img_sel = Selector::parse("img").unwrap();
862 let images: Vec<_> = doc.select(&img_sel).collect();
863 let total = images.len();
864 let lazy = images
865 .iter()
866 .filter(|i| i.value().attr("loading") == Some("lazy"))
867 .count();
868 let alt = images
869 .iter()
870 .filter(|i| i.value().attr("alt").is_some())
871 .count();
872 let title = images
873 .iter()
874 .filter(|i| i.value().attr("title").is_some())
875 .count();
876
877 let opt_score = if total > 0 {
878 format!("{:.1}%", ((lazy + alt) as f64 / (total * 2) as f64) * 100.0)
879 } else {
880 "0%".into()
881 };
882
883 ImageSeoResult {
884 total_images: total,
885 lazy_loaded: lazy,
886 with_alt_text: alt,
887 with_title: title,
888 optimization_score: opt_score,
889 }
890}
891
892fn analyze_speed_factors(doc: &Html, headers: &reqwest::header::HeaderMap) -> PageSpeedResult {
895 let css_sel = Selector::parse("link[rel=\"stylesheet\"]").unwrap();
896 let js_sel = Selector::parse("script[src]").unwrap();
897 let style_sel = Selector::parse("style").unwrap();
898 let inline_js_sel = Selector::parse("script:not([src])").unwrap();
899
900 PageSpeedResult {
901 css_files: doc.select(&css_sel).count(),
902 js_files: doc.select(&js_sel).count(),
903 inline_styles: doc.select(&style_sel).count(),
904 inline_scripts: doc.select(&inline_js_sel).count(),
905 compression: headers
906 .get("content-encoding")
907 .and_then(|v| v.to_str().ok())
908 .unwrap_or("None")
909 .into(),
910 }
911}
912
913fn calculate_seo_score(
916 basic: &BasicSeoResult,
917 content: &ContentAnalysisResult,
918 resources: &HashMap<String, String>,
919 schema: &SchemaMarkupResult,
920 perf: &PerformanceResult,
921 mobile: &MobileAccessibilityResult,
922) -> SeoScoreResult {
923 let mut score: u32 = 0;
924
925 if basic.title.status == "Good" {
927 score += 10;
928 }
929 if basic.meta_description.status == "Good" {
930 score += 10;
931 }
932 if basic.canonical_url != "Not Found" {
933 score += 5;
934 }
935 if basic.viewport != "Not Found" {
936 score += 5;
937 }
938
939 if content.word_count_status == "Good" {
941 score += 10;
942 }
943 if content.headings.contains_key("H1") {
944 score += 10;
945 }
946
947 if resources.get("robots.txt").map(|s| s.as_str()) == Some("Found") {
949 score += 5;
950 }
951 if resources.get("sitemap.xml").map(|s| s.as_str()) == Some("Found") {
952 score += 5;
953 }
954 if schema.total_structured_data > 0 {
955 score += 10;
956 }
957
958 match perf.load_time_status.as_str() {
960 "Excellent" | "Good" => score += 15,
961 _ => {}
962 }
963
964 score += 5; if mobile.mobile_friendly {
969 score += 5;
970 }
971
972 let max_score = 100u32;
973 let pct = (score as f64 / max_score as f64) * 100.0;
974 let grade = if pct >= 90.0 {
975 "A+"
976 } else if pct >= 80.0 {
977 "A"
978 } else if pct >= 70.0 {
979 "B"
980 } else if pct >= 60.0 {
981 "C"
982 } else if pct >= 50.0 {
983 "D"
984 } else {
985 "F"
986 };
987
988 SeoScoreResult {
989 score,
990 max_score,
991 percentage: format!("{:.1}%", pct),
992 grade: grade.into(),
993 }
994}