1use regex::Regex;
2use reqwest::Client;
3use scraper::{Html, Selector};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::time::{Duration, Instant};
7
/// Third-party marketing/analytics tools paired with HTML substring
/// fingerprints; `analyze_analytics` reports a tool as "Found" when any
/// fingerprint appears (case-insensitively) in the fetched page.
/// Google Analytics itself is detected separately via regexes on gtag calls.
const TRACKING_TOOLS: &[(&str, &[&str])] = &[
    (
        "Google Tag Manager",
        &["googletagmanager.com/gtm.js", "dataLayer"],
    ),
    (
        "Google Ads",
        &["googleads.g.doubleclick.net", "googlesyndication.com"],
    ),
    ("Facebook Pixel", &["connect.facebook.net", "fbq("]),
    (
        "LinkedIn Insight",
        &["snap.licdn.com", "_linkedin_partner_id"],
    ),
    ("TikTok Pixel", &["analytics.tiktok.com", "ttq."]),
    ("Hotjar", &["static.hotjar.com", "hjid"]),
    ("Mixpanel", &["cdn.mxpnl.com", "mixpanel.init"]),
    ("Segment", &["cdn.segment.com", "analytics.load"]),
    ("Intercom", &["widget.intercom.io"]),
    ("Zendesk", &["static.zdassets.com"]),
    ("Crisp", &["client.crisp.chat"]),
];
32
/// Well-known root-level files probed over HTTPS by `check_seo_resources`.
const SEO_RESOURCES: &[&str] = &["robots.txt", "sitemap.xml", "humans.txt", "ads.txt"];
35
/// Aggregated SEO report for one domain — one field per analysis area,
/// assembled by `analyze_advanced_seo`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeoAnalysisResult {
    // The domain/URL exactly as supplied by the caller.
    pub domain: String,
    pub basic_seo: BasicSeoResult,
    pub content_analysis: ContentAnalysisResult,
    pub technical_seo: TechnicalSeoResult,
    pub social_media: SocialMediaResult,
    // Tool name -> "Found" / "Not Found".
    pub analytics: HashMap<String, String>,
    pub performance: PerformanceResult,
    pub mobile_accessibility: MobileAccessibilityResult,
    // Resource file name (e.g. "robots.txt") -> "Found" / "Not Found".
    pub seo_resources: HashMap<String, String>,
    pub schema_markup: SchemaMarkupResult,
    pub link_analysis: LinkAnalysisResult,
    pub image_seo: ImageSeoResult,
    pub page_speed_factors: PageSpeedResult,
    pub seo_score: SeoScoreResult,
}
55
/// The page `<title>` text with its measured length and a verdict
/// ("Missing" / "Too short" / "Too long" / "Good").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TitleAnalysis {
    pub text: String,
    pub length: usize,
    pub status: String,
}

/// The meta description text with its measured length and a verdict
/// ("Missing" / "Too short" / "Too long" / "Good").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetaDescAnalysis {
    pub text: String,
    pub length: usize,
    pub status: String,
}

/// Core `<head>` SEO signals; string fields hold "Not Found" (or
/// "Not specified"/"Unknown" for language/charset) when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BasicSeoResult {
    pub title: TitleAnalysis,
    pub meta_description: MetaDescAnalysis,
    pub meta_keywords: String,
    pub canonical_url: String,
    pub meta_robots: String,
    pub viewport: String,
    pub language: String,
    pub charset: String,
}
81
/// Per-heading-level summary: total count plus up to three sample texts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingInfo {
    pub count: usize,
    pub texts: Vec<String>,
}

/// One keyword with its occurrence count and density rendered as a
/// percentage string (e.g. "2.15%").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeywordInfo {
    pub word: String,
    pub count: usize,
    pub density: String,
}

/// Content metrics: heading structure, detected heading issues, word and
/// paragraph counts, text-to-HTML ratio, and the top keywords by density.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentAnalysisResult {
    // Keyed "H1".."H6"; levels with no headings are absent.
    pub headings: HashMap<String, HeadingInfo>,
    pub heading_issues: Vec<String>,
    pub word_count: usize,
    pub word_count_status: String,
    pub paragraphs: usize,
    pub text_to_html_ratio: String,
    pub top_keywords: Vec<KeywordInfo>,
}
105
/// Technical crawl signals for the fetched page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechnicalSeoResult {
    pub page_size_bytes: usize,
    pub http_status: u16,
    pub redirects: usize,
    pub internal_links: usize,
    pub external_links: usize,
    // JSON-LD scripts plus elements carrying an itemtype attribute.
    pub structured_data_count: usize,
    pub has_breadcrumbs: bool,
}

/// Open Graph and Twitter Card meta-tag values; each map holds
/// tag name -> content (or "Not Found").
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SocialMediaResult {
    pub open_graph: HashMap<String, String>,
    pub twitter_cards: HashMap<String, String>,
}
122
/// Fetch-time performance signals taken from the HTTP exchange.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceResult {
    // Wall-clock time of the GET request, rounded to two decimals.
    pub load_time_secs: f64,
    // "Excellent" (<1s) / "Good" (<3s) / "Poor".
    pub load_time_status: String,
    pub content_size_kb: f64,
    // Value of the Content-Encoding header, or "None".
    pub compression: String,
    pub server: String,
    pub cache_control: String,
    pub etag: bool,
}

/// Image alt-attribute coverage statistics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AltAttributeResult {
    pub total_images: usize,
    pub images_with_alt: usize,
    pub missing_alt: usize,
    // Percentage string, "0%" when the page has no images.
    pub alt_coverage: String,
}

/// Mobile and accessibility checks: viewport presence/configuration,
/// alt-text coverage, and aria-label usage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MobileAccessibilityResult {
    pub viewport_present: bool,
    // True when the viewport declares width=device-width.
    pub mobile_friendly: bool,
    pub alt_attributes: AltAttributeResult,
    pub aria_labels: usize,
}
149
/// Structured-data summary: JSON-LD script count and @type values,
/// plus microdata item count.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemaMarkupResult {
    pub json_ld_count: usize,
    pub json_ld_types: Vec<String>,
    pub microdata_items: usize,
    pub total_structured_data: usize,
}

/// Anchor-link breakdown for the page.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LinkAnalysisResult {
    pub total_links: usize,
    pub internal_links: usize,
    pub external_links: usize,
    pub nofollow_links: usize,
}

/// Image SEO stats: lazy-loading, alt text, title attributes, and a
/// combined optimization percentage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageSeoResult {
    pub total_images: usize,
    pub lazy_loaded: usize,
    pub with_alt_text: usize,
    pub with_title: usize,
    pub optimization_score: String,
}
174
/// Simple page-weight factors affecting load speed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageSpeedResult {
    pub css_files: usize,
    pub js_files: usize,
    pub inline_styles: usize,
    pub inline_scripts: usize,
    // Value of the Content-Encoding header, or "None".
    pub compression: String,
}

/// Final weighted SEO score with percentage and letter grade (A+..F).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeoScoreResult {
    pub score: u32,
    pub max_score: u32,
    pub percentage: String,
    pub grade: String,
}
191
192pub async fn analyze_advanced_seo(
195 domain: &str,
196) -> Result<SeoAnalysisResult, Box<dyn std::error::Error + Send + Sync>> {
197 let url = if domain.starts_with("http") {
198 domain.to_string()
199 } else {
200 format!("https://{}", domain)
201 };
202
203 let client = Client::builder()
204 .timeout(Duration::from_secs(20))
205 .danger_accept_invalid_certs(true)
206 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
207 .build()?;
208
209 let start = Instant::now();
210 let resp = client.get(&url).send().await?;
211 let load_time = start.elapsed().as_secs_f64();
212
213 let status_code = resp.status().as_u16();
214 let redirects = resp.url().to_string() != url; let headers = resp.headers().clone();
216 let content_bytes = resp.bytes().await?;
217 let content_size = content_bytes.len();
218 let html_text = String::from_utf8_lossy(&content_bytes).to_string();
219 let base_domain = domain
220 .replace("https://", "")
221 .replace("http://", "")
222 .replace("www.", "");
223
224 let seo_resources = check_seo_resources(&client, domain).await;
226
227 let document = Html::parse_document(&html_text);
228
229 let basic_seo = analyze_basic_seo(&document);
231
232 let content_analysis = analyze_content(&document);
234
235 let technical_seo = analyze_technical(
237 &document,
238 status_code,
239 content_size,
240 redirects as usize,
241 &base_domain,
242 );
243
244 let social_media = analyze_social_tags(&document);
246
247 let analytics = analyze_analytics(&html_text);
249
250 let performance = analyze_performance(&headers, load_time, content_size);
252
253 let mobile_accessibility = analyze_mobile(&document);
255
256 let schema_markup = analyze_schema(&document, &html_text);
258
259 let link_analysis = analyze_links(&document, &base_domain);
261
262 let image_seo = analyze_images(&document);
264
265 let page_speed_factors = analyze_speed_factors(&document, &headers);
267
268 let seo_score = calculate_seo_score(
270 &basic_seo,
271 &content_analysis,
272 &seo_resources,
273 &schema_markup,
274 &performance,
275 &mobile_accessibility,
276 );
277
278 Ok(SeoAnalysisResult {
279 domain: domain.to_string(),
280 basic_seo,
281 content_analysis,
282 technical_seo,
283 social_media,
284 analytics,
285 performance,
286 mobile_accessibility,
287 seo_resources,
288 schema_markup,
289 link_analysis,
290 image_seo,
291 page_speed_factors,
292 seo_score,
293 })
294}
295
296fn analyze_basic_seo(doc: &Html) -> BasicSeoResult {
299 let title_sel = Selector::parse("title").unwrap();
300 let title_text = doc
301 .select(&title_sel)
302 .next()
303 .map(|el| el.text().collect::<String>().trim().to_string())
304 .unwrap_or_default();
305
306 let title_len = title_text.len();
307 let title_status = if title_text.is_empty() {
308 "Missing"
309 } else if title_len < 30 {
310 "Too short"
311 } else if title_len > 60 {
312 "Too long"
313 } else {
314 "Good"
315 };
316
317 let desc = get_meta_content(doc, "name", "description");
318 let desc_len = if desc == "Not Found" { 0 } else { desc.len() };
319 let desc_status = if desc == "Not Found" {
320 "Missing"
321 } else if desc_len < 120 {
322 "Too short"
323 } else if desc_len > 160 {
324 "Too long"
325 } else {
326 "Good"
327 };
328
329 BasicSeoResult {
330 title: TitleAnalysis {
331 text: if title_text.is_empty() {
332 "Missing".into()
333 } else {
334 title_text
335 },
336 length: title_len,
337 status: title_status.into(),
338 },
339 meta_description: MetaDescAnalysis {
340 text: desc.clone(),
341 length: desc_len,
342 status: desc_status.into(),
343 },
344 meta_keywords: get_meta_content(doc, "name", "keywords"),
345 canonical_url: get_link_href(doc, "canonical"),
346 meta_robots: get_meta_content(doc, "name", "robots"),
347 viewport: get_meta_content(doc, "name", "viewport"),
348 language: doc
349 .root_element()
350 .value()
351 .attr("lang")
352 .unwrap_or("Not specified")
353 .to_string(),
354 charset: get_charset(doc),
355 }
356}
357
358fn get_meta_content(doc: &Html, attr: &str, value: &str) -> String {
359 let selector_str = format!("meta[{}=\"{}\"]", attr, value);
360 if let Ok(sel) = Selector::parse(&selector_str) {
361 if let Some(el) = doc.select(&sel).next() {
362 if let Some(content) = el.value().attr("content") {
363 return content.trim().to_string();
364 }
365 }
366 }
367 "Not Found".into()
368}
369
370fn get_link_href(doc: &Html, rel: &str) -> String {
371 let selector_str = format!("link[rel=\"{}\"]", rel);
372 if let Ok(sel) = Selector::parse(&selector_str) {
373 if let Some(el) = doc.select(&sel).next() {
374 if let Some(href) = el.value().attr("href") {
375 return href.trim().to_string();
376 }
377 }
378 }
379 "Not Found".into()
380}
381
382fn get_charset(doc: &Html) -> String {
383 if let Ok(sel) = Selector::parse("meta[charset]") {
384 if let Some(el) = doc.select(&sel).next() {
385 if let Some(cs) = el.value().attr("charset") {
386 return cs.to_string();
387 }
388 }
389 }
390 if let Ok(sel) = Selector::parse("meta[http-equiv=\"Content-Type\"]") {
391 if let Some(el) = doc.select(&sel).next() {
392 if let Some(content) = el.value().attr("content") {
393 if let Some(cs) = Regex::new(r"charset=([^;]+)")
394 .ok()
395 .and_then(|r| r.captures(content))
396 {
397 return cs.get(1).unwrap().as_str().to_string();
398 }
399 }
400 }
401 }
402 "Unknown".into()
403}
404
/// Analyzes heading structure, word/paragraph counts, text-to-HTML ratio,
/// and top keyword densities for the parsed document.
fn analyze_content(doc: &Html) -> ContentAnalysisResult {
    let mut headings = HashMap::new();
    // (level, text) pairs fed to check_heading_issues. NOTE(review): pairs
    // are appended per level (all H1s, then all H2s, ...), NOT in document
    // order — so the "skipped level" check effectively detects *missing*
    // heading levels rather than in-document ordering jumps. Confirm that
    // is the intended semantics.
    let mut hierarchy: Vec<(u8, String)> = Vec::new();

    let h_selectors = [
        (1u8, Selector::parse("h1").unwrap()),
        (2, Selector::parse("h2").unwrap()),
        (3, Selector::parse("h3").unwrap()),
        (4, Selector::parse("h4").unwrap()),
        (5, Selector::parse("h5").unwrap()),
        (6, Selector::parse("h6").unwrap()),
    ];

    for (i, sel) in &h_selectors {
        let elements: Vec<_> = doc.select(sel).collect();
        if !elements.is_empty() {
            // Keep at most three sample texts per level, truncated to
            // 100 characters each.
            let texts: Vec<String> = elements
                .iter()
                .take(3)
                .map(|e| {
                    let t = e.text().collect::<String>();
                    t.trim().chars().take(100).collect()
                })
                .collect();
            headings.insert(
                format!("H{}", i),
                HeadingInfo {
                    count: elements.len(),
                    texts,
                },
            );
            for e in &elements {
                let t = e.text().collect::<String>().trim().to_string();
                hierarchy.push((*i, t));
            }
        }
    }

    let heading_issues = check_heading_issues(&hierarchy);

    // NOTE(review): root_element().text() also yields <script>/<style>
    // contents, which can inflate word count and the text-to-HTML ratio.
    let text = doc.root_element().text().collect::<String>();
    let words: Vec<&str> = text.split_whitespace().collect();
    let word_count = words.len();

    let p_sel = Selector::parse("p").unwrap();
    let paragraphs = doc.select(&p_sel).count();

    // Ratio of visible text bytes to total serialized HTML bytes.
    let html_len = doc.html().len();
    let text_len = text.len();
    let ratio = if html_len > 0 {
        (text_len as f64 / html_len as f64) * 100.0
    } else {
        0.0
    };

    let top_keywords = analyze_keyword_density(&words);

    ContentAnalysisResult {
        headings,
        heading_issues,
        word_count,
        // 300 words is a common thin-content threshold.
        word_count_status: if word_count >= 300 {
            "Good"
        } else {
            "Too short"
        }
        .into(),
        paragraphs,
        text_to_html_ratio: format!("{:.1}%", ratio),
        top_keywords,
    }
}
479
/// Flags structural heading problems in a list of (level, text) pairs:
/// no headings at all, a missing or duplicated H1, and any jump of more
/// than one level between consecutive entries.
fn check_heading_issues(hierarchy: &[(u8, String)]) -> Vec<String> {
    if hierarchy.is_empty() {
        return vec!["No headings found".into()];
    }

    let mut issues = Vec::new();

    match hierarchy.iter().filter(|(level, _)| *level == 1).count() {
        0 => issues.push("Missing H1 tag".into()),
        1 => {}
        n => issues.push(format!("Multiple H1 tags ({})", n)),
    }

    // Compare each consecutive pair of levels for a skip of more than one.
    for pair in hierarchy.windows(2) {
        let (before, after) = (pair[0].0, pair[1].0);
        if after > before + 1 {
            issues.push(format!(
                "Skipped heading level (from H{} to H{})",
                before, after
            ));
        }
    }

    issues
}
506
507fn analyze_keyword_density(words: &[&str]) -> Vec<KeywordInfo> {
508 let total = words.len();
509 if total == 0 {
510 return vec![];
511 }
512
513 let mut freq: HashMap<String, usize> = HashMap::new();
514 for &w in words {
515 let lower = w.to_lowercase();
516 if lower.len() > 3 {
517 *freq.entry(lower).or_insert(0) += 1;
518 }
519 }
520
521 let mut sorted: Vec<_> = freq.into_iter().collect();
522 sorted.sort_by(|a, b| b.1.cmp(&a.1));
523
524 sorted
525 .into_iter()
526 .take(5)
527 .map(|(word, count)| KeywordInfo {
528 word,
529 count,
530 density: format!("{:.2}%", (count as f64 / total as f64) * 100.0),
531 })
532 .collect()
533}
534
535fn analyze_technical(
538 doc: &Html,
539 status: u16,
540 size: usize,
541 redirects: usize,
542 base_domain: &str,
543) -> TechnicalSeoResult {
544 let link_sel = Selector::parse("a[href]").unwrap();
545 let mut internal = 0;
546 let mut external = 0;
547
548 for el in doc.select(&link_sel) {
549 if let Some(href) = el.value().attr("href") {
550 if href.starts_with("http") && !href.contains(base_domain) {
551 external += 1;
552 } else if !href.starts_with("mailto:")
553 && !href.starts_with("tel:")
554 && !href.starts_with('#')
555 {
556 internal += 1;
557 }
558 }
559 }
560
561 let json_ld = Selector::parse("script[type=\"application/ld+json\"]")
562 .ok()
563 .map(|s| doc.select(&s).count())
564 .unwrap_or(0);
565 let microdata = Selector::parse("[itemtype]")
566 .ok()
567 .map(|s| doc.select(&s).count())
568 .unwrap_or(0);
569
570 let breadcrumb = Selector::parse("[typeof=\"BreadcrumbList\"]")
571 .ok()
572 .map(|s| doc.select(&s).next().is_some())
573 .unwrap_or(false)
574 || doc.html().to_lowercase().contains("breadcrumb");
575
576 TechnicalSeoResult {
577 page_size_bytes: size,
578 http_status: status,
579 redirects,
580 internal_links: internal,
581 external_links: external,
582 structured_data_count: json_ld + microdata,
583 has_breadcrumbs: breadcrumb,
584 }
585}
586
587fn analyze_social_tags(doc: &Html) -> SocialMediaResult {
590 let og_keys = [
591 "og:title",
592 "og:description",
593 "og:image",
594 "og:url",
595 "og:type",
596 "og:site_name",
597 ];
598 let tw_keys = [
599 "twitter:card",
600 "twitter:title",
601 "twitter:description",
602 "twitter:image",
603 "twitter:site",
604 ];
605
606 let mut og = HashMap::new();
607 for key in &og_keys {
608 og.insert(key.to_string(), get_meta_content(doc, "property", key));
609 }
610
611 let mut tw = HashMap::new();
612 for key in &tw_keys {
613 tw.insert(key.to_string(), get_meta_content(doc, "name", key));
614 }
615
616 SocialMediaResult {
617 open_graph: og,
618 twitter_cards: tw,
619 }
620}
621
622fn analyze_analytics(html: &str) -> HashMap<String, String> {
625 let mut results = HashMap::new();
626
627 let has_ga4 = Regex::new(r#"gtag\(['"]config['"],\s*['"]G-[A-Z0-9]+['"]\)"#)
629 .ok()
630 .map(|r| r.is_match(html))
631 .unwrap_or(false);
632 let has_ua = Regex::new(r#"gtag\(['"]config['"],\s*['"]UA-[0-9-]+['"]\)"#)
633 .ok()
634 .map(|r| r.is_match(html))
635 .unwrap_or(false);
636 results.insert(
637 "Google Analytics GA4".into(),
638 if has_ga4 { "Found" } else { "Not Found" }.into(),
639 );
640 results.insert(
641 "Google Analytics UA".into(),
642 if has_ua { "Found" } else { "Not Found" }.into(),
643 );
644
645 let lower = html.to_lowercase();
647 for &(name, patterns) in TRACKING_TOOLS {
648 let found = patterns.iter().any(|p| lower.contains(&p.to_lowercase()));
649 results.insert(
650 name.to_string(),
651 if found { "Found" } else { "Not Found" }.into(),
652 );
653 }
654
655 results
656}
657
658fn analyze_performance(
661 headers: &reqwest::header::HeaderMap,
662 load_time: f64,
663 size: usize,
664) -> PerformanceResult {
665 let status = if load_time < 1.0 {
666 "Excellent"
667 } else if load_time < 3.0 {
668 "Good"
669 } else {
670 "Poor"
671 };
672
673 PerformanceResult {
674 load_time_secs: (load_time * 100.0).round() / 100.0,
675 load_time_status: status.into(),
676 content_size_kb: (size as f64 / 1024.0 * 100.0).round() / 100.0,
677 compression: headers
678 .get("content-encoding")
679 .and_then(|v| v.to_str().ok())
680 .unwrap_or("None")
681 .into(),
682 server: headers
683 .get("server")
684 .and_then(|v| v.to_str().ok())
685 .unwrap_or("Unknown")
686 .into(),
687 cache_control: headers
688 .get("cache-control")
689 .and_then(|v| v.to_str().ok())
690 .unwrap_or("Not Set")
691 .into(),
692 etag: headers.contains_key("etag"),
693 }
694}
695
696fn analyze_mobile(doc: &Html) -> MobileAccessibilityResult {
699 let viewport_content = get_meta_content(doc, "name", "viewport");
700 let has_viewport = viewport_content != "Not Found";
701 let mobile_friendly = viewport_content.contains("width=device-width");
702
703 let img_sel = Selector::parse("img").unwrap();
704 let images: Vec<_> = doc.select(&img_sel).collect();
705 let total = images.len();
706 let with_alt = images
707 .iter()
708 .filter(|i| i.value().attr("alt").is_some())
709 .count();
710
711 let aria_sel = Selector::parse("[aria-label]").unwrap();
712 let aria_count = doc.select(&aria_sel).count();
713
714 MobileAccessibilityResult {
715 viewport_present: has_viewport,
716 mobile_friendly,
717 alt_attributes: AltAttributeResult {
718 total_images: total,
719 images_with_alt: with_alt,
720 missing_alt: total - with_alt,
721 alt_coverage: if total > 0 {
722 format!("{:.1}%", (with_alt as f64 / total as f64) * 100.0)
723 } else {
724 "0%".into()
725 },
726 },
727 aria_labels: aria_count,
728 }
729}
730
731async fn check_seo_resources(client: &Client, domain: &str) -> HashMap<String, String> {
734 let mut results = HashMap::new();
735 for &file in SEO_RESOURCES {
736 let url = format!("https://{}/{}", domain, file);
737 let found = match client.get(&url).send().await {
738 Ok(r) if r.status().is_success() => "Found",
739 _ => "Not Found",
740 };
741 results.insert(file.to_string(), found.into());
742 }
743 results
744}
745
746fn analyze_schema(doc: &Html, html: &str) -> SchemaMarkupResult {
749 let json_ld_sel = Selector::parse("script[type=\"application/ld+json\"]").unwrap();
750 let json_lds: Vec<_> = doc.select(&json_ld_sel).collect();
751 let json_ld_count = json_lds.len();
752
753 let mut types = Vec::new();
754 for script in &json_lds {
755 let text = script.text().collect::<String>();
756 if let Ok(val) = serde_json::from_str::<serde_json::Value>(&text) {
757 extract_types(&val, &mut types);
758 }
759 }
760
761 let microdata = Selector::parse("[itemtype]")
762 .ok()
763 .map(|s| doc.select(&s).count())
764 .unwrap_or(0);
765
766 let additional = Regex::new(r#""@type"\s*:\s*"([^"]+)""#)
768 .ok()
769 .map(|r| {
770 r.captures_iter(html)
771 .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
772 .collect::<Vec<_>>()
773 })
774 .unwrap_or_default();
775
776 for t in additional {
777 if !types.contains(&t) {
778 types.push(t);
779 }
780 }
781
782 SchemaMarkupResult {
783 json_ld_count,
784 json_ld_types: types,
785 microdata_items: microdata,
786 total_structured_data: json_ld_count + microdata,
787 }
788}
789
790fn extract_types(val: &serde_json::Value, types: &mut Vec<String>) {
791 match val {
792 serde_json::Value::Object(map) => {
793 if let Some(t) = map.get("@type").and_then(|v| v.as_str()) {
794 types.push(t.to_string());
795 }
796 for (_, v) in map {
797 extract_types(v, types);
798 }
799 }
800 serde_json::Value::Array(arr) => {
801 for v in arr {
802 extract_types(v, types);
803 }
804 }
805 _ => {}
806 }
807}
808
809fn analyze_links(doc: &Html, base_domain: &str) -> LinkAnalysisResult {
812 let link_sel = Selector::parse("a[href]").unwrap();
813 let mut internal = 0;
814 let mut external = 0;
815 let mut nofollow = 0;
816 let mut total = 0;
817
818 for el in doc.select(&link_sel) {
819 total += 1;
820 if let Some(href) = el.value().attr("href") {
821 if href.starts_with("http") && !href.contains(base_domain) {
822 external += 1;
823 } else if !href.starts_with("mailto:")
824 && !href.starts_with("tel:")
825 && !href.starts_with('#')
826 {
827 internal += 1;
828 }
829 }
830 if let Some(rel) = el.value().attr("rel") {
831 if rel.contains("nofollow") {
832 nofollow += 1;
833 }
834 }
835 }
836
837 LinkAnalysisResult {
838 total_links: total,
839 internal_links: internal,
840 external_links: external,
841 nofollow_links: nofollow,
842 }
843}
844
845fn analyze_images(doc: &Html) -> ImageSeoResult {
848 let img_sel = Selector::parse("img").unwrap();
849 let images: Vec<_> = doc.select(&img_sel).collect();
850 let total = images.len();
851 let lazy = images
852 .iter()
853 .filter(|i| i.value().attr("loading") == Some("lazy"))
854 .count();
855 let alt = images
856 .iter()
857 .filter(|i| i.value().attr("alt").is_some())
858 .count();
859 let title = images
860 .iter()
861 .filter(|i| i.value().attr("title").is_some())
862 .count();
863
864 let opt_score = if total > 0 {
865 format!("{:.1}%", ((lazy + alt) as f64 / (total * 2) as f64) * 100.0)
866 } else {
867 "0%".into()
868 };
869
870 ImageSeoResult {
871 total_images: total,
872 lazy_loaded: lazy,
873 with_alt_text: alt,
874 with_title: title,
875 optimization_score: opt_score,
876 }
877}
878
879fn analyze_speed_factors(doc: &Html, headers: &reqwest::header::HeaderMap) -> PageSpeedResult {
882 let css_sel = Selector::parse("link[rel=\"stylesheet\"]").unwrap();
883 let js_sel = Selector::parse("script[src]").unwrap();
884 let style_sel = Selector::parse("style").unwrap();
885 let inline_js_sel = Selector::parse("script:not([src])").unwrap();
886
887 PageSpeedResult {
888 css_files: doc.select(&css_sel).count(),
889 js_files: doc.select(&js_sel).count(),
890 inline_styles: doc.select(&style_sel).count(),
891 inline_scripts: doc.select(&inline_js_sel).count(),
892 compression: headers
893 .get("content-encoding")
894 .and_then(|v| v.to_str().ok())
895 .unwrap_or("None")
896 .into(),
897 }
898}
899
900fn calculate_seo_score(
903 basic: &BasicSeoResult,
904 content: &ContentAnalysisResult,
905 resources: &HashMap<String, String>,
906 schema: &SchemaMarkupResult,
907 perf: &PerformanceResult,
908 mobile: &MobileAccessibilityResult,
909) -> SeoScoreResult {
910 let mut score: u32 = 0;
911
912 if basic.title.status == "Good" {
914 score += 10;
915 }
916 if basic.meta_description.status == "Good" {
917 score += 10;
918 }
919 if basic.canonical_url != "Not Found" {
920 score += 5;
921 }
922 if basic.viewport != "Not Found" {
923 score += 5;
924 }
925
926 if content.word_count_status == "Good" {
928 score += 10;
929 }
930 if content.headings.contains_key("H1") {
931 score += 10;
932 }
933
934 if resources.get("robots.txt").map(|s| s.as_str()) == Some("Found") {
936 score += 5;
937 }
938 if resources.get("sitemap.xml").map(|s| s.as_str()) == Some("Found") {
939 score += 5;
940 }
941 if schema.total_structured_data > 0 {
942 score += 10;
943 }
944
945 match perf.load_time_status.as_str() {
947 "Excellent" | "Good" => score += 15,
948 _ => {}
949 }
950
951 score += 5; if mobile.mobile_friendly {
956 score += 5;
957 }
958
959 let max_score = 100u32;
960 let pct = (score as f64 / max_score as f64) * 100.0;
961 let grade = if pct >= 90.0 {
962 "A+"
963 } else if pct >= 80.0 {
964 "A"
965 } else if pct >= 70.0 {
966 "B"
967 } else if pct >= 60.0 {
968 "C"
969 } else if pct >= 50.0 {
970 "D"
971 } else {
972 "F"
973 };
974
975 SeoScoreResult {
976 score,
977 max_score,
978 percentage: format!("{:.1}%", pct),
979 grade: grade.into(),
980 }
981}