web_analyzer/
geo_analysis.rs1use reqwest::Client;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::time::Duration;
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct GeoAnalysisResult {
10 pub domain: String,
11 pub llms_txt: LlmsTxtResult,
12 pub webmcp: WebMcpResult,
13 pub ai_crawler_directives: AiCrawlerResult,
14 pub geo_score: u32,
15 pub geo_grade: String,
16}
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct LlmsTxtResult {
20 pub found: bool,
21 pub files: Vec<String>,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct WebMcpResult {
26 pub found: bool,
27 pub endpoints: Vec<String>,
28 pub html_features: Vec<String>,
29}
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32pub struct AiCrawlerResult {
33 pub status: String,
34 pub bots: HashMap<String, String>,
35}
36
/// User-agent tokens of the AI crawlers whose `robots.txt` treatment is reported.
///
/// Every entry becomes a key in `AiCrawlerResult::bots`, and the list length is the
/// denominator used when deciding whether a site is "Restrictive".
const AI_BOTS: &[&str] = &[
    "GPTBot",
    "ChatGPT-User",
    "ClaudeBot",
    "Claude-Web",
    "Applebot-Extended",
    "OAI-SearchBot",
    "PerplexityBot",
];
48
49pub async fn analyze_geo(
52 domain: &str,
53 progress_tx: Option<tokio::sync::mpsc::Sender<crate::ScanProgress>>,
54) -> Result<GeoAnalysisResult, Box<dyn std::error::Error + Send + Sync>> {
55 let base_url = if domain.starts_with("http") {
56 domain.to_string()
57 } else {
58 format!("https://{}", domain)
59 };
60
61 let client = Client::builder()
62 .timeout(Duration::from_secs(10))
63 .danger_accept_invalid_certs(true)
64 .build()?;
65
66 if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "Geo Analysis".into(), percentage: 10.0, message: "Checking for llms.txt presence...".into(), status: "Info".into() }).await; }
68 let llms_paths = ["/llms.txt", "/llms-full.txt", "/.well-known/llms.txt"];
69 let mut llms_found = Vec::new();
70 for path in &llms_paths {
71 let url = format!("{}{}", base_url.trim_end_matches('/'), path);
72 if let Ok(resp) = client.get(&url).send().await {
73 if resp.status().is_success() {
74 let ct = resp
75 .headers()
76 .get("content-type")
77 .and_then(|v| v.to_str().ok())
78 .unwrap_or("")
79 .to_lowercase();
80 if ct.contains("text/plain") || ct.contains("text/html") {
81 llms_found.push(path.to_string());
82 }
83 }
84 }
85 }
86
87 if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "Geo Analysis".into(), percentage: 40.0, message: "Scanning for Model Context Protocol (MCP) endpoints...".into(), status: "Info".into() }).await; }
89 let mcp_paths = ["/.well-known/mcp", "/mcp.json"];
90 let mut mcp_found = Vec::new();
91 for path in &mcp_paths {
92 let url = format!("{}{}", base_url.trim_end_matches('/'), path);
93 if let Ok(resp) = client.get(&url).send().await {
94 if resp.status().is_success() {
95 mcp_found.push(path.to_string());
96 }
97 }
98 }
99
100 let mut html_features = Vec::new();
102 if let Ok(resp) = client.get(&base_url).send().await {
103 if resp.status().is_success() {
104 if let Ok(html) = resp.text().await {
105 if html.contains("navigator.modelContext") {
106 html_features.push("navigator.modelContext API".to_string());
107 }
108 let lower = html.to_lowercase();
109 if lower.contains("webmcp") || lower.contains("model context protocol") {
110 html_features
111 .push("WebMCP/Model Context Protocol references in HTML".to_string());
112 }
113 }
114 }
115 }
116
117 let mcp_has_anything = !mcp_found.is_empty() || !html_features.is_empty();
118
119 if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "Geo Analysis".into(), percentage: 70.0, message: "Analyzing AI crawler directives in robots.txt...".into(), status: "Info".into() }).await; }
121 let mut directives: HashMap<String, String> = AI_BOTS
122 .iter()
123 .map(|b| (b.to_string(), "Unknown".into()))
124 .collect();
125
126 let robots_url = format!("{}/robots.txt", base_url.trim_end_matches('/'));
127 if let Ok(resp) = client.get(&robots_url).send().await {
128 if resp.status().is_success() {
129 if let Ok(body) = resp.text().await {
130 let mut current_agent: Option<String> = None;
131 for line in body.lines() {
132 let line = line.trim();
133 if line.is_empty() || line.starts_with('#') {
134 continue;
135 }
136 let lower = line.to_lowercase();
137
138 if lower.starts_with("user-agent:") {
139 let agent = line.split(':').nth(1).unwrap_or("").trim().to_string();
140 if AI_BOTS.iter().any(|b| *b == agent) {
141 current_agent = Some(agent);
142 } else {
143 current_agent = None;
144 }
145 } else if let Some(ref agent) = current_agent {
146 if lower.starts_with("disallow:") {
147 let path = line.split(':').nth(1).unwrap_or("").trim();
148 if path == "/" {
149 directives.insert(agent.clone(), "Blocked".into());
150 } else if directives.get(agent).map(|s| s.as_str()) == Some("Unknown") {
151 directives.insert(agent.clone(), "Partially Blocked".into());
152 }
153 } else if lower.starts_with("allow:")
154 && directives.get(agent).map(|s| s.as_str()) == Some("Unknown") {
155 directives.insert(agent.clone(), "Allowed".into());
156 }
157 }
158 }
159 for (_, v) in directives.iter_mut() {
161 if *v == "Unknown" {
162 *v = "Allowed (Implicit)".into();
163 }
164 }
165 }
166 }
167 }
168
169 let blocked_count = directives
170 .values()
171 .filter(|v| v.contains("Blocked"))
172 .count();
173 let crawler_status = if blocked_count > AI_BOTS.len() / 2 {
174 "Restrictive"
175 } else {
176 "Permissive"
177 };
178
179 if let Some(t) = &progress_tx { let _ = t.send(crate::ScanProgress { module: "Geo Analysis".into(), percentage: 90.0, message: "Calculating Geofencing AI readiness score...".into(), status: "Info".into() }).await; }
181 let mut score: u32 = 0;
182
183 if !llms_found.is_empty() {
185 score += 20 + (llms_found.len() as u32 * 10).min(20);
186 }
187
188 if mcp_has_anything {
190 score += 20;
191 if !mcp_found.is_empty() {
192 score += 10;
193 }
194 if !html_features.is_empty() {
195 score += 10;
196 }
197 }
198
199 if crawler_status == "Permissive" {
201 score += 20;
202 }
203
204 let grade = match score {
205 80..=100 => "A (Excellent)".into(),
206 60..=79 => "B (Good)".into(),
207 40..=59 => "C (Fair)".into(),
208 20..=39 => "D (Poor)".into(),
209 _ => "F (None)".into(),
210 };
211
212 Ok(GeoAnalysisResult {
213 domain: domain.to_string(),
214 llms_txt: LlmsTxtResult {
215 found: !llms_found.is_empty(),
216 files: llms_found,
217 },
218 webmcp: WebMcpResult {
219 found: mcp_has_anything,
220 endpoints: mcp_found,
221 html_features,
222 },
223 ai_crawler_directives: AiCrawlerResult {
224 status: crawler_status.to_string(),
225 bots: directives,
226 },
227 geo_score: score,
228 geo_grade: grade,
229 })
230}