use crate::types::{Confidence, ScanConfig, Severity, Vulnerability};
use std::collections::HashMap;
use tracing::info;
/// A single Google search query ("dork") together with the context needed to
/// triage whatever it turns up.
///
/// Derives value-equality and hashing so dorks can be compared in tests,
/// de-duplicated, or used as set/map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GoogleDork {
    /// Grouping label (e.g. `"Cloud Storage"`); used as the key when results
    /// are bucketed by category.
    pub category: String,
    /// The literal query string to paste into Google Search.
    pub query: String,
    /// Short human-readable summary of what the query looks for.
    pub description: String,
    /// Why a hit for this query matters from a security point of view.
    pub impact: String,
}
/// Everything produced by one dork-generation run: the target, the flat list
/// of dorks, and the same dorks grouped by category.
#[derive(Debug, Clone)]
pub struct GoogleDorkingResults {
/// Target the dorks were generated for, i.e. the caller's input after
/// surrounding whitespace and a leading `http://` / `https://` were stripped.
pub domain: String,
/// Every generated dork, in the order it was generated.
pub dorks: Vec<GoogleDork>,
/// Clones of the entries in `dorks`, keyed by their `category` string.
pub by_category: HashMap<String, Vec<GoogleDork>>,
}
/// Stateless generator of Google dork queries for a target domain.
///
/// The type carries no data; it exists so dork generation can be invoked
/// through a scanner value. `Debug` and `Clone` are derived so the scanner can
/// be embedded in other derivable types and logged.
#[derive(Debug, Clone)]
pub struct GoogleDorkingScanner;
impl GoogleDorkingScanner {
/// Creates a scanner.
///
/// The scanner is a unit struct with no fields, so construction has nothing
/// to configure and cannot fail.
pub fn new() -> Self {
Self
}
pub fn generate_dorks(&self, domain: &str) -> GoogleDorkingResults {
info!("Generating Google dorks for domain: {}", domain);
let mut dorks = Vec::new();
let clean_domain = domain
.trim()
.trim_start_matches("http://")
.trim_start_matches("https://");
dorks.push(GoogleDork {
category: "PHP Extensions".to_string(),
query: format!("site:{} ext:php inurl:?", clean_domain),
description: "Find PHP files with query parameters".to_string(),
impact: "May expose PHP endpoints accepting user input, potential injection points"
.to_string(),
});
dorks.push(GoogleDork {
category: "API Endpoints".to_string(),
query: format!(
"site:{} inurl:api | site:{}/rest | site:{}/v1 | site:{}/v2 | site:{}/v3",
clean_domain, clean_domain, clean_domain, clean_domain, clean_domain
),
description: "Discover API endpoints".to_string(),
impact: "API endpoints may expose sensitive data or functionality".to_string(),
});
dorks.push(GoogleDork {
category: "Sensitive Files".to_string(),
query: format!(
"site:\"{}\" ext:log | ext:txt | ext:conf | ext:cnf | ext:ini | ext:env | ext:sh | ext:bak | ext:backup | ext:swp | ext:old | ext:~ | ext:git | ext:svn | ext:htpasswd | ext:htaccess | ext:json",
clean_domain
),
description: "Find sensitive file extensions".to_string(),
impact: "May expose configuration files, credentials, backups, or version control data".to_string(),
});
dorks.push(GoogleDork {
category: "Sensitive Paths".to_string(),
query: format!(
"inurl:conf | inurl:env | inurl:cgi | inurl:bin | inurl:etc | inurl:root | inurl:sql | inurl:backup | inurl:admin | inurl:php site:{}",
clean_domain
),
description: "Find sensitive URL paths".to_string(),
impact: "May reveal administrative interfaces, configuration endpoints, or backup files".to_string(),
});
dorks.push(GoogleDork {
category: "Error Messages".to_string(),
query: format!(
"inurl:\"error\" | intitle:\"exception\" | intitle:\"failure\" | intitle:\"server at\" | inurl:exception | \"database error\" | \"SQL syntax\" | \"undefined index\" | \"unhandled exception\" | \"stack trace\" site:{}",
clean_domain
),
description: "Find error pages and stack traces".to_string(),
impact: "Error messages may leak sensitive information about the application stack".to_string(),
});
dorks.push(GoogleDork {
category: "XSS Prone Parameters".to_string(),
query: format!(
"inurl:q= | inurl:s= | inurl:search= | inurl:query= | inurl:keyword= | inurl:lang= inurl:& site:{}",
clean_domain
),
description: "Find parameters commonly vulnerable to XSS".to_string(),
impact: "Search and display parameters often lack proper output encoding".to_string(),
});
dorks.push(GoogleDork {
category: "Open Redirect Parameters".to_string(),
query: format!(
"inurl:url= | inurl:return= | inurl:next= | inurl:redirect= | inurl:redir= | inurl:ret= | inurl:r2= | inurl:page= inurl:& inurl:http site:{}",
clean_domain
),
description: "Find parameters prone to open redirect".to_string(),
impact: "May allow attackers to redirect users to malicious sites".to_string(),
});
dorks.push(GoogleDork {
category: "SQLi Prone Parameters".to_string(),
query: format!(
"inurl:id= | inurl:pid= | inurl:category= | inurl:cat= | inurl:action= | inurl:sid= | inurl:dir= inurl:& site:{}",
clean_domain
),
description: "Find parameters commonly vulnerable to SQL injection".to_string(),
impact: "ID and category parameters often directly interact with databases".to_string(),
});
dorks.push(GoogleDork {
category: "SSRF Prone Parameters".to_string(),
query: format!(
"inurl:http | inurl:url= | inurl:path= | inurl:dest= | inurl:html= | inurl:data= | inurl:domain= | inurl:page= inurl:& site:{}",
clean_domain
),
description: "Find parameters prone to SSRF".to_string(),
impact: "URL-accepting parameters may allow server-side request forgery".to_string(),
});
dorks.push(GoogleDork {
category: "LFI Prone Parameters".to_string(),
query: format!(
"inurl:include | inurl:dir | inurl:detail= | inurl:file= | inurl:folder= | inurl:inc= | inurl:locate= | inurl:doc= | inurl:conf= inurl:& site:{}",
clean_domain
),
description: "Find parameters prone to Local File Inclusion".to_string(),
impact: "File path parameters may allow reading arbitrary files".to_string(),
});
dorks.push(GoogleDork {
category: "RCE Prone Parameters".to_string(),
query: format!(
"inurl:cmd | inurl:exec= | inurl:query= | inurl:code= | inurl:do= | inurl:run= | inurl:read= | inurl:ping= inurl:& site:{}",
clean_domain
),
description: "Find parameters prone to Remote Code Execution".to_string(),
impact: "Command execution parameters are critical security risks".to_string(),
});
dorks.push(GoogleDork {
category: "File Upload".to_string(),
query: format!(
"site:{} intext:\"choose file\" | intext:\"select file\" | intext:\"upload PDF\"",
clean_domain
),
description: "Find file upload functionality".to_string(),
impact: "File upload features may allow arbitrary file uploads".to_string(),
});
dorks.push(GoogleDork {
category: "API Documentation".to_string(),
query: format!(
"inurl:apidocs | inurl:api-docs | inurl:swagger | inurl:api-explorer | inurl:redoc | inurl:openapi | intitle:\"Swagger UI\" site:\"{}\"",
clean_domain
),
description: "Find exposed API documentation".to_string(),
impact: "API docs reveal endpoints, parameters, and authentication methods".to_string(),
});
dorks.push(GoogleDork {
category: "Login Pages".to_string(),
query: format!(
"inurl:login | inurl:signin | intitle:login | intitle:signin | inurl:secure site:{}",
clean_domain
),
description: "Find login and authentication pages".to_string(),
impact: "Login pages are targets for credential attacks".to_string(),
});
dorks.push(GoogleDork {
category: "Test Environments".to_string(),
query: format!(
"inurl:test | inurl:env | inurl:dev | inurl:staging | inurl:sandbox | inurl:debug | inurl:temp | inurl:internal | inurl:demo site:{}",
clean_domain
),
description: "Find development and test environments".to_string(),
impact: "Non-production environments often have weaker security".to_string(),
});
dorks.push(GoogleDork {
category: "Sensitive Documents".to_string(),
query: format!(
"site:{} ext:txt | ext:pdf | ext:xml | ext:xls | ext:xlsx | ext:ppt | ext:pptx | ext:doc | ext:docx intext:\"confidential\" | intext:\"Not for Public Release\" | intext:\"internal use only\" | intext:\"do not distribute\"",
clean_domain
),
description: "Find confidential documents".to_string(),
impact: "May expose sensitive business documents and data".to_string(),
});
dorks.push(GoogleDork {
category: "PII Parameters".to_string(),
query: format!(
"inurl:email= | inurl:phone= | inurl:name= | inurl:user= inurl:& site:{}",
clean_domain
),
description: "Find parameters handling personal information".to_string(),
impact: "PII parameters may be vulnerable to enumeration or injection".to_string(),
});
dorks.push(GoogleDork {
category: "AEM Paths".to_string(),
query: format!(
"inurl:/content/usergenerated | inurl:/content/dam | inurl:/jcr:content | inurl:/libs/granite | inurl:/etc/clientlibs | inurl:/content/geometrixx | inurl:/bin/wcm | inurl:/crx/de site:{}",
clean_domain
),
description: "Find Adobe Experience Manager paths".to_string(),
impact: "AEM misconfigurations can expose admin interfaces and content".to_string(),
});
dorks.push(GoogleDork {
category: "Known Vulnerabilities".to_string(),
query: format!(
"site:openbugbounty.org inurl:reports intext:\"{}\"",
clean_domain
),
description: "Find disclosed vulnerabilities on OpenBugBounty".to_string(),
impact: "Previously reported vulnerabilities may still be unpatched".to_string(),
});
dorks.push(GoogleDork {
category: "Information Leakage".to_string(),
query: format!("site:groups.google.com \"{}\"", clean_domain),
description: "Find mentions in Google Groups".to_string(),
impact: "May reveal internal discussions, credentials, or configurations".to_string(),
});
dorks.push(GoogleDork {
category: "Code Leaks".to_string(),
query: format!("site:pastebin.com \"{}\"", clean_domain),
description: "Find code snippets on Pastebin".to_string(),
impact: "May expose credentials, API keys, or internal code".to_string(),
});
dorks.push(GoogleDork {
category: "Code Leaks".to_string(),
query: format!("site:jsfiddle.net \"{}\"", clean_domain),
description: "Find code snippets on JSFiddle".to_string(),
impact: "May expose frontend code with hardcoded credentials".to_string(),
});
dorks.push(GoogleDork {
category: "Code Leaks".to_string(),
query: format!("site:codebeautify.org \"{}\"", clean_domain),
description: "Find code snippets on CodeBeautify".to_string(),
impact: "May expose formatted code with sensitive data".to_string(),
});
dorks.push(GoogleDork {
category: "Code Leaks".to_string(),
query: format!("site:codepen.io \"{}\"", clean_domain),
description: "Find code snippets on CodePen".to_string(),
impact: "May expose frontend code with API endpoints".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:s3.amazonaws.com \"{}\"", clean_domain),
description: "Find AWS S3 buckets".to_string(),
impact: "Misconfigured S3 buckets may expose sensitive data".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:blob.core.windows.net \"{}\"", clean_domain),
description: "Find Azure Blob storage".to_string(),
impact: "Misconfigured blob storage may expose sensitive data".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:googleapis.com \"{}\"", clean_domain),
description: "Find Google Cloud Storage".to_string(),
impact: "May expose GCS buckets or API responses".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:drive.google.com \"{}\"", clean_domain),
description: "Find Google Drive files".to_string(),
impact: "Shared Drive files may contain sensitive information".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:dev.azure.com \"{}\"", clean_domain),
description: "Find Azure DevOps resources".to_string(),
impact: "May expose repositories, pipelines, or configurations".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:onedrive.live.com \"{}\"", clean_domain),
description: "Find OneDrive files".to_string(),
impact: "Shared OneDrive files may contain sensitive data".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:digitaloceanspaces.com \"{}\"", clean_domain),
description: "Find DigitalOcean Spaces".to_string(),
impact: "Misconfigured Spaces may expose sensitive files".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:sharepoint.com \"{}\"", clean_domain),
description: "Find SharePoint resources".to_string(),
impact: "May expose internal documents and files".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:s3-external-1.amazonaws.com \"{}\"", clean_domain),
description: "Find S3 external buckets".to_string(),
impact: "Additional S3 bucket configurations".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!(
"site:s3.dualstack.us-east-1.amazonaws.com \"{}\"",
clean_domain
),
description: "Find S3 dualstack buckets".to_string(),
impact: "IPv6-enabled S3 buckets".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:dropbox.com/s \"{}\"", clean_domain),
description: "Find Dropbox shared links".to_string(),
impact: "Shared Dropbox files may contain sensitive data".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Storage".to_string(),
query: format!("site:docs.google.com inurl:\"/d/\" \"{}\"", clean_domain),
description: "Find Google Docs".to_string(),
impact: "Shared documents may contain sensitive information".to_string(),
});
dorks.push(GoogleDork {
category: "Package Repositories".to_string(),
query: format!("site:jfrog.io \"{}\"", clean_domain),
description: "Find JFrog Artifactory resources".to_string(),
impact: "May expose build artifacts or internal packages".to_string(),
});
dorks.push(GoogleDork {
category: "Cloud Services".to_string(),
query: format!("site:firebaseio.com \"{}\"", clean_domain),
description: "Find Firebase databases".to_string(),
impact: "Misconfigured Firebase may expose data without authentication".to_string(),
});
dorks.push(GoogleDork {
category: "Security Information".to_string(),
query: "site:*/security.txt \"bounty\"".to_string(),
description: "Find security.txt files mentioning bug bounty".to_string(),
impact: "Identifies targets with bug bounty programs".to_string(),
});
dorks.push(GoogleDork {
category: "Code Repositories".to_string(),
query: format!("site:github.com \"{}\"", clean_domain),
description: "Find GitHub repositories mentioning the domain".to_string(),
impact: "May expose source code, credentials, or internal tools".to_string(),
});
dorks.push(GoogleDork {
category: "Code Repositories".to_string(),
query: format!("site:gitlab.com \"{}\"", clean_domain),
description: "Find GitLab repositories mentioning the domain".to_string(),
impact: "May expose source code or configurations".to_string(),
});
dorks.push(GoogleDork {
category: "Project Management".to_string(),
query: format!("site:trello.com \"{}\"", clean_domain),
description: "Find Trello boards".to_string(),
impact: "Public Trello boards may expose project details and credentials".to_string(),
});
let mut by_category: HashMap<String, Vec<GoogleDork>> = HashMap::new();
for dork in &dorks {
by_category
.entry(dork.category.clone())
.or_default()
.push(dork.clone());
}
GoogleDorkingResults {
domain: clean_domain.to_string(),
dorks,
by_category,
}
}
/// Generates the dork catalogue for the host in `url` and reports it as a
/// single informational finding.
///
/// The body contains no `.await`; the work is done synchronously by
/// `extract_domain` and `generate_dorks`. `_config` is accepted but currently
/// unused.
///
/// # Returns
///
/// `(findings, dork_count, results)` where `findings` holds exactly one
/// `GOOGLE_DORKS_GENERATED` entry, `dork_count` is `results.dorks.len()`, and
/// `results` is the full catalogue.
///
/// # Errors
///
/// The current body has no failing path and always returns `Ok`.
pub async fn scan(
    &self,
    url: &str,
    _config: &ScanConfig,
) -> anyhow::Result<(Vec<Vulnerability>, usize, GoogleDorkingResults)> {
    let domain = extract_domain(url);
    let results = self.generate_dorks(&domain);

    // `HashMap` iteration order is arbitrary and differs between runs; sort
    // the names so the evidence text is reproducible and diff-friendly.
    let mut category_names: Vec<&str> =
        results.by_category.keys().map(String::as_str).collect();
    category_names.sort_unstable();

    let vuln = Vulnerability {
        id: format!("google_dorking_{}", generate_uuid()),
        vuln_type: "GOOGLE_DORKS_GENERATED".to_string(),
        severity: Severity::Info,
        confidence: Confidence::High,
        category: "Reconnaissance".to_string(),
        url: url.to_string(),
        parameter: None,
        payload: "N/A".to_string(),
        description: format!(
            "Generated {} Google dork queries for {} across {} categories",
            results.dorks.len(),
            domain,
            results.by_category.len()
        ),
        evidence: Some(format!("Categories: {}", category_names.join(", "))),
        cwe: "CWE-200".to_string(),
        cvss: 0.0,
        verified: true,
        false_positive: false,
        remediation: "Review generated dorks manually in Google Search to find exposed resources. \
Remediate any findings by removing sensitive files, securing endpoints, or implementing \
proper access controls.".to_string(),
        discovered_at: chrono::Utc::now().to_rfc3339(),
        ml_data: None,
    };

    Ok((vec![vuln], results.dorks.len(), results))
}
pub fn format_dorks_for_display(results: &GoogleDorkingResults) -> String {
let mut output = String::new();
output.push_str(&format!(
"\n╔══════════════════════════════════════════════════════════════════╗\n"
));
output.push_str(&format!("║ GOOGLE DORKS FOR: {:<46} ║\n", results.domain));
output.push_str(&format!("║ Total Dorks: {:<51} ║\n", results.dorks.len()));
output.push_str(&format!(
"╚══════════════════════════════════════════════════════════════════╝\n\n"
));
let categories: Vec<&String> = {
let mut cats: Vec<_> = results.by_category.keys().collect();
cats.sort();
cats
};
for category in categories {
if let Some(dorks) = results.by_category.get(category) {
output.push_str(&format!("┌─ {} ({} dorks)\n", category, dorks.len()));
output.push_str("│\n");
for dork in dorks {
output.push_str(&format!("│ 📝 {}\n", dork.description));
output.push_str(&format!("│ 🔍 {}\n", dork.query));
output.push_str(&format!("│ ⚠️ Impact: {}\n", dork.impact));
output.push_str("│\n");
}
output.push_str(
"└────────────────────────────────────────────────────────────────────\n\n",
);
}
}
output
}
/// Serialises `results` into a JSON object.
///
/// The object has four keys:
/// - `"domain"`: the target string from `results.domain`;
/// - `"total_dorks"`: `results.dorks.len()`;
/// - `"categories"`: the category names, sorted ascending so the output is
///   identical from run to run (`HashMap` key order is otherwise arbitrary);
/// - `"dorks"`: one object per dork, in `results.dorks` order, with the keys
///   `category`, `query`, `description` and `impact`.
pub fn format_dorks_as_json(results: &GoogleDorkingResults) -> serde_json::Value {
    let dorks_json: Vec<serde_json::Value> = results
        .dorks
        .iter()
        .map(|d| {
            serde_json::json!({
                "category": d.category,
                "query": d.query,
                "description": d.description,
                "impact": d.impact
            })
        })
        .collect();

    // Keys are unique, so an unstable sort is sufficient.
    let mut categories: Vec<&String> = results.by_category.keys().collect();
    categories.sort_unstable();

    serde_json::json!({
        "domain": results.domain,
        "total_dorks": results.dorks.len(),
        "categories": categories,
        "dorks": dorks_json
    })
}
}
impl Default for GoogleDorkingScanner {
fn default() -> Self {
Self::new()
}
}
/// Extracts the bare host from a URL-ish string so it can be used in a Google
/// `site:` operator.
///
/// Steps, in order:
/// 1. trim surrounding whitespace;
/// 2. drop one leading `scheme://` (any case, any scheme), but only when the
///    text before `://` is a syntactically valid scheme, so a `://` that
///    appears later inside a path or query string is left alone;
/// 3. cut at the first `/`, `?` or `#` (end of the authority);
/// 4. drop `user:password@` credentials;
/// 5. drop a trailing `:port` (a bracketed IPv6 literal keeps its brackets).
///
/// The host is returned with its original letter case. Input that is already
/// a bare host is returned unchanged; empty input yields an empty string.
/// Un-bracketed IPv6 addresses are not supported.
fn extract_domain(url: &str) -> String {
    /// True when `s` is shaped like a URI scheme: a letter followed by
    /// letters, digits, `+`, `-` or `.`.
    fn is_uri_scheme(s: &str) -> bool {
        let mut chars = s.chars();
        matches!(chars.next(), Some(first) if first.is_ascii_alphabetic())
            && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
    }

    let url = url.trim();

    let without_scheme = match url.split_once("://") {
        Some((scheme, rest)) if is_uri_scheme(scheme) => rest,
        _ => url,
    };

    // The authority component ends at the first path, query or fragment delimiter.
    let authority = without_scheme
        .split(&['/', '?', '#'][..])
        .next()
        .unwrap_or(without_scheme);

    // Credentials, if any, precede the last `@`.
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);

    let host = if host_port.starts_with('[') {
        // Bracketed IPv6 literal: colons inside the brackets are not a port separator.
        host_port
            .find(']')
            .map_or(host_port, |end| &host_port[..=end])
    } else {
        match host_port.rsplit_once(':') {
            // Only strip what really looks like a port (digits, possibly empty).
            Some((name, port)) if port.bytes().all(|b| b.is_ascii_digit()) => name,
            _ => host_port,
        }
    };

    host.to_string()
}
/// Generates a random identifier in canonical UUID text form
/// (`xxxxxxxx-xxxx-4xxx-Nxxx-xxxxxxxxxxxx`, lowercase hex, `N` in `8..=b`).
///
/// The version nibble is forced to `4` and the variant bits to `10`, so the
/// result is a well-formed version-4 (random) UUID rather than merely a
/// UUID-shaped string. All remaining bits come from `rand::rng()`.
fn generate_uuid() -> String {
    use rand::Rng;
    let mut rng = rand::rng();

    // Third group: top four bits carry the UUID version (4 = random).
    let time_hi_and_version = (rng.random::<u16>() & 0x0fff) | 0x4000;
    // Fourth group: top two bits carry the variant (binary 10).
    let clock_seq_and_variant = (rng.random::<u16>() & 0x3fff) | 0x8000;

    format!(
        "{:08x}-{:04x}-{:04x}-{:04x}-{:012x}",
        rng.random::<u32>(),
        rng.random::<u16>(),
        time_hi_and_version,
        clock_seq_and_variant,
        // The last group is 48 bits wide; mask the 64-bit draw down to fit.
        rng.random::<u64>() & 0xffff_ffff_ffff
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scheme, path and surrounding whitespace are removed; bare hosts pass through.
    #[test]
    fn test_extract_domain() {
        assert_eq!(extract_domain("https://example.com"), "example.com");
        assert_eq!(extract_domain("https://example.com/path"), "example.com");
        assert_eq!(extract_domain("http://sub.example.com"), "sub.example.com");
        assert_eq!(extract_domain("example.com"), "example.com");
        assert_eq!(extract_domain("  https://example.com/a/b  "), "example.com");
    }

    /// The catalogue is non-empty and the grouped view holds every dork exactly once.
    #[test]
    fn test_generate_dorks() {
        let scanner = GoogleDorkingScanner::new();
        let results = scanner.generate_dorks("example.com");
        assert!(!results.dorks.is_empty());
        assert!(!results.by_category.is_empty());
        assert_eq!(results.domain, "example.com");

        let grouped: usize = results.by_category.values().map(Vec::len).sum();
        assert_eq!(grouped, results.dorks.len());

        // A scheme and stray whitespace on the input must not leak into the target.
        let cleaned = scanner.generate_dorks("  https://example.com  ");
        assert_eq!(cleaned.domain, "example.com");
    }

    /// Most queries must embed the exact target. Matching on the full domain
    /// (not a fragment such as "example") keeps this check meaningful.
    #[test]
    fn test_dorks_contain_domain() {
        let scanner = GoogleDorkingScanner::new();
        let results = scanner.generate_dorks("test.example.org");
        let dorks_with_domain = results
            .dorks
            .iter()
            .filter(|d| d.query.contains("test.example.org"))
            .count();
        assert!(dorks_with_domain > results.dorks.len() / 2);
    }

    /// Key categories exist and every group only contains dorks of its own category.
    #[test]
    fn test_categories_exist() {
        let scanner = GoogleDorkingScanner::new();
        let results = scanner.generate_dorks("example.com");
        assert!(results.by_category.contains_key("API Endpoints"));
        assert!(results.by_category.contains_key("Sensitive Files"));
        assert!(results.by_category.contains_key("Cloud Storage"));

        for (name, group) in &results.by_category {
            assert!(!group.is_empty());
            assert!(group.iter().all(|d| &d.category == name));
        }
    }

    /// The text report names the target and lists categories in sorted order.
    #[test]
    fn test_format_for_display() {
        let scanner = GoogleDorkingScanner::new();
        let results = scanner.generate_dorks("example.com");
        let output = GoogleDorkingScanner::format_dorks_for_display(&results);
        assert!(output.contains("example.com"));
        assert!(output.contains("GOOGLE DORKS"));

        let api_docs = output
            .find("API Documentation (")
            .expect("API Documentation section is rendered");
        let cloud = output
            .find("Cloud Storage (")
            .expect("Cloud Storage section is rendered");
        assert!(api_docs < cloud);
    }

    /// The JSON export carries all four keys with values consistent with the results.
    #[test]
    fn test_format_as_json() {
        let scanner = GoogleDorkingScanner::new();
        let results = scanner.generate_dorks("example.com");
        let json = GoogleDorkingScanner::format_dorks_as_json(&results);

        assert_eq!(json["domain"].as_str(), Some("example.com"));
        assert_eq!(
            json["total_dorks"].as_u64(),
            Some(results.dorks.len() as u64)
        );
        assert_eq!(
            json["dorks"].as_array().map(Vec::len),
            Some(results.dorks.len())
        );
        assert_eq!(
            json["categories"].as_array().map(Vec::len),
            Some(results.by_category.len())
        );
    }
}