use super::common::*;
use crate::error::{Error, Result};
use crate::types::web_search::WebSearchRequest;
/// Upper length bound handed to `validate_string_length` for the `query` field.
/// NOTE(review): whether the limit counts bytes or characters is decided by
/// `validate_string_length` (presumably from `super::common`) — confirm there.
const MAX_QUERY_LENGTH: usize = 1000;
/// Lower length bound handed to `validate_string_length` for the `query` field.
const MIN_QUERY_LENGTH: usize = 1;
/// Largest `num_results` value handed to `validate_numeric_range` as the upper bound.
const MAX_RESULTS: u32 = 100;
/// Smallest `num_results` value handed to `validate_numeric_range` as the lower bound.
const MIN_RESULTS: u32 = 1;
/// Validates a web search request before it is used.
///
/// The checks run in a fixed order and stop at the first failure:
///
/// 1. `query` must pass `validate_non_empty_string`.
/// 2. `query` must pass `validate_string_length` with the
///    `MIN_QUERY_LENGTH..MAX_QUERY_LENGTH` bounds.
/// 3. `query` must pass the content checks (non-blank, contains an
///    alphanumeric character, no unsafe patterns).
/// 4. `num_results`, when present, must pass `validate_numeric_range` with the
///    `MIN_RESULTS..MAX_RESULTS` bounds. An absent value is accepted as-is.
///
/// # Errors
///
/// Returns the error produced by the first check that fails.
pub fn validate_web_search_request(request: &WebSearchRequest) -> Result<()> {
    let query = &request.query;

    // Order matters: callers see the error of the earliest failing check.
    validate_non_empty_string(query, "query")?;
    validate_string_length(query, "query", MIN_QUERY_LENGTH, MAX_QUERY_LENGTH)?;
    validate_query_content(query)?;

    if let Some(requested) = request.num_results {
        validate_numeric_range(requested, "num_results", MIN_RESULTS, MAX_RESULTS)?;
    }

    Ok(())
}
fn validate_query_content(query: &str) -> Result<()> {
let trimmed = query.trim();
if trimmed.is_empty() {
return Err(Error::ConfigError(
"Search query cannot be empty or contain only whitespace".to_string(),
));
}
let has_alphanumeric = trimmed.chars().any(|c| c.is_alphanumeric());
if !has_alphanumeric {
return Err(Error::ConfigError(
"Search query must contain at least some alphanumeric characters".to_string(),
));
}
validate_query_safety(trimmed)?;
Ok(())
}
fn validate_query_safety(query: &str) -> Result<()> {
let query_lower = query.to_lowercase();
let problematic_patterns = [
"javascript:",
"data:",
"vbscript:",
"file:",
"ftp:",
"<script",
"</script",
"onclick",
"onerror",
"onload",
"onmouseover",
"eval(",
"alert(",
"confirm(",
"prompt(",
];
for pattern in &problematic_patterns {
if query_lower.contains(pattern) {
return Err(Error::ConfigError(format!(
"Search query contains potentially unsafe content: {}",
pattern
)));
}
}
if has_excessive_repetition(query) {
return Err(Error::ConfigError(
"Search query appears to contain excessive repetitive content".to_string(),
));
}
if looks_like_url_injection(query) {
return Err(Error::ConfigError(
"Search query appears to contain URL injection patterns".to_string(),
));
}
Ok(())
}
/// Reports whether a query contains an overly long run of one character.
///
/// Returns `true` only when the query has at least 10 characters and some
/// character occurs more than 10 times in a row (i.e. a run of 11 or more).
/// Counting is done per `char`, so multi-byte characters count once each.
pub fn has_excessive_repetition(query: &str) -> bool {
    if query.chars().count() < 10 {
        return false;
    }

    let mut previous: Option<char> = None;
    let mut run_length = 0usize;

    for current in query.chars() {
        run_length = if previous == Some(current) {
            run_length + 1
        } else {
            1
        };

        // A run of 11 identical characters is already conclusive.
        if run_length > 10 {
            return true;
        }

        previous = Some(current);
    }

    false
}
/// Heuristic for queries stuffed with URLs.
///
/// Returns `true` when the query contains more than two non-overlapping,
/// case-sensitive occurrences of `"http"` or more than two occurrences of
/// `"://"`.
pub fn looks_like_url_injection(query: &str) -> bool {
    let occurs_more_than_twice = |needle: &str| query.matches(needle).count() > 2;

    occurs_more_than_twice("http") || occurs_more_than_twice("://")
}
/// Validates `query` and returns advisory suggestions for improving it.
///
/// The query is first validated exactly like a [`WebSearchRequest`] with no
/// `num_results`. If it is valid, zero or more human-readable hints are
/// returned:
///
/// * fewer than 5 characters: suggest a more specific query;
/// * fewer than 15 characters with no `+`, `-` or whitespace (a single bare
///   term): suggest adding keywords;
/// * starts (case-insensitively) with `what is`, `how do` or `why does`:
///   suggest rephrasing the question as keywords.
///
/// Lengths are measured in characters, not bytes, so non-ASCII queries are
/// judged by the same thresholds as ASCII ones.
///
/// # Errors
///
/// Returns the validation error when the query itself is not acceptable.
pub fn validate_and_suggest_query_improvement(query: &str) -> Result<Vec<String>> {
    validate_web_search_request(&WebSearchRequest {
        query: query.to_string(),
        num_results: None,
    })?;

    // `str::len` is a byte count; the thresholds below are about what the
    // user typed, so count characters instead.
    let char_count = query.chars().count();
    // Lowercase once instead of once per prefix comparison.
    let lowered = query.to_lowercase();

    let mut suggestions = Vec::new();

    if char_count < 5 {
        suggestions
            .push("Consider using a more specific search query for better results".to_string());
    }

    let is_single_bare_term = !query
        .chars()
        .any(|c| c == '+' || c == '-' || c.is_whitespace());
    if char_count < 15 && is_single_bare_term {
        suggestions.push("Consider adding more keywords to your search query".to_string());
    }

    let reads_like_question = ["what is", "how do", "why does"]
        .iter()
        .any(|prefix| lowered.starts_with(*prefix));
    if reads_like_question {
        suggestions
            .push("Your query looks like a question - consider rephrasing as keywords".to_string());
    }

    Ok(suggestions)
}
/// Estimates how complex a search query is, as a small integer score.
///
/// Every query starts at 1 and gains points for each signal present:
///
/// | Signal                                              | Points |
/// |-----------------------------------------------------|--------|
/// | 50 or more characters                               | 1      |
/// | contains a double quote (`"`)                       | 1      |
/// | contains `AND:`, `OR:` or `NOT:` (case-sensitive)   | 2      |
/// | contains `site:` (case-insensitive)                 | 1      |
/// | contains `filetype:` (case-insensitive)             | 1      |
///
/// Each signal counts at most once regardless of how often it occurs. The
/// result is clamped to 8; with the current signals the highest reachable
/// score is 7, so the clamp is only a safety net.
///
/// Length is measured in characters rather than bytes so that non-ASCII
/// queries are not scored as "long" merely because of their encoding.
pub fn estimate_query_complexity(query: &str) -> u8 {
    // Lowercase once for both case-insensitive operator checks.
    let lowered = query.to_lowercase();
    let has_boolean_operator = ["AND:", "OR:", "NOT:"]
        .iter()
        .any(|operator| query.contains(*operator));

    let signals: [(bool, u8); 5] = [
        (query.chars().count() >= 50, 1),
        (query.contains('"'), 1),
        (has_boolean_operator, 2),
        (lowered.contains("site:"), 1),
        (lowered.contains("filetype:"), 1),
    ];

    let bonus: u8 = signals
        .iter()
        .filter(|signal| signal.0)
        .map(|signal| signal.1)
        .sum();

    (1 + bonus).min(8)
}
pub fn validate_results_for_complexity(request: &WebSearchRequest) -> Result<()> {
let complexity = estimate_query_complexity(&request.query);
if let Some(num_results) = request.num_results {
let max_allowed = match complexity {
1..=2 => 100, 3..=4 => 50, 5..=6 => 25, _ => 10, };
if num_results > max_allowed {
return Err(Error::ConfigError(format!(
"Query complexity ({}) limits maximum results to {}. Consider simplifying your query or requesting fewer results.",
complexity, max_allowed
)));
}
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline request that passes every validator; tests tweak one field at a time.
    fn create_valid_web_search_request() -> WebSearchRequest {
        WebSearchRequest {
            query: String::from("rust programming language"),
            num_results: Some(10),
        }
    }

    /// Baseline request with its query replaced by `query`.
    fn request_with_query(query: &str) -> WebSearchRequest {
        let mut request = create_valid_web_search_request();
        request.query = query.to_string();
        request
    }

    /// Asserts that the baseline request with this query passes validation.
    #[track_caller]
    fn assert_query_accepted(query: &str) {
        assert!(validate_web_search_request(&request_with_query(query)).is_ok());
    }

    /// Asserts that the baseline request with this query fails validation.
    #[track_caller]
    fn assert_query_rejected(query: &str) {
        assert!(validate_web_search_request(&request_with_query(query)).is_err());
    }

    // ---- basic query validation -------------------------------------------

    #[test]
    fn test_validate_web_search_request_valid() {
        assert!(validate_web_search_request(&create_valid_web_search_request()).is_ok());
    }

    #[test]
    fn test_validate_web_search_request_empty_query() {
        assert_query_rejected("");
    }

    #[test]
    fn test_validate_web_search_request_whitespace_query() {
        assert_query_rejected(" ");
    }

    #[test]
    fn test_validate_web_search_request_tab_newline_query() {
        assert_query_rejected("\t\n\r");
    }

    #[test]
    fn test_validate_web_search_request_minimum_length() {
        assert_query_accepted("a");
    }

    #[test]
    fn test_validate_web_search_request_maximum_length() {
        // Varied text so the repetition detector does not fire.
        let filler =
            "This is a test query with varied content to avoid repetition detection ".repeat(20);
        let request = request_with_query(&filler[..1000]);
        assert_eq!(request.query.len(), 1000);
        assert!(validate_web_search_request(&request).is_ok());
    }

    #[test]
    fn test_validate_web_search_request_too_long() {
        assert_query_rejected(&"a".repeat(1001));
    }

    #[test]
    fn test_validate_web_search_request_no_alphanumeric() {
        ["!@#$%^&*()", "-----", " ", "!@# $%^", "....", "???"]
            .iter()
            .for_each(|query| assert_query_rejected(query));
    }

    #[test]
    fn test_validate_web_search_request_with_alphanumeric() {
        [
            "test123",
            "hello world",
            "rust programming",
            "a!b@c#",
            "123 456",
        ]
        .iter()
        .for_each(|query| assert_query_accepted(query));
    }

    // ---- unsafe content ----------------------------------------------------

    #[test]
    fn test_validate_web_search_request_javascript_injection() {
        [
            "test javascript:alert('xss')",
            "javascript:void(0)",
            "JAVASCRIPT:alert(1)",
            "test javascript:document.cookie",
        ]
        .iter()
        .for_each(|query| assert_query_rejected(query));
    }

    #[test]
    fn test_validate_web_search_request_data_uri() {
        [
            "test data:text/html,<script>alert(1)</script>",
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
            "DATA:application/json,{\"malicious\":true}",
        ]
        .iter()
        .for_each(|query| assert_query_rejected(query));
    }

    #[test]
    fn test_validate_web_search_request_script_tags() {
        [
            "test <script>alert('xss')</script>",
            "<script>document.location='http://evil.com'</script>",
            "test <SCRIPT>alert(1)</SCRIPT>",
            "<script src='evil.js'></script>",
        ]
        .iter()
        .for_each(|query| assert_query_rejected(query));
    }

    #[test]
    fn test_validate_web_search_request_event_handlers() {
        [
            "test onclick='alert(1)'",
            "onerror='malicious()'",
            "onload='document.cookie'",
            "ONCLICK='xss()'",
            "test onmouseover='dangerous()'",
        ]
        .iter()
        .for_each(|query| assert_query_rejected(query));
    }

    #[test]
    fn test_validate_web_search_request_eval_attempts() {
        [
            "eval('alert(1)')",
            "test eval('malicious code')",
            " EVAL('dangerous') ",
            "window.eval('xss')",
        ]
        .iter()
        .for_each(|query| assert_query_rejected(query));
    }

    // ---- num_results -------------------------------------------------------

    #[test]
    fn test_validate_web_search_request_valid_results() {
        for allowed in [1, 5, 10, 25, 50, 100] {
            let mut request = create_valid_web_search_request();
            request.num_results = Some(allowed);
            assert!(validate_web_search_request(&request).is_ok());
        }
    }

    #[test]
    fn test_validate_web_search_request_too_many_results() {
        for excessive in [101, 1000] {
            let mut request = create_valid_web_search_request();
            request.num_results = Some(excessive);
            assert!(validate_web_search_request(&request).is_err());
        }
    }

    #[test]
    fn test_validate_web_search_request_zero_results() {
        let mut request = create_valid_web_search_request();
        request.num_results = Some(0);
        assert!(validate_web_search_request(&request).is_err());
    }

    #[test]
    fn test_validate_web_search_request_no_results() {
        let mut request = create_valid_web_search_request();
        request.num_results = None;
        assert!(validate_web_search_request(&request).is_ok());
    }

    // ---- heuristics --------------------------------------------------------

    #[test]
    fn test_has_excessive_repetition() {
        let cases = [
            ("normal query", false),
            ("hello world", false),
            ("aaaaaaaaaaaaaaaaaaa", true),
            ("bbbbbbbbbbbbbbbbbbbb", true),
            ("aa aa aa aa", false),
            ("hello!!!!!", false),
            ("??????????????", true),
            ("$$$$$$$$$$$$$$$$$$", true),
            ("test", false),
            ("a", false),
        ];
        cases.iter().for_each(|&(query, expected)| {
            assert_eq!(
                has_excessive_repetition(query),
                expected,
                "Query '{}' should have excessive repetition: {}",
                query,
                expected
            );
        });
    }

    #[test]
    fn test_looks_like_url_injection() {
        let cases = [
            ("normal search query", false),
            ("rust programming tutorial", false),
            ("http://example.com", false),
            ("https://example.com test", false),
            (
                "http://example.com http://malicious.com http://spam.com",
                true,
            ),
            (
                "https://site1.com https://site2.com https://site3.com",
                true,
            ),
            ("http://test.com https://test2.com", false),
            ("://not-a-url", false),
            ("test http://example.com", false),
        ];
        cases.iter().for_each(|&(query, expected)| {
            assert_eq!(
                looks_like_url_injection(query),
                expected,
                "Query '{}' should look like URL injection: {}",
                query,
                expected
            );
        });
    }

    #[test]
    fn test_estimate_query_complexity() {
        let cases = [
            ("simple", 1),
            ("rust programming", 1),
            ("longer query with more words than average length that exceeds fifty characters", 2),
            ("\"exact phrase\" search", 2),
            ("site:example.com search", 2),
            ("filetype:pdf documents", 2),
            ("AND:this OR:that NOT:other", 3),
            ("\"phrase\" site:example.com AND:search", 5),
            ("AND:complex OR:query site:example.com filetype:pdf \"exact phrase\"", 7),
            ("AND:very OR:complex AND:query OR:with AND:multiple OR:operators site:test.com filetype:pdf \"phrase\" \"another phrase\"", 7),
        ];
        cases.iter().for_each(|&(query, expected)| {
            let actual = estimate_query_complexity(query);
            assert_eq!(
                actual, expected,
                "Query '{}' complexity should be {}, got {}",
                query, expected, actual
            );
        });
    }

    #[test]
    fn test_validate_results_for_complexity() {
        // Complexity 1: the full 100 results are allowed.
        let simple_request = WebSearchRequest {
            query: String::from("simple search"),
            num_results: Some(100),
        };
        assert!(validate_results_for_complexity(&simple_request).is_ok());

        // Quoted phrase: still within the top tier.
        let moderate_request = WebSearchRequest {
            query: String::from("\"exact phrase\" search"),
            num_results: Some(50),
        };
        assert!(validate_results_for_complexity(&moderate_request).is_ok());

        // Boolean operators plus `site:`: 25 is within the limit.
        let complex_request = WebSearchRequest {
            query: String::from("AND:complex OR:query site:example.com"),
            num_results: Some(25),
        };
        assert!(validate_results_for_complexity(&complex_request).is_ok());

        // Every signal present: 15 results is over the limit.
        let very_complex_request = WebSearchRequest {
            query: String::from(
                "AND:very OR:complex AND:query OR:with AND:multiple site:test.com filetype:pdf \"phrase\"",
            ),
            num_results: Some(15),
        };
        assert!(validate_results_for_complexity(&very_complex_request).is_err());

        let too_many_results_request = WebSearchRequest {
            query: String::from(
                "AND:complex OR:query site:example.com filetype:pdf \"exact phrase\"",
            ),
            num_results: Some(50),
        };
        assert!(validate_results_for_complexity(&too_many_results_request).is_err());
    }

    #[test]
    fn test_validate_and_suggest_query_improvement() {
        let suggest = |query: &str| validate_and_suggest_query_improvement(query).unwrap();

        let hints = suggest("rust");
        assert!(!hints.is_empty());
        assert!(hints.iter().any(|hint| hint.contains("more specific")));

        let hints = suggest("hello");
        assert!(!hints.is_empty());
        assert!(hints.iter().any(|hint| hint.contains("keywords")));

        let hints = suggest("what is rust");
        assert!(!hints.is_empty());
        assert!(hints.iter().any(|hint| hint.contains("question")));

        let hints = suggest("rust programming tutorial guide advanced comprehensive");
        assert_eq!(hints.len(), 0);

        let hints = suggest("rust+programming+tutorial");
        assert_eq!(hints.len(), 0);
    }

    // ---- unicode and mixed content ------------------------------------------

    #[test]
    fn test_validate_web_search_request_unicode() {
        [
            "программирование на rust",
            "rust编程语言",
            "rustプログラミング",
            "rust programmación",
            "rust 🦀 programming",
        ]
        .iter()
        .for_each(|query| assert_query_accepted(query));
    }

    #[test]
    fn test_validate_web_search_request_edge_cases() {
        assert_query_accepted("rust programming 🦀 tutorial 2024");
        assert_query_accepted("rust 1.75 programming tutorial");
        assert_query_accepted("rust + programming - tutorial");
    }

    #[test]
    fn test_validate_web_search_request_comprehensive() {
        let mut request =
            request_with_query("rust programming language tutorial for beginners 2024");
        request.num_results = Some(25);

        assert!(validate_web_search_request(&request).is_ok());
        assert!(validate_results_for_complexity(&request).is_ok());

        let hints = validate_and_suggest_query_improvement(&request.query).unwrap();
        assert_eq!(hints.len(), 0);
    }
}