use std::{collections::HashMap, sync::LazyLock};
use regex::RegexBuilder;
use super::{Val, ValType};
pub(crate) type SplitPredType = fn(Val, Val) -> Val;
pub(crate) struct SplitPred;
impl SplitPred {
const JOIN_PRED_MAP: LazyLock<HashMap<&'static str, SplitPredType>> = LazyLock::new(|| {
HashMap::from([
("-split", isplit as _),
("-isplit", isplit as _),
("-csplit", csplit as _),
])
});
pub(crate) fn get(name: &str) -> Option<SplitPredType> {
Self::JOIN_PRED_MAP.get(name).copied()
}
}
pub fn powershell_split(
input: &str,
pattern: Option<String>,
max_splits: Option<usize>,
case_insensitive: bool,
) -> Result<Vec<String>, regex::Error> {
let pattern = pattern.unwrap_or(r"\s+".to_string());
if pattern.starts_with("(") && pattern.ends_with(")") {
let Some(pat) = pattern
.strip_prefix("(")
.unwrap_or_default()
.strip_suffix(")")
else {
return Ok(vec![]);
};
return powershell_split_preserve_delimeter(
input,
pat.to_string(),
max_splits,
case_insensitive,
);
}
let re = RegexBuilder::new(&pattern)
.case_insensitive(case_insensitive)
.build()?;
let result = if let Some(limit) = max_splits {
re.splitn(input, limit).map(|s| s.to_string()).collect()
} else {
re.split(input).map(|s| s.to_string()).collect()
};
Ok(result)
}
pub fn powershell_split_preserve_delimeter(
input: &str,
pattern: String,
max_splits: Option<usize>,
case_insensitive: bool,
) -> Result<Vec<String>, regex::Error> {
let re = RegexBuilder::new(&pattern)
.case_insensitive(case_insensitive)
.build()?;
let mut result = Vec::new();
let mut last_end = 0;
for (splits, mat) in re.find_iter(input).enumerate() {
if let Some(limit) = max_splits
&& splits >= limit
{
break;
}
if last_end < mat.start() {
result.push(input[last_end..mat.start()].to_string());
}
result.push(mat.as_str().to_string());
last_end = mat.end();
}
if last_end < input.len() {
result.push(input[last_end..].to_string());
}
Ok(result)
}
pub fn split_input_is_null(input: Vec<Val>) -> Val {
let mut res = vec![];
for i in input.into_iter() {
let s = i.cast_to_string();
res.extend(s.split_whitespace().map(|word| Val::String(word.into())));
}
Val::Array(res)
}
pub fn split(input: Val, args: Val, case_insensitive: bool) -> Val {
if input.ttype() == ValType::Null {
return split_input_is_null(args.flatten());
}
let args = args.cast_to_array();
let mut pattern = None;
let mut max_splits = None;
if !args.is_empty() {
pattern = Some(args[0].cast_to_string())
}
if args.len() > 1 {
let Ok(splits) = args[1].cast_to_int() else {
return Val::Null;
};
max_splits = Some(splits as usize);
}
let mut res = vec![];
let input_array = input.cast_to_array();
for i in input_array.into_iter() {
if let Ok(v) = powershell_split(
&i.cast_to_string(),
pattern.clone(),
max_splits,
case_insensitive,
) {
res.append(&mut v.iter().map(Val::from).collect::<Vec<_>>());
}
}
if res.is_empty() {
Val::Null
} else if res.len() == 1 {
res[0].clone()
} else {
Val::Array(res)
}
}
pub fn isplit(input: Val, args: Val) -> Val {
log::trace!("split: {:?} {:?}", input, args);
split(input, args, true)
}
pub fn csplit(input: Val, args: Val) -> Val {
split(input, args, false)
}
#[cfg(test)]
mod tests {
use crate::{NEWLINE, PowerShellSession, Variables};
#[test]
fn test_split_empty_input() {
let mut p = PowerShellSession::new().with_variables(Variables::new().values_persist());
assert_eq!(
p.safe_eval(r#" -sPlit "red yellow blue green" "#).unwrap(),
vec!["red", "yellow", "blue", "green"].join(NEWLINE)
);
assert_eq!(
p.safe_eval(r#" -split ("red", "yellow blue green") "#)
.unwrap(),
vec!["red", "yellow", "blue", "green"].join(NEWLINE)
);
assert_eq!(
p.safe_eval(r#" -split ("red", "yellow blue green"), 2 "#)
.unwrap(),
vec!["red", "yellow", "blue", "green", "2"].join(NEWLINE)
);
assert_eq!(
p.safe_eval(r#" -split @("red", "yellow blue green") "#)
.unwrap(),
vec!["red", "yellow", "blue", "green"].join(NEWLINE)
);
assert_eq!(
p.safe_eval(r#" -split @("red", "yellow blue green"), 2 "#)
.unwrap(),
vec!["red", "yellow", "blue", "green", "2"].join(NEWLINE)
);
let input = r#"
$nestedData = @{
Users = @(
@{ Name = "Alice"; Age = 30; Skills = @("PowerShell", "Python") }
@{ Name = "Bob"; Age = 25; Skills = @("Java", "C#") }
)
Settings = @{
Theme = "Dark"
Language = "en-US"
}
}
$scriptBlock = {
param($x, $y)
return $x + $y
}
"#;
p.parse_script(input).unwrap();
assert_eq!(
p.safe_eval(r#" -split ("red", ("yellow blue green", 1, $scriptblock, $nesteddata)) "#)
.unwrap(),
vec![
"red",
"yellow",
"blue",
"green",
"1",
"param($x,",
"$y)",
"return",
"$x",
"+",
"$y",
"System.Collections.Hashtable"
]
.join(NEWLINE)
);
}
#[test]
fn test_split_typical() {
assert_eq!(
PowerShellSession::new()
.safe_eval(r#" ("rredd", ("yellow blue green", 1)) -split "e" "#)
.unwrap(),
vec!["rr", "dd", "y", "llow blu", " gr", "", "n 1"].join(NEWLINE)
);
assert_eq!(PowerShellSession::new().safe_eval(r#" $c = "Mercury,Venus,Earth,Mars,Jupiter,Saturn,Uranus,Neptune";$c -split ",", 5 "#).unwrap(),
vec!["Mercury", "Venus", "Earth", "Mars", "Jupiter,Saturn,Uranus,Neptune"].join(NEWLINE));
assert_eq!(
PowerShellSession::new()
.safe_eval(r#" "Lastname:FirstName:Address" -split ":" "#)
.unwrap(),
vec!["Lastname", "FirstName", "Address"].join(NEWLINE)
);
assert_eq!(
PowerShellSession::new()
.safe_eval(r#" "Lastname:FirstName:Address" -split "(:)" "#)
.unwrap(),
vec!["Lastname", ":", "FirstName", ":", "Address"].join(NEWLINE)
);
assert_eq!(
PowerShellSession::new()
.safe_eval(r#" [string] (-isplit @('a,b c','1 2,3,4,5', '5,6,7,8')) "#)
.unwrap(),
"a,b c 1 2,3,4,5 5,6,7,8".to_string()
);
assert_eq!(
PowerShellSession::new()
.safe_eval(r#" $c = 'a,b,c','1,2,3,4,5', '5,6,7,8';[string]($c -split ',', 2) "#)
.unwrap(),
"a b,c 1 2,3,4,5 5 6,7,8".to_string()
);
assert_eq!(
PowerShellSession::new()
.safe_eval(r#" $c = 2121212, 1212;[string]($c -split '1', 2) "#)
.unwrap(),
"2 21212 212".to_string()
);
assert_eq!(
PowerShellSession::new()
.safe_eval(r#" [string]("Mercury,Venus,Earth" -split '[et]') "#)
.unwrap(),
"M rcury,V nus, ar h".to_string()
);
}
#[test]
fn test_strange_case_with_script_block() {
assert_eq!(PowerShellSession::new().safe_eval(r#" $c = "Mercury,Venus,Earth,Mars,Jupiter,Saturn,Uranus,Neptune";[string]($c -split {$_ -eq "e" -or $_ -eq "p"}) "#).unwrap(),"M rcury,V nus, arth,Mars,Ju it r,Saturn,Uranus,N tun".to_string());
}
}