use async_trait::async_trait;
use clap::{CommandFactory, Parser};
use regex::Regex;
use crate::ast::Value;
use crate::interpreter::{ExecResult, OutputData, OutputNode};
use crate::tools::{schema_from_clap, ExecContext, ToolCtx, GlobalFlags, Tool, ToolArgs, ToolSchema};
/// The `split` builtin: splits a string into an array of substrings.
///
/// Stateless unit type; all behavior lives in its `Tool` implementation.
pub struct Split;
// Command-line shape of `split`, used both to build the tool schema and to
// validate an invocation before it runs.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macro turns
// `///` doc comments into help/about text, so adding them here would change
// the generated help output.
#[derive(Parser, Debug)]
#[command(name = "split", about = "Split a string into an array")]
struct SplitArgs {
// `-r` / `--regex`: regular expression to split on.
#[arg(short = 'r', long = "regex")]
regex: Option<String>,
// `--limit`: maximum number of splits to perform.
#[arg(long)]
limit: Option<i64>,
// Flags shared by all tools; applied to the execution context in `execute`.
#[command(flatten)]
global: GlobalFlags,
// Remaining positional arguments (input string and/or delimiter).
args: Vec<String>,
}
#[async_trait]
impl Tool for Split {
    /// Returns the builtin's command name, `"split"`.
    fn name(&self) -> &str {
        "split"
    }

    /// Builds the tool schema from the clap definition of `SplitArgs`,
    /// attaching a list of `(description, command line)` usage examples.
    fn schema(&self) -> ToolSchema {
        schema_from_clap(
            &SplitArgs::command(),
            "split",
            "Split a string into an array",
            [
                ("Split on whitespace", "split \"a b c\""),
                ("Split on delimiter", "split \"a:b:c\" \":\""),
                ("Split on regex", "split \"a1b2c3\" -r \"[0-9]\""),
                ("Limit splits", "split \"a:b:c:d\" \":\" --limit=2"),
                ("Split stdin", "echo \"a,b,c\" | split \",\""),
                ("Split stdin on whitespace", "echo \"a b c\" | split"),
            ],
        )
    }

    /// Splits the input string into pieces.
    ///
    /// Input selection:
    /// - non-empty stdin is used as the input (trailing `\n` characters are
    ///   stripped) unless a named `string` argument is present; the first
    ///   positional argument is then the delimiter;
    /// - otherwise the `string` argument (named, or positional 0) is the input
    ///   and the delimiter is looked up at positional 1.
    ///
    /// Split mode, in priority order: `regex` / `r` pattern, then explicit
    /// delimiter, then runs of whitespace.
    ///
    /// `limit` is the maximum number of splits, so at most `limit + 1` pieces
    /// are produced. A missing, zero, negative, or unparseable limit means
    /// "no limit".
    ///
    /// On success the pieces are returned both as output nodes and as a JSON
    /// array in `data`. Failures are reported via `ExecResult::failure`:
    /// `2` when clap rejects the arguments, `1` for a missing `ExecContext`,
    /// missing input, or an invalid regex.
    async fn execute(&self, args: ToolArgs, ctx: &mut dyn ToolCtx) -> ExecResult {
        let Some(ctx) = ctx.as_any_mut().downcast_mut::<ExecContext>() else {
            return ExecResult::failure(1, "internal error: kernel builtin requires ExecContext");
        };

        // Run the arguments through clap so malformed invocations are rejected
        // up front and the shared global flags can be applied to the context.
        let parsed = match SplitArgs::try_parse_from(
            std::iter::once("split".to_string()).chain(args.to_argv()),
        ) {
            Ok(p) => p,
            Err(e) => return ExecResult::failure(2, format!("split: {e}")),
        };
        parsed.global.apply(ctx);

        let has_named_string = args.named.contains_key("string");
        let stdin = ctx.read_stdin_to_string().await;

        // `delim_idx` is the positional index at which the delimiter is looked
        // up: 0 when the input came from stdin, 1 when positional 0 is the input.
        let (input, delim_idx) =
            if let Some(s) = stdin.filter(|s| !s.is_empty() && !has_named_string) {
                (s.trim_end_matches('\n').to_string(), 0usize)
            } else if let Some(s) = args.get_string("string", 0) {
                (s, 1usize)
            } else {
                return ExecResult::failure(
                    1,
                    "split: no input (provide string argument or pipe stdin)",
                );
            };

        let delimiter = args.get_string("delimiter", delim_idx);
        let regex_pat = args
            .get_string("regex", usize::MAX)
            .or_else(|| args.get_string("r", usize::MAX));

        // 0 means "no limit". A checked conversion is used so a negative limit
        // falls back to 0 instead of wrapping to an enormous value.
        let limit = args
            .get("limit", usize::MAX)
            .and_then(|v| match v {
                Value::Int(i) => usize::try_from(*i).ok(),
                Value::String(s) => s.parse().ok(),
                _ => None,
            })
            .unwrap_or(0);
        // `splitn` takes the maximum number of pieces, i.e. splits + 1.
        // Saturate so a limit of `usize::MAX` cannot overflow.
        let max_pieces = limit.saturating_add(1);

        let parts: Vec<&str> = if let Some(pattern) = regex_pat {
            let re = match Regex::new(&pattern) {
                Ok(r) => r,
                Err(e) => return ExecResult::failure(1, format!("split: invalid regex: {}", e)),
            };
            if limit > 0 {
                re.splitn(&input, max_pieces).collect()
            } else {
                re.split(&input).collect()
            }
        } else if let Some(d) = delimiter {
            if limit > 0 {
                input.splitn(max_pieces, &d).collect()
            } else {
                input.split(&d).collect()
            }
        } else if limit > 0 {
            // Whitespace splitting with a limit: peel off up to `limit` words,
            // then keep whatever is left as a single final piece.
            let mut pieces = Vec::new();
            let mut rest = input.as_str();
            for _ in 0..limit {
                rest = rest.trim_start();
                match rest.find(char::is_whitespace) {
                    Some(pos) => {
                        pieces.push(&rest[..pos]);
                        rest = &rest[pos..];
                    }
                    // No more whitespace: the tail is handled below.
                    None => break,
                }
            }
            rest = rest.trim_start();
            if !rest.is_empty() {
                pieces.push(rest);
            }
            pieces
        } else {
            input.split_whitespace().collect()
        };

        // Expose the pieces both as output nodes and as structured JSON data.
        let nodes: Vec<OutputNode> = parts.iter().map(|s| OutputNode::new(*s)).collect();
        let json_array: Vec<serde_json::Value> = parts
            .iter()
            .map(|s| serde_json::Value::String((*s).to_string()))
            .collect();

        let mut result = ExecResult::with_output(OutputData::nodes(nodes));
        result.data = Some(Value::Json(serde_json::Value::Array(json_array)));
        result
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::{MemoryFs, VfsRouter};
    use std::sync::Arc;

    /// Builds an execution context backed by an in-memory filesystem mounted at `/`.
    fn make_ctx() -> ExecContext {
        let mut router = VfsRouter::new();
        router.mount("/", MemoryFs::new());
        ExecContext::new(Arc::new(router))
    }

    /// Builds `ToolArgs` whose positional arguments are the given strings, in order.
    fn positional_args(values: &[&str]) -> ToolArgs {
        let mut args = ToolArgs::new();
        for value in values {
            args.positional.push(Value::String((*value).into()));
        }
        args
    }

    /// Runs `split` in a fresh context, seeding stdin first when `stdin` is given.
    async fn run_split(stdin: Option<&str>, args: ToolArgs) -> ExecResult {
        let mut ctx = make_ctx();
        if let Some(text) = stdin {
            ctx.set_stdin(text.to_string());
        }
        Split.execute(args, &mut ctx).await
    }

    #[tokio::test]
    async fn test_split_whitespace() {
        let result = run_split(None, positional_args(&["hello world foo"])).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "hello\nworld\nfoo");

        // The structured payload must mirror the text output.
        match result.data.unwrap() {
            Value::Json(serde_json::Value::Array(items)) => {
                assert_eq!(
                    items,
                    vec![
                        serde_json::json!("hello"),
                        serde_json::json!("world"),
                        serde_json::json!("foo"),
                    ]
                );
            }
            _ => panic!("Expected JSON array"),
        }
    }

    #[tokio::test]
    async fn test_split_delimiter() {
        let result = run_split(None, positional_args(&["a:b:c", ":"])).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "a\nb\nc");
    }

    #[tokio::test]
    async fn test_split_regex() {
        let mut args = positional_args(&["a1b2c3"]);
        args.named
            .insert("regex".to_string(), Value::String("[0-9]".into()));
        let result = run_split(None, args).await;
        assert!(result.ok());
        // The input ends with a match, so the last piece is an empty string.
        assert_eq!(&*result.text_out(), "a\nb\nc\n");
    }

    #[tokio::test]
    async fn test_split_limit() {
        let mut args = positional_args(&["a:b:c:d", ":"]);
        args.named.insert("limit".to_string(), Value::Int(2));
        let result = run_split(None, args).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "a\nb\nc:d");
    }

    #[tokio::test]
    async fn test_split_empty() {
        let result = run_split(None, positional_args(&[""])).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "");
    }

    #[tokio::test]
    async fn test_split_invalid_regex() {
        let mut args = positional_args(&["test"]);
        args.named
            .insert("regex".to_string(), Value::String("[invalid".into()));
        let result = run_split(None, args).await;
        assert!(!result.ok());
        assert!(result.err.contains("invalid regex"));
    }

    #[tokio::test]
    async fn test_split_multiple_spaces() {
        let result = run_split(None, positional_args(&[" a b c "])).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "a\nb\nc");
    }

    #[tokio::test]
    async fn test_split_stdin_with_delimiter() {
        let result = run_split(Some("a,b,c"), positional_args(&[","])).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "a\nb\nc");
    }

    #[tokio::test]
    async fn test_split_stdin_whitespace() {
        let result = run_split(Some("hello world foo"), ToolArgs::new()).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "hello\nworld\nfoo");
    }

    #[tokio::test]
    async fn test_split_no_input_error() {
        let result = run_split(None, ToolArgs::new()).await;
        assert!(!result.ok());
        assert!(result.err.contains("no input"));
    }

    #[tokio::test]
    async fn test_split_stdin_trailing_newline() {
        let result = run_split(Some("a,b,c\n"), positional_args(&[","])).await;
        assert!(result.ok());
        assert_eq!(&*result.text_out(), "a\nb\nc");
    }
}