use async_trait::async_trait;
use clap::{CommandFactory, Parser};
use crate::interpreter::{ExecResult, OutputData, OutputNode};
use crate::tools::{schema_from_clap, ExecContext, ToolCtx, GlobalFlags, Tool, ToolArgs, ToolSchema};
/// The `wc` tool: a stateless unit struct that implements [`Tool`] in this file.
pub struct Wc;
// Command-line surface of `wc`, declared with clap's derive API.
//
// NOTE: plain `//` comments are used here on purpose. With clap derive, `///`
// doc comments are picked up as help text, which would change the generated
// command (and anything built from `WcArgs::command()`).
#[derive(Parser, Debug)]
#[command(name = "wc", about = "Print line, word, and byte counts")]
struct WcArgs {
// `-l` / `--lines`: include the line count column.
#[arg(short = 'l', long = "lines")]
lines: bool,
// `-w` / `--words`: include the word count column.
#[arg(short = 'w', long = "words")]
words: bool,
// `-m` / `--chars`: include the character count column.
#[arg(short = 'm', long = "chars")]
chars: bool,
// `-c` / `--bytes`: include the byte count column.
#[arg(short = 'c', long = "bytes")]
bytes: bool,
// Flags shared across tools; applied to the context by `execute`.
#[command(flatten)]
global: GlobalFlags,
// Positional path arguments as seen by clap. `execute` expands paths from
// `ToolArgs::positional` instead of reading this field.
paths: Vec<String>,
}
#[async_trait]
impl Tool for Wc {
    /// Returns the tool's name, `wc`.
    fn name(&self) -> &str {
        "wc"
    }

    /// Derives the tool schema from the clap definition of `WcArgs` and
    /// attaches three usage examples.
    fn schema(&self) -> ToolSchema {
        let command = WcArgs::command();
        schema_from_clap(
            &command,
            "wc",
            "Print line, word, and byte counts",
            [
                ("Count all (lines, words, bytes)", "wc file.txt"),
                ("Count lines only", "wc -l file.txt"),
                ("Count words from stdin", "echo 'hello world' | wc -w"),
            ],
        )
    }

    /// Counts lines, words, characters and bytes of the given paths, or of
    /// stdin when no path is supplied, and returns the result as a table.
    ///
    /// Failure modes visible here:
    /// - exit code 1 when `ctx` is not an `ExecContext` or path expansion fails,
    /// - exit code 2 when the arguments do not parse,
    /// - exit code 1 with the collected messages in `err` when at least one
    ///   file could not be read (rows for the readable files are still emitted).
    async fn execute(&self, args: ToolArgs, ctx: &mut dyn ToolCtx) -> ExecResult {
        let Some(ctx) = ctx.as_any_mut().downcast_mut::<ExecContext>() else {
            return ExecResult::failure(1, "internal error: kernel builtin requires ExecContext");
        };

        // clap expects argv[0] to be the program name.
        let argv = std::iter::once("wc".to_string()).chain(args.to_argv());
        let parsed = match WcArgs::try_parse_from(argv) {
            Ok(parsed) => parsed,
            Err(e) => return ExecResult::failure(2, format!("wc: {e}")),
        };
        parsed.global.apply(ctx);

        let (lines_only, words_only, chars_only, bytes_only) =
            (parsed.lines, parsed.words, parsed.chars, parsed.bytes);
        // With no selector flag at all, the default column set is shown.
        let show_all = !(lines_only || words_only || chars_only || bytes_only);

        // Binds the flag set once so each row only has to supply its counts.
        let row = move |l: usize, w: usize, c: usize, b: usize| {
            build_cells(l, w, c, b, lines_only, words_only, chars_only, bytes_only, show_all)
        };

        let paths = match ctx.expand_paths(&args.positional).await {
            Ok(expanded) => expanded,
            Err(e) => return ExecResult::failure(1, format!("wc: {e}")),
        };
        let headers = build_headers(lines_only, words_only, chars_only, bytes_only, show_all);

        // No paths: count stdin and emit a single unnamed row.
        if paths.is_empty() {
            let input = ctx.read_stdin_to_bytes().await.unwrap_or_default();
            let (lc, wc, cc, bc) = count_content(&input);
            let node = OutputNode::new("").with_cells(row(lc, wc, cc, bc));
            return ExecResult::with_output(OutputData::table(headers, vec![node]));
        }

        let mut nodes = Vec::new();
        let mut failures = Vec::new();
        let (mut sum_lines, mut sum_words, mut sum_chars, mut sum_bytes) =
            (0usize, 0usize, 0usize, 0usize);

        for path in &paths {
            let resolved = ctx.resolve_path(path);
            let mut tally = WcCounter::default();
            let outcome = ctx
                .read_file_chunked(&resolved, ExecContext::STREAM_CHUNK_SIZE, |chunk| {
                    tally.push(chunk);
                    std::ops::ControlFlow::Continue(())
                })
                .await;

            // A failed file is reported but does not stop the remaining files.
            if let Err(e) = outcome {
                failures.push(format!("wc: {path}: {e}"));
                continue;
            }

            let (lc, wc, cc, bc) = tally.finish();
            sum_lines += lc;
            sum_words += wc;
            sum_chars += cc;
            sum_bytes += bc;
            nodes.push(OutputNode::new(path.as_str()).with_cells(row(lc, wc, cc, bc)));
        }

        // A summary row is added whenever more than one path was requested.
        if paths.len() > 1 {
            let totals = row(sum_lines, sum_words, sum_chars, sum_bytes);
            nodes.push(OutputNode::new("total").with_cells(totals));
        }

        let mut result = ExecResult::with_output(OutputData::table(headers, nodes));
        if !failures.is_empty() {
            result.code = 1;
            result.err = failures.join("\n");
        }
        result
    }
}
/// Incremental line/word/char/byte counter that is fed one chunk at a time.
///
/// Bytes are buffered in `carry` until a `\n` arrives, so each line is decoded
/// and counted as a whole even when a chunk boundary falls in the middle of a
/// word or of a multi-byte UTF-8 sequence. Memory use is therefore bounded by
/// the longest line seen, not by the chunk size.
#[derive(Default)]
struct WcCounter {
    /// Bytes of the current line that has not been newline-terminated yet.
    /// Invariant: never contains `\n` between calls to `push`.
    carry: Vec<u8>,
    /// Number of `\n` bytes seen so far.
    newlines: usize,
    /// Whitespace-separated words counted in completed lines.
    words: usize,
    /// Characters counted in completed lines, excluding the newlines themselves.
    chars: usize,
    /// Total number of bytes pushed.
    bytes: usize,
}

impl WcCounter {
    /// Feeds the next chunk of input and counts every line it completes.
    fn push(&mut self, chunk: &[u8]) {
        self.bytes += chunk.len();

        // The carried prefix holds no newline (see the invariant on `carry`),
        // so only the freshly appended bytes need to be searched. This keeps a
        // long line that spans many chunks linear instead of rescanning the
        // whole buffer on every call.
        let mut scan_from = self.carry.len();
        self.carry.extend_from_slice(chunk);

        let mut start = 0;
        while let Some(pos) = self.carry[scan_from..].iter().position(|&b| b == b'\n') {
            let line_end = scan_from + pos;
            self.count_line(start, line_end);
            self.newlines += 1;
            start = line_end + 1;
            scan_from = start;
        }

        // Drop everything up to and including the last newline found.
        if start > 0 {
            self.carry.drain(..start);
        }
    }

    /// Adds the words and characters of `carry[lo..hi]` (a line without its
    /// terminating newline) to the running totals. Invalid UTF-8 is decoded
    /// lossily.
    fn count_line(&mut self, lo: usize, hi: usize) {
        let line = String::from_utf8_lossy(&self.carry[lo..hi]);
        self.words += line.split_whitespace().count();
        self.chars += line.chars().count();
    }

    /// Consumes the counter and returns `(lines, words, chars, bytes)`.
    ///
    /// Non-empty trailing data without a final newline counts as one more
    /// line. Each newline is added back to the character total because
    /// `count_line` only sees line contents.
    fn finish(mut self) -> (usize, usize, usize, usize) {
        let has_trailing = !self.carry.is_empty();
        if has_trailing {
            self.count_line(0, self.carry.len());
        }
        let lines = self.newlines + usize::from(has_trailing);
        let chars = self.chars + self.newlines;
        (lines, self.words, chars, self.bytes)
    }
}
/// Counts a complete in-memory buffer in one pass over its lossily decoded text.
///
/// Returns `(lines, words, chars, bytes)`. Lines, words and characters are
/// taken from the decoded text; bytes is the raw length of `input`.
fn count_content(input: &[u8]) -> (usize, usize, usize, usize) {
    let decoded = String::from_utf8_lossy(input);
    (
        decoded.lines().count(),
        decoded.split_whitespace().count(),
        decoded.chars().count(),
        input.len(),
    )
}
/// Builds the table header row for the selected columns.
///
/// `FILE` always comes first, followed by `LINES`, `WORDS` and `BYTES` when
/// their flag or `show_all` is set, and finally `CHARS`, which only appears
/// when `chars_only` is set.
fn build_headers(
    lines_only: bool,
    words_only: bool,
    chars_only: bool,
    bytes_only: bool,
    show_all: bool,
) -> Vec<String> {
    // Column order is fixed; each entry pairs "is it shown?" with its title.
    let optional = [
        (show_all || lines_only, "LINES"),
        (show_all || words_only, "WORDS"),
        (show_all || bytes_only, "BYTES"),
        (chars_only, "CHARS"),
    ];
    std::iter::once("FILE")
        .chain(
            optional
                .iter()
                .filter(|(shown, _)| *shown)
                .map(|(_, title)| *title),
        )
        .map(str::to_string)
        .collect()
}
/// Renders one table row for the selected columns, in the same order that
/// `build_headers` uses after its leading `FILE` column: lines, words, bytes,
/// then characters (the latter only when `chars_only` is set).
// The four counts plus five flags mirror the call sites; grouping them would
// change the signature.
#[allow(clippy::too_many_arguments)]
fn build_cells(
    line_count: usize,
    word_count: usize,
    char_count: usize,
    byte_count: usize,
    lines_only: bool,
    words_only: bool,
    chars_only: bool,
    bytes_only: bool,
    show_all: bool,
) -> Vec<String> {
    // Each entry pairs "is it shown?" with the value for that column.
    let columns = [
        (show_all || lines_only, line_count),
        (show_all || words_only, word_count),
        (show_all || bytes_only, byte_count),
        (chars_only, char_count),
    ];
    columns
        .iter()
        .filter(|(shown, _)| *shown)
        .map(|(_, value)| value.to_string())
        .collect()
}
// Tests for the `wc` tool: end-to-end runs through `Wc.execute` against an
// in-memory filesystem, plus direct checks of `WcCounter`.
//
// Most end-to-end assertions are substring checks on the rendered output
// (`contains("2")` and similar), so they confirm a value is present somewhere
// in the table rather than pinning it to a specific column.
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::Value;
use crate::vfs::{Filesystem, MemoryFs, VfsRouter};
use std::path::Path;
use std::sync::Arc;
// Builds a context over a `MemoryFs` mounted at `/` holding two fixtures:
// `test.txt` (2 lines, 5 words, 24 bytes) and `unicode.txt`
// (12 characters, 14 bytes).
async fn make_ctx() -> ExecContext {
let mut vfs = VfsRouter::new();
let mem = MemoryFs::new();
mem.write(Path::new("test.txt"), b"hello world\nfoo bar baz\n")
.await
.unwrap();
mem.write(Path::new("unicode.txt"), "héllo wörld\n".as_bytes())
.await
.unwrap();
vfs.mount("/", mem);
ExecContext::new(Arc::new(vfs))
}
// No flags: line, word and byte counts plus the file name all appear.
#[tokio::test]
async fn test_wc_all_counts() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/test.txt".into()));
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("2"));
assert!(result.text_out().contains("5"));
assert!(result.text_out().contains("24"));
assert!(result.text_out().contains("/test.txt"));
}
// `-l`: the line count and the file name appear.
#[tokio::test]
async fn test_wc_lines_only() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/test.txt".into()));
args.flags.insert("l".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("2"));
assert!(result.text_out().contains("/test.txt"));
}
// `-w`: the word count appears.
#[tokio::test]
async fn test_wc_words_only() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/test.txt".into()));
args.flags.insert("w".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("5"));
}
// `-c`: the byte count appears.
#[tokio::test]
async fn test_wc_bytes_only() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/test.txt".into()));
args.flags.insert("c".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("24"));
}
// `-m` on the non-ASCII fixture: characters, not bytes, are counted.
#[tokio::test]
async fn test_wc_chars() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/unicode.txt".into()));
args.flags.insert("m".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("12"));
}
// No paths: stdin is counted and no file path shows up in the output.
#[tokio::test]
async fn test_wc_stdin() {
let mut ctx = make_ctx().await;
ctx.set_stdin("one two three\nfour five\n".to_string());
let args = ToolArgs::new();
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("2"));
assert!(result.text_out().contains("5"));
assert!(!result.text_out().contains("/"));
}
// A missing file makes the run unsuccessful.
#[tokio::test]
async fn test_wc_file_not_found() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/nonexistent".into()));
let result = Wc.execute(args, &mut ctx).await;
assert!(!result.ok());
}
// `-l -w` together: both selected counts appear.
#[tokio::test]
async fn test_wc_lines_and_words() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/test.txt".into()));
args.flags.insert("l".to_string());
args.flags.insert("w".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("2")); assert!(result.text_out().contains("5")); }
// Same non-ASCII file with `-c` and with `-m`: 14 bytes versus 12 characters.
#[tokio::test]
async fn test_wc_unicode_chars_vs_bytes() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/unicode.txt".into()));
args.flags.insert("c".to_string());
let result_bytes = Wc.execute(args, &mut ctx).await;
assert!(result_bytes.ok());
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("/unicode.txt".into()));
args.flags.insert("m".to_string());
let result_chars = Wc.execute(args, &mut ctx).await;
assert!(result_chars.ok());
assert!(result_bytes.text_out().contains("14"));
assert!(result_chars.text_out().contains("12"));
}
// Empty stdin succeeds and reports zero.
#[tokio::test]
async fn test_wc_empty_input() {
let mut ctx = make_ctx().await;
ctx.set_stdin("".to_string());
let args = ToolArgs::new();
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("0"));
}
// Input without a trailing newline; the assertion accepts either 0 or 1
// appearing in the output, so it does not pin the line-count convention.
#[tokio::test]
async fn test_wc_single_word_no_newline() {
let mut ctx = make_ctx().await;
ctx.set_stdin("word".to_string());
let args = ToolArgs::new();
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
let out = result.text_out();
assert!(out.contains("0") || out.contains("1")); }
// Whitespace-only input contains zero words.
#[tokio::test]
async fn test_wc_only_whitespace() {
let mut ctx = make_ctx().await;
ctx.set_stdin(" \n\t\t\n ".to_string());
let mut args = ToolArgs::new();
args.flags.insert("w".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("0"));
}
// Multi-byte text: 3 + 1 + 3 characters plus the newline gives 8.
#[tokio::test]
async fn test_wc_japanese_text() {
let mut ctx = make_ctx().await;
ctx.set_stdin("日本語 テスト\n".to_string());
let mut args = ToolArgs::new();
args.flags.insert("m".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("8"));
}
// A single 10000-byte line plus its newline is 10001 bytes.
#[tokio::test]
async fn test_wc_long_line() {
let mut ctx = make_ctx().await;
let long_line = "a".repeat(10000);
ctx.set_stdin(format!("{}\n", long_line));
let mut args = ToolArgs::new();
args.flags.insert("c".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("10001")); }
// For every input and every split point, feeding the two halves to
// `WcCounter` must give exactly what `count_content` gives for the whole
// buffer. This covers splits inside words, inside multi-byte characters and
// right next to newlines.
#[test]
fn wc_counter_matches_whole_buffer_across_every_split() {
let inputs: &[&[u8]] = &[
b"hello world\nfoo bar baz\n",
b"word",
b"",
"h\u{e9}llo w\u{f6}rld\n\u{65e5}\u{672c}\u{8a9e} \u{30c6}\u{30b9}\u{30c8}\n".as_bytes(),
b"a\n\nb",
b" \n\t\t\n ",
b"trailing no newline",
];
for input in inputs {
let want = count_content(input);
for split in 0..=input.len() {
let mut c = WcCounter::default();
c.push(&input[..split]);
c.push(&input[split..]);
assert_eq!(
c.finish(),
want,
"input={:?} split={}",
String::from_utf8_lossy(input),
split
);
}
}
}
// Filesystem wrapper that delegates to a `MemoryFs` and records the
// `(offset, limit)` of every read; whole-file reads are logged as
// `(None, None)`.
struct RecordingFs {
inner: MemoryFs,
ranges: Arc<std::sync::Mutex<Vec<(Option<u64>, Option<u64>)>>>,
}
#[async_trait::async_trait]
impl Filesystem for RecordingFs {
async fn read(&self, path: &Path) -> std::io::Result<Vec<u8>> {
self.ranges.lock().unwrap().push((None, None));
self.inner.read(path).await
}
async fn read_range(
&self,
path: &Path,
range: Option<kaish_vfs::ReadRange>,
) -> std::io::Result<Vec<u8>> {
let key = (
range.as_ref().and_then(|r| r.offset),
range.as_ref().and_then(|r| r.limit),
);
self.ranges.lock().unwrap().push(key);
self.inner.read_range(path, range).await
}
async fn write(&self, path: &Path, data: &[u8]) -> std::io::Result<()> {
self.inner.write(path, data).await
}
async fn list(&self, path: &Path) -> std::io::Result<Vec<crate::vfs::DirEntry>> {
self.inner.list(path).await
}
async fn stat(&self, path: &Path) -> std::io::Result<crate::vfs::DirEntry> {
self.inner.stat(path).await
}
async fn mkdir(&self, path: &Path) -> std::io::Result<()> {
self.inner.mkdir(path).await
}
async fn remove(&self, path: &Path) -> std::io::Result<()> {
self.inner.remove(path).await
}
fn read_only(&self) -> bool {
self.inner.read_only()
}
}
// Reads a 1000-byte file with a 256-byte chunk size and checks that it took
// at least four reads, each limited to 256 bytes, and that the counter still
// sees every byte.
#[tokio::test]
async fn wc_streams_file_in_bounded_chunks() {
let ranges = Arc::new(std::sync::Mutex::new(Vec::new()));
let rec = RecordingFs {
inner: MemoryFs::new(),
ranges: ranges.clone(),
};
let payload = vec![b'x'; 1000];
rec.inner.write(Path::new("big.txt"), &payload).await.unwrap();
let mut vfs = VfsRouter::new();
vfs.mount("/", rec);
let ctx = ExecContext::new(Arc::new(vfs));
let mut counter = WcCounter::default();
ctx.read_file_chunked(Path::new("/big.txt"), 256, |c| {
counter.push(c);
std::ops::ControlFlow::Continue(())
})
.await
.unwrap();
let recs = ranges.lock().unwrap();
assert!(
recs.len() >= 4,
"expected the file to be read in several chunks, got {} reads",
recs.len()
);
assert!(
recs.iter().all(|&(_, limit)| limit == Some(256)),
"every read must be bounded to the chunk size; recorded {recs:?}"
);
assert_eq!(counter.finish().3, payload.len(), "byte count is exact");
}
// A glob pattern as the only argument: the output is expected to contain a
// "total" row.
#[tokio::test]
async fn test_wc_glob() {
let mut ctx = make_ctx().await;
let mut args = ToolArgs::new();
args.positional.push(Value::String("*.txt".into()));
args.flags.insert("l".to_string());
let result = Wc.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(result.text_out().contains("total"));
}
}