// token_count/cli/input.rs

//! Stdin input handling with streaming support
2
use crate::error::TokenError;
use std::io::{self, BufRead, BufReader, Read};
5
/// Capacity, in bytes, used for the stdin read buffers (64KB).
const CHUNK_SIZE: usize = 64 * 1024;
/// Largest input, in bytes, accepted when reading all of stdin at once (100MB).
const MAX_INPUT_SIZE: usize = 100 * 1024 * 1024;
8
9/// Read all input from stdin
10///
11/// # Errors
12///
13/// Returns `TokenError::InvalidUtf8` if the input contains invalid UTF-8
14/// Returns `TokenError::InputTooLarge` if the input exceeds 100MB
15/// Returns `TokenError::Io` for other IO errors
16pub fn read_stdin() -> Result<String, TokenError> {
17    let stdin = io::stdin();
18    let mut reader = BufReader::with_capacity(CHUNK_SIZE, stdin.lock());
19    let mut buffer = String::new();
20
21    // Read with size tracking to enforce limits
22    let bytes_read = reader.read_to_string(&mut buffer).map_err(|e| {
23        if e.kind() == io::ErrorKind::InvalidData {
24            // UTF-8 validation error
25            TokenError::InvalidUtf8 { offset: buffer.len() }
26        } else {
27            TokenError::Io(e)
28        }
29    })?;
30
31    // Enforce size limit to prevent DoS attacks
32    if bytes_read > MAX_INPUT_SIZE {
33        return Err(TokenError::InputTooLarge { size: bytes_read, limit: MAX_INPUT_SIZE });
34    }
35
36    Ok(buffer)
37}
38
39/// Read stdin with a callback for each chunk (for streaming processing)
40///
41/// This allows processing large inputs without loading everything into memory
42pub fn read_stdin_streaming<F>(mut process: F) -> Result<(), TokenError>
43where
44    F: FnMut(&str) -> Result<(), TokenError>,
45{
46    let stdin = io::stdin();
47    let mut reader = BufReader::with_capacity(CHUNK_SIZE, stdin.lock());
48    let mut buffer = String::with_capacity(CHUNK_SIZE);
49
50    loop {
51        buffer.clear();
52        let bytes_read = reader.read_line(&mut buffer).map_err(|e| {
53            if e.kind() == io::ErrorKind::InvalidData {
54                TokenError::InvalidUtf8 { offset: 0 }
55            } else {
56                TokenError::Io(e)
57            }
58        })?;
59
60        if bytes_read == 0 {
61            break; // EOF
62        }
63
64        process(&buffer)?;
65    }
66
67    Ok(())
68}
69
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the read-buffer size so an accidental change is noticed.
    #[test]
    fn test_chunk_size() {
        assert_eq!(CHUNK_SIZE, 64 * 1024);
    }
}