use koda_core::providers::ImageData;
use std::path::{Path, PathBuf};
/// A block of pasted text that is carried alongside the prompt.
///
/// `format_paste_blocks` renders each block as a `<reference type="pasted">`
/// element.
#[derive(Debug, Clone)]
pub struct PasteBlock {
/// The pasted text.
pub content: String,
/// Size reported in the rendered `chars` attribute and in the truncation
/// note. It is stored by whoever builds the block and is never recomputed
/// from `content` in this file (presumably a character count; confirm with
/// the caller).
pub char_count: usize,
}
/// Result of running `process_input` over one line of user input.
#[derive(Debug)]
pub struct ProcessedInput {
/// Prompt text left after attachment tokens were removed, or a default
/// prompt when only attachments were supplied.
pub prompt: String,
/// Text files loaded from `@path` references.
pub context_files: Vec<FileContext>,
/// Images loaded from `@path` references or from bare image paths.
pub images: Vec<ImageData>,
/// Pasted blocks; `process_input` always leaves this empty.
pub paste_blocks: Vec<PasteBlock>,
}
/// A text file attached to the prompt through an `@path` reference.
#[derive(Debug)]
pub struct FileContext {
/// Path as the user wrote it, after quote stripping and unescaping
/// (not the resolved on-disk path).
pub path: String,
/// Full file content as read from disk.
pub content: String,
}
/// File extensions (lowercase, without the leading dot) that are treated as images.
const IMAGE_EXTENSIONS: &[&str] = &["png", "jpg", "jpeg", "gif", "webp", "bmp"];

/// Returns `true` when the text after the last `.` in `path` is one of
/// [`IMAGE_EXTENSIONS`], compared ASCII case-insensitively.
///
/// The extension must match exactly, so names that merely end in the same
/// letters (`"notes_png"`, `"/tmp/jpeg"`, `"archive.xbmp"`) are not mistaken
/// for images. This keeps the check in step with `mime_type_for`, which also
/// requires the dot.
fn is_image_file(path: &str) -> bool {
    path.rsplit_once('.').is_some_and(|(_, ext)| {
        IMAGE_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known))
    })
}
/// Maps the extension of `path` (case-insensitive) to an image MIME type.
///
/// Paths whose extension is not one of the recognised image types, or that
/// have no `.` at all, yield `"application/octet-stream"`.
fn mime_type_for(path: &str) -> &'static str {
    let lowered = path.to_lowercase();
    // Everything after the last dot; `None` when the path contains no dot.
    let extension = lowered.rsplit_once('.').map(|(_, ext)| ext);
    match extension {
        Some("png") => "image/png",
        Some("jpg") | Some("jpeg") => "image/jpeg",
        Some("gif") => "image/gif",
        Some("webp") => "image/webp",
        Some("bmp") => "image/bmp",
        _ => "application/octet-stream",
    }
}
/// Removes one pair of matching surrounding quotes (`"…"` or `'…'`) from `s`.
///
/// The input is returned unchanged when it is not wrapped in a matching
/// pair, including a lone quote character and mismatched quotes.
fn strip_quotes(s: &str) -> &str {
    for quote in ['"', '\''] {
        // `strip_suffix` runs on what is left after the opening quote, so a
        // single quote character can never count as both ends of a pair.
        let inner = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    s
}
/// Resolves backslash escapes in `path`: every `\x` pair becomes `x`.
///
/// A trailing backslash with nothing after it is kept as-is. On Windows the
/// input is returned untouched (presumably because `\` is a path separator
/// there).
fn unescape_path(path: &str) -> String {
    if cfg!(windows) {
        return path.to_string();
    }

    let mut unescaped = String::with_capacity(path.len());
    let mut remaining = path.chars();
    while let Some(ch) = remaining.next() {
        match ch {
            // Take the escaped character literally; a dangling backslash
            // stands for itself.
            '\\' => unescaped.push(remaining.next().unwrap_or('\\')),
            other => unescaped.push(other),
        }
    }
    unescaped
}
/// Splits `input` into whitespace-separated tokens while honouring
/// shell-style quoting and backslash escapes.
///
/// * A backslash and the character after it are copied verbatim into the
///   current token, so `\ ` does not end the token.
/// * A `"` or `'` that starts a token (or directly follows a leading `@`)
///   opens a quoted span: everything up to and including the matching quote
///   is kept in the token, whitespace included. An unterminated quote runs
///   to the end of the input.
/// * A quote character in the middle of a word is ordinary text. This keeps
///   apostrophes in prose (`what's`, `don't`) from swallowing the rest of the
///   line, which would hide any `@file` reference or path that follows.
///
/// Quotes and backslashes are preserved in the returned tokens; callers are
/// expected to strip or unescape them afterwards.
fn tokenize_shell_aware(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut chars = input.chars();

    while let Some(c) = chars.next() {
        match c {
            ' ' | '\t' | '\n' | '\r' => {
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
            }
            '\\' => {
                current.push('\\');
                if let Some(escaped) = chars.next() {
                    current.push(escaped);
                }
            }
            // Only open a quoted span at a token start or right after `@`.
            '"' | '\'' if current.is_empty() || current == "@" => {
                current.push(c);
                for inner in chars.by_ref() {
                    current.push(inner);
                    if inner == c {
                        break;
                    }
                }
            }
            _ => current.push(c),
        }
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}
/// Heuristically decides whether `token` is written like a filesystem path.
///
/// Surrounding quotes are removed first. The token qualifies when either the
/// unquoted text or its backslash-unescaped form starts with `/`, `~/`, `./`,
/// `..`, or a drive prefix such as `C:\` or `D:/`.
fn looks_like_file_path(token: &str) -> bool {
    /// Checks a single candidate string for one of the recognised prefixes.
    fn has_path_prefix(s: &str) -> bool {
        const PREFIXES: [&str; 4] = ["/", "~/", "./", ".."];
        if PREFIXES.iter().any(|prefix| s.starts_with(*prefix)) {
            return true;
        }
        // Drive prefix: ASCII letter, colon, then either kind of slash.
        match s.as_bytes() {
            [drive, b':', sep, ..] => {
                drive.is_ascii_alphabetic() && (*sep == b'\\' || *sep == b'/')
            }
            _ => false,
        }
    }

    let unquoted = strip_quotes(token);
    has_path_prefix(unquoted) || has_path_prefix(&unescape_path(unquoted))
}
/// Reads the file at `path` and wraps it as base64-encoded [`ImageData`].
///
/// `display_path` is used for the warning text and for choosing the MIME
/// type via `mime_type_for`. When the file cannot be read, a warning is
/// printed to stderr and `None` is returned.
fn try_load_image(path: &Path, display_path: &str) -> Option<ImageData> {
    use base64::Engine;

    let Ok(raw) = std::fs::read(path) else {
        eprintln!(" \x1b[33m\u{26a0} Could not read image: {display_path}\x1b[0m");
        return None;
    };

    Some(ImageData {
        media_type: mime_type_for(display_path).to_string(),
        base64: base64::engine::general_purpose::STANDARD.encode(&raw),
    })
}
/// Turns a bare path token into an absolute [`PathBuf`].
///
/// Quotes are stripped and backslash escapes resolved first. Then:
/// * `~/rest` is joined onto `$HOME`, falling back to `$USERPROFILE`;
///   `None` is returned when neither variable is available.
/// * An absolute path is returned as-is.
/// * Anything else is joined onto the current working directory; `None` is
///   returned when that directory cannot be determined.
///
/// The result is not checked for existence.
fn resolve_bare_path(token: &str) -> Option<PathBuf> {
    let unescaped = unescape_path(strip_quotes(token));

    match unescaped.strip_prefix("~/") {
        Some(tail) => {
            let home = std::env::var("HOME")
                .or_else(|_| std::env::var("USERPROFILE"))
                .ok()?;
            Some(PathBuf::from(home).join(tail))
        }
        None => {
            let candidate = PathBuf::from(unescaped.as_str());
            if candidate.is_absolute() {
                return Some(candidate);
            }
            let cwd = std::env::current_dir().ok()?;
            Some(cwd.join(unescaped.as_str()))
        }
    }
}
pub fn process_input(input: &str, project_root: &Path) -> ProcessedInput {
let mut prompt_parts = Vec::new();
let mut context_files = Vec::new();
let mut images = Vec::new();
for token in tokenize_shell_aware(input) {
if let Some(raw_path) = token.strip_prefix('@') {
if raw_path.is_empty() {
prompt_parts.push(token.to_string());
continue;
}
let raw_path = strip_quotes(raw_path);
let clean_path = unescape_path(raw_path);
let full_path = match koda_core::tools::safe_resolve_path(project_root, &clean_path) {
Ok(p) => p,
Err(_) => {
tracing::warn!("@file path escapes project root: {clean_path}");
prompt_parts.push(token.to_string());
continue;
}
};
if is_image_file(&clean_path) {
if let Some(img) = try_load_image(&full_path, &clean_path) {
images.push(img);
} else {
prompt_parts.push(token.to_string());
}
continue;
}
match std::fs::read_to_string(&full_path) {
Ok(content) => {
context_files.push(FileContext {
path: clean_path,
content,
});
}
Err(_) => {
eprintln!(" \x1b[33m\u{26a0} Could not read: {clean_path}\x1b[0m");
prompt_parts.push(token.to_string());
}
}
continue;
}
let unescaped = unescape_path(strip_quotes(&token));
if looks_like_file_path(&token)
&& is_image_file(&unescaped)
&& let Some(resolved) = resolve_bare_path(&token)
&& resolved.exists()
{
let display = resolved.display().to_string();
if let Some(img) = try_load_image(&resolved, &display) {
images.push(img);
continue;
}
}
prompt_parts.push(token);
}
let prompt = prompt_parts.join(" ");
let prompt = if prompt.trim().is_empty() && (!context_files.is_empty() || !images.is_empty()) {
if !images.is_empty() && context_files.is_empty() {
"Describe and analyze this image.".to_string()
} else {
"Describe and explain the attached files.".to_string()
}
} else {
prompt
};
ProcessedInput {
prompt,
context_files,
images,
paste_blocks: Vec::new(),
}
}
/// Renders attached files as `<file path="…">…</file>` elements separated by
/// blank lines.
///
/// Returns `None` for an empty slice. Content longer than 40 000 bytes is cut
/// at the nearest UTF-8 character boundary at or below that limit and
/// followed by a note giving the full byte length. Neither the path nor the
/// content is escaped.
pub fn format_context_files(files: &[FileContext]) -> Option<String> {
    const MAX_BYTES: usize = 40_000;

    if files.is_empty() {
        return None;
    }

    let rendered: Vec<String> = files
        .iter()
        .map(|file| {
            let body = file.content.as_str();
            if body.len() <= MAX_BYTES {
                return format!("<file path=\"{}\">{}</file>", file.path, body);
            }
            // Step back from the limit until the cut lands between characters.
            let cut = (0..=MAX_BYTES)
                .rev()
                .find(|&idx| body.is_char_boundary(idx))
                .unwrap_or(0);
            format!(
                "<file path=\"{}\">{}\n\n[truncated — {} bytes total]</file>",
                file.path,
                &body[..cut],
                body.len()
            )
        })
        .collect();

    Some(rendered.join("\n\n"))
}
/// Size threshold for treating pasted text as a paste block. It is not used
/// in this part of the file (presumably consulted by the input layer; confirm
/// with callers).
pub const PASTE_BLOCK_THRESHOLD: usize = 200;

/// Maximum number of content bytes emitted per paste block before truncation.
const PASTE_BLOCK_MAX_CHARS: usize = 40_000;

/// Renders paste blocks as `<reference type="pasted" chars="N">…</reference>`
/// elements separated by blank lines.
///
/// Returns `None` for an empty slice. Content whose byte length exceeds
/// `PASTE_BLOCK_MAX_CHARS` is cut at the nearest UTF-8 character boundary at
/// or below the limit and followed by a note quoting the block's
/// `char_count`.
pub fn format_paste_blocks(blocks: &[PasteBlock]) -> Option<String> {
    if blocks.is_empty() {
        return None;
    }

    let mut rendered: Vec<String> = Vec::with_capacity(blocks.len());
    for block in blocks {
        let text = block.content.as_str();
        let body = if text.len() <= PASTE_BLOCK_MAX_CHARS {
            text.to_string()
        } else {
            // Step back from the limit until the cut lands between characters.
            let cut = (0..=PASTE_BLOCK_MAX_CHARS)
                .rev()
                .find(|&idx| text.is_char_boundary(idx))
                .unwrap_or(0);
            format!(
                "{}\n\n[truncated — {} chars total]",
                &text[..cut],
                block.char_count
            )
        };
        rendered.push(format!(
            "<reference type=\"pasted\" chars=\"{}\">{}</reference>",
            block.char_count, body
        ));
    }

    Some(rendered.join("\n\n"))
}
// Unit tests for input processing: `@file` references, bare image paths,
// tokenizing, path helpers, and the context/paste formatters.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
// ── `@file` references to text files ──
// An `@file` token is removed from the prompt and loaded as a context file.
#[test]
fn test_process_input_with_file_ref() {
let dir = TempDir::new().unwrap();
fs::write(dir.path().join("test.rs"), "fn test() {}").unwrap();
let result = process_input("explain @test.rs", dir.path());
assert_eq!(result.prompt, "explain");
assert_eq!(result.context_files.len(), 1);
assert_eq!(result.context_files[0].path, "test.rs");
assert_eq!(result.context_files[0].content, "fn test() {}");
}
// Input without references passes through unchanged.
#[test]
fn test_process_input_no_refs() {
let dir = TempDir::new().unwrap();
let result = process_input("just a normal question", dir.path());
assert_eq!(result.prompt, "just a normal question");
assert!(result.context_files.is_empty());
}
// A lone file reference gets the default "attached files" prompt.
#[test]
fn test_process_input_only_ref() {
let dir = TempDir::new().unwrap();
fs::write(dir.path().join("code.py"), "print('hi')").unwrap();
let result = process_input("@code.py", dir.path());
assert_eq!(result.prompt, "Describe and explain the attached files.");
assert_eq!(result.context_files.len(), 1);
}
// An unreadable reference is left in the prompt text.
#[test]
fn test_process_input_missing_file() {
let dir = TempDir::new().unwrap();
let result = process_input("explain @nonexistent.rs", dir.path());
assert!(result.prompt.contains("@nonexistent.rs"));
assert!(result.context_files.is_empty());
}
// ── `format_context_files` ──
#[test]
fn test_format_context_files_empty() {
assert!(format_context_files(&[]).is_none());
}
#[test]
fn test_format_context_files() {
let files = vec![FileContext {
path: "main.rs".into(),
content: "fn main() {}".into(),
}];
let result = format_context_files(&files).unwrap();
assert!(result.contains("<file path=\"main.rs\">"));
assert!(result.contains("fn main() {}"));
assert!(result.contains("</file>"));
}
// ── Image detection and MIME mapping ──
#[test]
fn test_is_image_file() {
assert!(is_image_file("photo.png"));
assert!(is_image_file("photo.PNG"));
assert!(is_image_file("photo.jpg"));
assert!(is_image_file("photo.jpeg"));
assert!(is_image_file("photo.gif"));
assert!(is_image_file("photo.webp"));
assert!(is_image_file("photo.bmp"));
assert!(!is_image_file("code.rs"));
assert!(!is_image_file("data.json"));
assert!(!is_image_file("readme.md"));
}
#[test]
fn test_mime_type_for() {
assert_eq!(mime_type_for("x.png"), "image/png");
assert_eq!(mime_type_for("x.jpg"), "image/jpeg");
assert_eq!(mime_type_for("x.jpeg"), "image/jpeg");
assert_eq!(mime_type_for("x.gif"), "image/gif");
assert_eq!(mime_type_for("x.webp"), "image/webp");
assert_eq!(mime_type_for("x.bmp"), "image/bmp");
}
// ── `@file` references to images ──
// The fixture bytes are the 8-byte PNG signature; only the extension is
// used to classify the file.
#[test]
fn test_process_input_image_ref() {
let dir = TempDir::new().unwrap();
let png_bytes: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
fs::write(dir.path().join("screenshot.png"), png_bytes).unwrap();
let result = process_input("what is this @screenshot.png", dir.path());
assert_eq!(result.prompt, "what is this");
assert!(result.context_files.is_empty());
assert_eq!(result.images.len(), 1);
assert_eq!(result.images[0].media_type, "image/png");
assert!(!result.images[0].base64.is_empty());
}
// A lone image reference gets the image-specific default prompt.
#[test]
fn test_process_input_image_only_default_prompt() {
let dir = TempDir::new().unwrap();
let png_bytes: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
fs::write(dir.path().join("ui.png"), png_bytes).unwrap();
let result = process_input("@ui.png", dir.path());
assert_eq!(result.prompt, "Describe and analyze this image.");
assert_eq!(result.images.len(), 1);
}
// Text and image references in one input are routed to separate lists.
#[test]
fn test_process_input_mixed_image_and_file() {
let dir = TempDir::new().unwrap();
fs::write(dir.path().join("code.rs"), "fn main() {}").unwrap();
let png_bytes: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
fs::write(dir.path().join("error.png"), png_bytes).unwrap();
let result = process_input("fix this @code.rs @error.png", dir.path());
assert_eq!(result.prompt, "fix this");
assert_eq!(result.context_files.len(), 1);
assert_eq!(result.images.len(), 1);
}
// ── Path helper functions ──
// Only a matching pair of surrounding quotes is removed.
#[test]
fn test_strip_quotes() {
assert_eq!(strip_quotes("'/path/to/file.png'"), "/path/to/file.png");
assert_eq!(strip_quotes("\"/path/to/file.png\""), "/path/to/file.png");
assert_eq!(strip_quotes("/no/quotes.png"), "/no/quotes.png");
assert_eq!(strip_quotes("'mismatched"), "'mismatched");
assert_eq!(strip_quotes("'"), "'");
assert_eq!(strip_quotes("\""), "\"");
}
// Absolute, home, relative, parent and drive-letter prefixes count as
// paths; plain words and bare file names do not.
#[test]
fn test_looks_like_file_path() {
assert!(looks_like_file_path("/absolute/path.png"));
assert!(looks_like_file_path("~/Desktop/img.jpg"));
assert!(looks_like_file_path("./relative/img.png"));
assert!(looks_like_file_path("../parent/img.png"));
assert!(looks_like_file_path("'/quoted/path.png'"));
assert!(looks_like_file_path("C:\\Users\\test\\img.png"));
assert!(looks_like_file_path("D:/tmp/img.png"));
assert!(looks_like_file_path("/Users/foo/Screenshot\\ 2026.png"));
assert!(!looks_like_file_path("just-a-word"));
assert!(!looks_like_file_path("relative.png"));
}
// ── `tokenize_shell_aware` ──
#[test]
fn test_tokenize_simple() {
assert_eq!(tokenize_shell_aware("hello world"), vec!["hello", "world"],);
}
// Backslash-escaped spaces stay inside one token, escapes preserved.
#[test]
fn test_tokenize_backslash_spaces() {
let tokens = tokenize_shell_aware(
"explain /Users/foo/Screenshot\\ 2026-04-09\\ at\\ 4.37.01\\ PM.png",
);
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], "explain");
assert_eq!(
tokens[1],
"/Users/foo/Screenshot\\ 2026-04-09\\ at\\ 4.37.01\\ PM.png",
);
}
// Quoted spans stay in one token and keep their quote characters.
#[test]
fn test_tokenize_double_quoted() {
let tokens = tokenize_shell_aware(r#"explain "/Users/foo/Screenshot 2026.png" please"#);
assert_eq!(tokens.len(), 3);
assert_eq!(tokens[0], "explain");
assert_eq!(tokens[1], "\"/Users/foo/Screenshot 2026.png\"");
assert_eq!(tokens[2], "please");
}
#[test]
fn test_tokenize_single_quoted() {
let tokens = tokenize_shell_aware("explain '/Users/foo/Screenshot 2026.png'");
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], "explain");
assert_eq!(tokens[1], "'/Users/foo/Screenshot 2026.png'");
}
#[test]
fn test_tokenize_at_ref_with_escaped_spaces() {
let tokens = tokenize_shell_aware("what is @docs/my\\ file.rs");
assert_eq!(tokens.len(), 3);
assert_eq!(tokens[0], "what");
assert_eq!(tokens[1], "is");
assert_eq!(tokens[2], "@docs/my\\ file.rs");
}
#[test]
fn test_tokenize_mixed() {
let tokens = tokenize_shell_aware("fix @code.rs /tmp/err\\ log.png normal-word");
assert_eq!(tokens.len(), 4);
assert_eq!(tokens[0], "fix");
assert_eq!(tokens[1], "@code.rs");
assert_eq!(tokens[2], "/tmp/err\\ log.png");
assert_eq!(tokens[3], "normal-word");
}
// Empty and whitespace-only input produce no tokens.
#[test]
fn test_tokenize_empty() {
assert!(tokenize_shell_aware("").is_empty());
assert!(tokenize_shell_aware(" ").is_empty());
}
// Multi-byte characters pass through the tokenizer intact.
#[test]
fn test_tokenize_unicode_cjk_path() {
let tokens = tokenize_shell_aware("read /home/用户/文件.rs please");
assert_eq!(tokens, vec!["read", "/home/用户/文件.rs", "please"]);
}
// A backslash at end of input is kept literally.
#[test]
fn test_tokenize_trailing_backslash() {
let tokens = tokenize_shell_aware("fix bad\\");
assert_eq!(tokens, vec!["fix", "bad\\"]);
}
// Whitespace between words never yields empty tokens.
#[test]
fn test_tokenize_multiple_consecutive_spaces() {
let tokens = tokenize_shell_aware("fix the bug");
assert_eq!(tokens, vec!["fix", "the", "bug"]);
}
// ── `unescape_path` ──
#[test]
fn test_unescape_path_spaces() {
assert_eq!(
unescape_path("/Users/foo/Screenshot\\ 2026.png"),
"/Users/foo/Screenshot 2026.png",
);
}
#[test]
fn test_unescape_path_parens() {
assert_eq!(unescape_path("file\\ \\(1\\).png"), "file (1).png",);
}
#[test]
fn test_unescape_path_no_escapes() {
assert_eq!(unescape_path("/simple/path.rs"), "/simple/path.rs");
}
#[test]
fn test_unescape_path_trailing_backslash() {
assert_eq!(unescape_path("trailing\\"), "trailing\\");
}
// ── Bare (drag-and-drop style) image paths ──
// An existing image given by absolute path is attached and removed from
// the prompt.
#[test]
fn test_drag_and_drop_absolute_path() {
let dir = TempDir::new().unwrap();
let png_bytes: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
let img_path = dir.path().join("screenshot.png");
fs::write(&img_path, png_bytes).unwrap();
let input = format!("what is this {}", img_path.display());
let result = process_input(&input, dir.path());
assert_eq!(result.prompt, "what is this");
assert_eq!(result.images.len(), 1);
assert_eq!(result.images[0].media_type, "image/png");
}
#[test]
fn test_drag_and_drop_quoted_path() {
let dir = TempDir::new().unwrap();
let png_bytes: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
let img_path = dir.path().join("screenshot.png");
fs::write(&img_path, png_bytes).unwrap();
let input = format!("explain '{}'", img_path.display());
let result = process_input(&input, dir.path());
assert_eq!(result.prompt, "explain");
assert_eq!(result.images.len(), 1);
}
// Spaces in the file name are written as `\ ` escapes.
#[test]
fn test_drag_and_drop_escaped_spaces() {
let dir = TempDir::new().unwrap();
let png_bytes: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
let img_path = dir.path().join("Screenshot 2026-04-09 at 4.37.01 PM.png");
fs::write(&img_path, png_bytes).unwrap();
let escaped_path = img_path.display().to_string().replace(' ', "\\ ");
let input = format!("what is this {escaped_path}");
let result = process_input(&input, dir.path());
assert_eq!(result.prompt, "what is this");
assert_eq!(
result.images.len(),
1,
"image should be loaded from escaped path"
);
assert_eq!(result.images[0].media_type, "image/png");
}
// Spaces in the file name are protected by double quotes.
#[test]
fn test_drag_and_drop_quoted_spaces() {
let dir = TempDir::new().unwrap();
let png_bytes: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
let img_path = dir.path().join("Screenshot 2026.png");
fs::write(&img_path, png_bytes).unwrap();
let input = format!("what is this \"{}\"", img_path.display());
let result = process_input(&input, dir.path());
assert_eq!(result.prompt, "what is this");
assert_eq!(
result.images.len(),
1,
"image should be loaded from quoted path"
);
}
// An `@` reference with an escaped space resolves to the real file name.
#[test]
fn test_at_ref_with_escaped_spaces() {
let dir = TempDir::new().unwrap();
let file_path = dir.path().join("my file.rs");
fs::write(&file_path, "fn main() {}").unwrap();
let result = process_input("explain @my\\ file.rs", dir.path());
assert_eq!(result.prompt, "explain");
assert_eq!(result.context_files.len(), 1);
assert_eq!(result.context_files[0].content, "fn main() {}");
}
// A path with an image extension that does not exist stays prompt text
// (assumes nothing exists at this /tmp path).
#[test]
fn test_drag_and_drop_nonexistent_stays_in_prompt() {
let dir = TempDir::new().unwrap();
let input = "/tmp/nonexistent_image_12345.png what is this";
let result = process_input(input, dir.path());
assert!(result.prompt.contains("/tmp/nonexistent_image_12345.png"));
assert!(result.images.is_empty());
}
// Bare paths to non-image files are never attached automatically.
#[test]
fn test_non_image_absolute_path_stays_in_prompt() {
let dir = TempDir::new().unwrap();
fs::write(dir.path().join("data.json"), "{}").unwrap();
let input = format!("read {}", dir.path().join("data.json").display());
let result = process_input(&input, dir.path());
assert!(result.prompt.contains("data.json"));
assert!(result.images.is_empty());
}
// ── `resolve_bare_path` ──
#[test]
fn test_resolve_bare_path_absolute() {
#[cfg(unix)]
{
let resolved = resolve_bare_path("/tmp/test.png");
assert_eq!(resolved, Some(PathBuf::from("/tmp/test.png")));
}
#[cfg(windows)]
{
let resolved = resolve_bare_path("C:\\tmp\\test.png");
assert_eq!(resolved, Some(PathBuf::from("C:\\tmp\\test.png")));
}
}
// Only checked when HOME is set; `~/` must be expanded away.
#[test]
fn test_resolve_bare_path_home() {
if std::env::var("HOME").is_ok() {
let resolved = resolve_bare_path("~/test.png");
assert!(resolved.is_some());
let path = resolved.unwrap();
assert!(!path.to_string_lossy().contains('~'));
assert!(path.to_string_lossy().ends_with("test.png"));
}
}
#[test]
fn test_resolve_bare_path_quoted() {
#[cfg(unix)]
{
let resolved = resolve_bare_path("'/tmp/test.png'");
assert_eq!(resolved, Some(PathBuf::from("/tmp/test.png")));
}
#[cfg(windows)]
{
let resolved = resolve_bare_path("'C:\\tmp\\test.png'");
assert_eq!(resolved, Some(PathBuf::from("C:\\tmp\\test.png")));
}
}
// Relative paths are anchored at the current working directory.
#[test]
fn test_resolve_bare_path_relative() {
let resolved = resolve_bare_path("./test.png");
assert!(resolved.is_some());
assert!(resolved.unwrap().is_absolute());
}
// ── Security: `@` references must not load files outside the root ──
#[test]
fn test_at_file_traversal_blocked() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(dir.path().join("safe.rs"), "fn main() {}").unwrap();
let result = process_input("read @../../etc/passwd", dir.path());
assert!(
result.context_files.is_empty(),
"traversal should not load files outside project root"
);
assert!(result.prompt.contains("@../../etc/passwd"));
}
// ── `format_paste_blocks` ──
#[test]
fn test_format_paste_blocks_empty() {
assert!(format_paste_blocks(&[]).is_none());
}
#[test]
fn test_format_paste_blocks_single() {
let blocks = vec![PasteBlock {
content: "hello world".into(),
char_count: 11,
}];
let result = format_paste_blocks(&blocks).unwrap();
assert!(result.contains("<reference type=\"pasted\" chars=\"11\">"));
assert!(result.contains("hello world"));
assert!(result.contains("</reference>"));
}
// Multiple blocks are separated by a blank line.
#[test]
fn test_format_paste_blocks_multiple() {
let blocks = vec![
PasteBlock {
content: "block one".into(),
char_count: 9,
},
PasteBlock {
content: "block two".into(),
char_count: 9,
},
];
let result = format_paste_blocks(&blocks).unwrap();
assert!(result.contains("block one"));
assert!(result.contains("block two"));
assert!(result.contains("</reference>\n\n<reference"));
}
// Oversized content is cut and followed by a truncation note.
#[test]
fn test_format_paste_blocks_truncation() {
let long_content = "a".repeat(50_000);
let blocks = vec![PasteBlock {
content: long_content,
char_count: 50_000,
}];
let result = format_paste_blocks(&blocks).unwrap();
assert!(result.contains("[truncated — 50000 chars total]"));
assert!(result.len() < 45_000);
}
}