use crate::config::Config;
use std::fs;
use std::path::PathBuf;
pub fn is_vision_available(config: &Config) -> bool {
if crate::brain::provider::factory::active_provider_vision(config).is_some() {
return true;
}
if config.image.vision.enabled
&& let Some(ref key) = config.image.vision.api_key
{
return !key.is_empty();
}
false
}
/// Result of classifying or processing an attachment.
///
/// Values are produced by the processing functions in this module and turned
/// into prompt text by `inject_file_content`.
pub enum FileContent {
/// Prompt-ready text, e.g. a fenced dump of a text file or of extracted PDF text.
Text(String),
/// An image attachment. `process_file_with_vision` stores the path of the
/// saved copy here; `classify_file` uses an empty path as a placeholder.
Image(PathBuf),
/// A PDF rendered to per-page images: `paths` holds one entry per rendered
/// page and `label` is a short human-readable description of the document.
PdfPages { paths: Vec<PathBuf>, label: String },
/// A video attachment, identified by the path of its saved copy.
Video(PathBuf),
/// The file could not be used; the string is a bracketed note explaining why.
Unsupported(String),
}
/// Inlined text attachments are cut to at most this many characters.
const TEXT_LIMIT: usize = 8_000;
/// Maximum number of characters of extracted PDF text that `extract_pdf_text` inlines.
const PDF_TEXT_LIMIT: usize = 200_000;
/// Page cap handed to the PDF page renderer in `process_pdf_vision`.
const MAX_PDF_PAGES: usize = 100;
/// Returns `true` when `mime` denotes content that can be inlined as text.
///
/// Every `text/*` type qualifies, plus a fixed list of textual
/// `application/*` types (JSON, XML, YAML, TOML, JavaScript, shell, Python,
/// Ruby). Matching is case-insensitive and ignores surrounding whitespace as
/// well as any parameters after the first `;`, so
/// `application/json; charset=utf-8` is recognised as text.
pub fn is_text_mime(mime: &str) -> bool {
    // Keep only the `type/subtype` part; parameters such as `charset` do not
    // change whether the payload is textual.
    let essence = mime
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_lowercase();

    essence.starts_with("text/")
        || matches!(
            essence.as_str(),
            "application/json"
                | "application/xml"
                | "application/x-yaml"
                | "application/yaml"
                | "application/toml"
                | "application/javascript"
                | "application/x-javascript"
                | "application/x-sh"
                | "application/x-python"
                | "application/x-ruby"
        )
}
/// Guesses a MIME type from the extension of `filename`.
///
/// The extension is whatever follows the last `.` (compared
/// case-insensitively); a name without any `.` is treated as the extension in
/// its entirety. Unknown extensions map to `application/octet-stream`.
pub fn mime_from_ext(filename: &str) -> &'static str {
    let ext = match filename.rfind('.') {
        Some(dot) => &filename[dot + 1..],
        None => filename,
    };
    let ext = ext.to_lowercase();

    match ext.as_str() {
        // Plain text, including source code that is shown verbatim.
        "txt" | "md" | "rst" | "log" | "ts" | "py" | "rb" | "sh" | "rs" | "go" | "java" | "c"
        | "cpp" | "h" => "text/plain",
        "csv" | "tsv" => "text/csv",
        "html" | "htm" => "text/html",

        // Structured text formats.
        "json" => "application/json",
        "xml" | "svg" => "application/xml",
        "yaml" | "yml" => "application/yaml",
        "toml" => "application/toml",
        "js" | "mjs" => "application/javascript",

        // Images.
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "bmp" => "image/bmp",

        // Documents and archives.
        "pdf" => "application/pdf",
        "zip" => "application/zip",

        // Video containers.
        "mp4" | "m4v" => "video/mp4",
        "mov" => "video/quicktime",
        "webm" => "video/webm",
        "mkv" => "video/x-matroska",
        "avi" => "video/x-msvideo",
        "3gp" => "video/3gpp",
        "flv" => "video/x-flv",

        _ => "application/octet-stream",
    }
}
/// Returns `true` when `mime` is a `video/*` type, compared case-insensitively.
pub fn is_video_mime(mime: &str) -> bool {
    let lowered = mime.to_lowercase();
    lowered.strip_prefix("video/").is_some()
}
/// Reports whether video attachments can be analysed.
///
/// Only the dedicated `image.vision` section is consulted: it must be enabled
/// and hold a non-empty API key.
fn is_video_vision_available(config: &Config) -> bool {
    let vision = &config.image.vision;
    if !vision.enabled {
        return false;
    }
    match vision.api_key.as_ref() {
        Some(key) => !key.is_empty(),
        None => false,
    }
}
/// Writes `bytes` to a uniquely named file under `~/.opencrabs/tmp/files`.
///
/// The directory is created on demand. The stored name is a fresh UUID, an
/// underscore, and `filename` reduced to alphanumerics, `.`, `-` and `_`.
///
/// # Errors
///
/// Returns a human-readable message when the home directory cannot be
/// determined, the directory cannot be created, or the write fails.
fn save_to_temp(bytes: &[u8], filename: &str) -> Result<PathBuf, String> {
    let Some(home) = dirs::home_dir() else {
        return Err("No home directory found".into());
    };

    let mut tmp_dir = home;
    tmp_dir.push(".opencrabs");
    tmp_dir.push("tmp");
    tmp_dir.push("files");
    if let Err(e) = fs::create_dir_all(&tmp_dir) {
        return Err(format!("Failed to create temp dir: {e}"));
    }

    // Drop every character that is not plainly safe in a file name
    // (separators, spaces, quotes, ...).
    let mut safe_name = filename.to_owned();
    safe_name.retain(|c| c.is_alphanumeric() || matches!(c, '.' | '-' | '_'));

    let path = tmp_dir.join(format!("{}_{safe_name}", uuid::Uuid::new_v4()));
    match fs::write(&path, bytes) {
        Ok(()) => Ok(path),
        Err(e) => Err(format!("Failed to write temp file: {e}")),
    }
}
/// Extracts the text layer of a PDF and wraps it for inclusion in a prompt.
///
/// Returns `FileContent::Unsupported` when extraction fails or yields only
/// whitespace. Otherwise returns `FileContent::Text` holding a fenced block
/// with at most `PDF_TEXT_LIMIT` characters of text. A copy of the PDF is
/// saved on a best-effort basis so the note appended to the text can point a
/// follow-up `parse_document` call at it.
///
/// Pages are counted by form-feed characters (`U+000C`) in the extracted
/// text. NOTE(review): this assumes the extractor separates pages with form
/// feeds — confirm against the `pdf_extract` output format.
fn extract_pdf_text(bytes: &[u8], filename: &str) -> FileContent {
    let raw = match pdf_extract::extract_text_from_mem(bytes) {
        Ok(t) => t,
        Err(e) => {
            tracing::warn!("pdf_extract failed for {filename}: {e} — surfacing as unsupported");
            return FileContent::Unsupported(format!(
                "[File received: {filename} (PDF) — failed to extract text: {e}]"
            ));
        }
    };
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return FileContent::Unsupported(format!(
            "[File received: {filename} (PDF) — no extractable text found, may be image-based]"
        ));
    }

    // Best effort: a failed save only degrades the follow-up hint.
    let saved_path = save_to_temp(bytes, filename).ok();
    let full_len = trimmed.chars().count();
    let total_pages = trimmed.matches('\u{000C}').count() + 1;

    // Byte offset of the first character beyond the limit; `None` means the
    // whole text fits.
    let cut = trimmed
        .char_indices()
        .nth(PDF_TEXT_LIMIT)
        .map(|(idx, _)| idx);

    let truncated_text = if let Some(cut) = cut {
        let preview = &trimmed[..cut];
        let preview_pages = preview.matches('\u{000C}').count() + 1;
        // The cut usually lands in the middle of page `preview_pages`, so the
        // follow-up must resume at that page. Resuming at the next one would
        // drop the rest of it and, when the cut falls on the last page, point
        // past the end of the document.
        let path_hint = match saved_path.as_ref() {
            Some(p) => format!(
                "\n\n[Inline preview shows pages 1-{preview_pages} of {total_pages} (~{} of {} chars); \
                 page {preview_pages} may be cut off mid-page. Original PDF saved at: {}\n\
                 Call `parse_document(path='{}', pages=[{}, ...])` to continue from page {}.]",
                PDF_TEXT_LIMIT,
                full_len,
                p.display(),
                p.display(),
                preview_pages,
                preview_pages,
            ),
            None => format!(
                "\n\n[Inline preview truncated at {PDF_TEXT_LIMIT} of {full_len} chars; \
                 full PDF could not be saved to disk for `parse_document` follow-up.]"
            ),
        };
        format!("{preview}…{path_hint}")
    } else if let Some(ref p) = saved_path {
        format!(
            "{trimmed}\n\n[Full PDF saved at: {} — call `parse_document(path='{}', pages=[N])` to re-query any specific page.]",
            p.display(),
            p.display()
        )
    } else {
        trimmed.to_string()
    };

    FileContent::Text(format!(
        "[File: {filename} ({total_pages} pages)]\n```\n{truncated_text}\n```"
    ))
}
/// Classifies an attachment and prepares it for the model, using vision
/// backends where they are configured.
///
/// * `bytes` – raw file contents.
/// * `mime` – MIME type reported by the sender; when empty or
///   `application/octet-stream` the type is guessed from `filename`. The
///   value is compared case-insensitively and parameters after `;` are
///   ignored, in line with `is_text_mime` / `is_video_mime`.
/// * `filename` – original file name, used for messages and type guessing.
/// * `config` – decides whether image/video vision is available.
///
/// Images and videos are saved to disk and returned as `Image` / `Video` when
/// the matching vision backend is available, PDFs go through
/// `process_pdf_smart`, ZIP archives through `extract_zip_contents`, and text
/// types are inlined (cut to `TEXT_LIMIT` characters). Everything else, and
/// every failure, is reported as `FileContent::Unsupported` with an
/// explanatory note.
pub fn process_file_with_vision(
    bytes: &[u8],
    mime: &str,
    filename: &str,
    config: &Config,
) -> FileContent {
    // MIME types are case-insensitive and may carry parameters
    // (`; charset=...`); reduce to a lowercase `type/subtype`.
    let normalized = mime
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_lowercase();
    let effective: &str = if normalized.is_empty() || normalized == "application/octet-stream" {
        mime_from_ext(filename)
    } else {
        normalized.as_str()
    };
    let has_vision = is_vision_available(config);

    if effective.starts_with("image/") {
        if has_vision {
            return match save_to_temp(bytes, filename) {
                Ok(path) => FileContent::Image(path),
                Err(e) => FileContent::Unsupported(format!(
                    "[Image attachment: {filename} — failed to save for vision: {e}]"
                )),
            };
        }
        return FileContent::Unsupported(format!(
            "[Image attachment: {filename} — no vision model configured. \
             Set `image.vision.enabled = true` with an API key, or add `vision_model` \
             to your provider config in config.toml.]"
        ));
    }

    if is_video_mime(effective) {
        if is_video_vision_available(config) {
            return match save_to_temp(bytes, filename) {
                Ok(path) => FileContent::Video(path),
                Err(e) => FileContent::Unsupported(format!(
                    "[Video attachment: {filename} — failed to save for vision: {e}]"
                )),
            };
        }
        return FileContent::Unsupported(format!(
            "[Video attachment: {filename} — no video-capable vision model configured. \
             Set `image.vision.enabled = true` with a Gemini API key in config.toml. \
             (Frame-fallback for non-Gemini providers is not yet wired.)]"
        ));
    }

    if effective == "application/pdf" {
        return process_pdf_smart(bytes, filename, has_vision);
    }

    if effective == "application/zip" || effective == "application/x-zip-compressed" {
        return extract_zip_contents(bytes, filename, config);
    }

    if is_text_mime(effective) {
        let raw = String::from_utf8_lossy(bytes);
        // Decide on character count, not byte length: multi-byte text that
        // fits within the limit must not get a truncation marker.
        let cut = raw.char_indices().nth(TEXT_LIMIT).map(|(idx, _)| idx);
        let truncated = match cut {
            Some(cut) => format!("{}…[truncated]", &raw[..cut]),
            None => raw.into_owned(),
        };
        return FileContent::Text(format!("[File: {filename}]\n```\n{truncated}\n```"));
    }

    FileContent::Unsupported(format!(
        "[File received: {filename} ({effective}) — unsupported format]"
    ))
}
/// Minimum average number of extracted characters per page for
/// `process_pdf_smart` to treat a PDF as having a readable text layer.
const PDF_TEXT_DENSITY_MIN_CHARS_PER_PAGE: usize = 100;
/// Minimum total number of extracted characters for `process_pdf_smart` to
/// treat a PDF as having a readable text layer.
const PDF_TEXT_DENSITY_MIN_TOTAL: usize = 500;
/// Chooses between the text path and the vision path for a PDF.
///
/// The text layer is extracted once to measure its density (total characters
/// and characters per form-feed-delimited page). A dense enough PDF is handed
/// to `extract_pdf_text`; a sparse one is rendered for vision via
/// `process_pdf_vision` when `has_vision` is set, and reported as
/// `FileContent::Unsupported` otherwise. An extraction error is treated the
/// same as an empty text layer.
fn process_pdf_smart(bytes: &[u8], filename: &str, has_vision: bool) -> FileContent {
    let extracted = match pdf_extract::extract_text_from_mem(bytes) {
        Ok(text) => text,
        Err(_) => String::new(),
    };
    let trimmed = extracted.trim();

    let char_count = trimmed.chars().count();
    let total_pages = match trimmed {
        "" => 0,
        text => text.matches('\u{000C}').count() + 1,
    };
    // Zero pages means zero text; avoid dividing by zero.
    let chars_per_page = if total_pages == 0 {
        0
    } else {
        char_count / total_pages
    };

    let dense_enough = PDF_TEXT_DENSITY_MIN_TOTAL <= char_count
        && PDF_TEXT_DENSITY_MIN_CHARS_PER_PAGE <= chars_per_page;
    if dense_enough {
        tracing::debug!(
            "PDF {filename}: {char_count} chars / {total_pages} pages \
             ({chars_per_page}/page) — using text path"
        );
        return extract_pdf_text(bytes, filename);
    }

    if has_vision {
        tracing::info!(
            "PDF {filename}: sparse text ({char_count} chars / {total_pages} pages) — \
             rendering pages for lazy vision"
        );
        return process_pdf_vision(bytes, filename);
    }

    tracing::warn!(
        "PDF {filename}: sparse text ({char_count} chars / {total_pages} pages) and no \
         vision configured — surfacing as Unsupported"
    );
    FileContent::Unsupported(format!(
        "[File received: {filename} (PDF, {total_pages} pages) — only {char_count} chars extracted (~{chars_per_page}/page). \
         Likely a scanned/image-based PDF that needs a vision model. \
         Enable `[image.vision]` in config.toml or set `vision_model` on your provider.]"
    ))
}
/// Saves a PDF to disk and renders its pages to images for vision analysis.
///
/// At most `MAX_PDF_PAGES` pages are requested from the renderer, which
/// receives the saved file's path and its parent directory as string
/// arguments. Returns `FileContent::PdfPages` on success and
/// `FileContent::Unsupported` when the file cannot be saved, its path is not
/// valid UTF-8, or the renderer fails or produces no pages.
fn process_pdf_vision(bytes: &[u8], filename: &str) -> FileContent {
    let pdf_path = match save_to_temp(bytes, filename) {
        Ok(p) => p,
        Err(e) => {
            return FileContent::Unsupported(format!(
                "[PDF received: {filename} — failed to prepare: {e}]"
            ));
        }
    };

    // The renderer takes string paths. Bail out cleanly on a non-UTF-8 path
    // instead of panicking or handing it an empty path.
    let Some(pdf_str) = pdf_path.to_str() else {
        return FileContent::Unsupported(format!(
            "[PDF received: {filename} — failed to prepare: temp path is not valid UTF-8]"
        ));
    };
    let out_dir = pdf_path
        .parent()
        .and_then(|dir| dir.to_str())
        .unwrap_or("");

    let rendered = super::pdf_vision::render_pdf_pages(pdf_str, MAX_PDF_PAGES, out_dir);

    let render_err = match rendered {
        Ok(paths) if !paths.is_empty() => {
            let page_count = paths.len();
            let label = if page_count == 1 {
                "scanned PDF".to_string()
            } else {
                format!("scanned {page_count}-page PDF")
            };
            return FileContent::PdfPages { paths, label };
        }
        Ok(_) => "renderer produced no output".to_string(),
        Err(e) => e,
    };

    tracing::warn!("PDF {filename}: vision render failed: {render_err}; text path also empty");
    FileContent::Unsupported(format!(
        "[File received: {filename} (PDF) — neither text extraction nor vision render \
         produced content. Vision error: {render_err}]"
    ))
}
/// Converts processed file content into the text that is injected into the
/// conversation.
///
/// Returns the prompt text together with a flag that is `true` for single
/// image and video attachments (whose text carries an `<<IMG:…>>` /
/// `<<VID:…>>` marker) and `false` for rendered PDF pages, plain text and
/// unsupported-file notes.
pub fn inject_file_content(content: &FileContent) -> (String, bool) {
    match content {
        FileContent::Text(text) => (text.clone(), false),
        FileContent::Unsupported(note) => (note.clone(), false),
        FileContent::Image(path) => {
            let path_str = path.to_string_lossy();
            let prompt = format!(
                "[User attached an image. Call analyze_image with this path to view it. If the user asks to edit, modify, replace elements, or restyle the image, call generate_image with this path as the 'image' parameter instead.]\n<<IMG:{path_str}>>"
            );
            (prompt, true)
        }
        FileContent::Video(path) => {
            let path_str = path.to_string_lossy();
            let prompt = format!(
                "[User attached a video. Call analyze_video with this path to view it. \
                 analyze_video accepts an optional `question` arg — pass the user's actual \
                 question if they asked something specific, otherwise it defaults to a \
                 general description.]\n<<VID:{path_str}>>"
            );
            (prompt, true)
        }
        FileContent::PdfPages { paths, label } => {
            // One "- Page N: <path>" line per rendered page, newline separated.
            let mut path_list = String::new();
            for (i, p) in paths.iter().enumerate() {
                if i > 0 {
                    path_list.push('\n');
                }
                path_list.push_str(&format!("- Page {}: {}", i + 1, p.to_string_lossy()));
            }
            let prompt = format!(
                "[User attached a {label} ({n} page(s)). No extractable text — pages were \
                 rendered as images. Call `analyze_image(image='<path>', question='...')` \
                 ONE PAGE AT A TIME as you need content. Do NOT try to read all pages in \
                 one turn — providers cap request body size and bundling fails.]\n{path_list}",
                n = paths.len(),
            );
            (prompt, false)
        }
    }
}
/// Classifies an attachment without consulting any vision configuration.
///
/// * `bytes` – raw file contents.
/// * `mime` – MIME type reported by the sender; when empty or
///   `application/octet-stream` the type is guessed from `filename`. The
///   value is compared case-insensitively and parameters after `;` are
///   ignored, in line with `is_text_mime`.
/// * `filename` – original file name, used for messages and type guessing.
///
/// Images are returned as `FileContent::Image` with an empty placeholder path
/// (nothing is written to disk here), PDFs go through `extract_pdf_text`,
/// text types are inlined (cut to `TEXT_LIMIT` characters), and everything
/// else is `FileContent::Unsupported`.
pub fn classify_file(bytes: &[u8], mime: &str, filename: &str) -> FileContent {
    // MIME types are case-insensitive and may carry parameters
    // (`; charset=...`); reduce to a lowercase `type/subtype`.
    let normalized = mime
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_lowercase();
    let effective: &str = if normalized.is_empty() || normalized == "application/octet-stream" {
        mime_from_ext(filename)
    } else {
        normalized.as_str()
    };

    if effective.starts_with("image/") {
        return FileContent::Image(PathBuf::new());
    }

    if effective == "application/pdf" {
        return extract_pdf_text(bytes, filename);
    }

    if is_text_mime(effective) {
        let raw = String::from_utf8_lossy(bytes);
        // Decide on character count, not byte length: multi-byte text that
        // fits within the limit must not get a truncation marker.
        let cut = raw.char_indices().nth(TEXT_LIMIT).map(|(idx, _)| idx);
        let truncated = match cut {
            Some(cut) => format!("{}…[truncated]", &raw[..cut]),
            None => raw.into_owned(),
        };
        return FileContent::Text(format!("[File: {filename}]\n```\n{truncated}\n```"));
    }

    FileContent::Unsupported(format!(
        "[File received: {filename} ({effective}) — unsupported format]"
    ))
}
/// Expands a ZIP archive and processes every contained file.
///
/// Each regular entry is run through `process_file_with_vision` (type guessed
/// from the entry name) and the results are joined into one
/// `FileContent::Text`. Directories, dot-files and anything under a
/// `__MACOSX` folder are skipped. Entries larger than 10MB are skipped with a
/// note, and processing stops once 50 parts have been collected.
///
/// Returns `FileContent::Unsupported` when the archive cannot be opened or
/// contains nothing processable.
///
/// NOTE(review): nested archives are expanded recursively through
/// `process_file_with_vision` with no depth limit — consider bounding the
/// depth if archives can come from untrusted senders.
fn extract_zip_contents(bytes: &[u8], archive_name: &str, config: &Config) -> FileContent {
    use std::io::Read as _;

    /// Largest decompressed entry that is processed.
    const MAX_ENTRY_BYTES: usize = 10 * 1024 * 1024;
    /// Number of collected parts after which the rest of the archive is ignored.
    const MAX_PARTS: usize = 50;

    let reader = std::io::Cursor::new(bytes);
    let mut archive = match zip::ZipArchive::new(reader) {
        Ok(a) => a,
        Err(e) => {
            return FileContent::Unsupported(format!(
                "[ZIP archive: {archive_name} — failed to open: {e}]"
            ));
        }
    };

    let mut parts: Vec<String> = Vec::new();
    let file_count = archive.len();
    // `usize` -> `u64` is lossless on every supported target.
    let read_limit = MAX_ENTRY_BYTES as u64;

    for i in 0..file_count {
        let mut file = match archive.by_index(i) {
            Ok(f) => f,
            Err(e) => {
                parts.push(format!("[Error reading entry {i}: {e}]"));
                continue;
            }
        };
        let name = file.name().to_string();
        if file.is_dir() {
            continue;
        }

        // Skip hidden files and macOS resource-fork folders.
        let basename = name.rsplit('/').next().unwrap_or(&name);
        let in_macosx_dir = name.split('/').any(|component| component == "__MACOSX");
        if basename.starts_with('.') || basename.starts_with("__MACOSX") || in_macosx_dir {
            continue;
        }

        // Reject oversized entries from the declared size before
        // decompressing anything.
        let declared_size = file.size();
        if declared_size > read_limit {
            parts.push(format!(
                "[{name} — skipped, too large ({}MB)]",
                declared_size / 1024 / 1024
            ));
            continue;
        }

        // The declared size comes from the archive and cannot be trusted, so
        // also cap the read itself; one extra byte reveals an overrun.
        let mut buf = Vec::new();
        if let Err(e) = file.by_ref().take(read_limit + 1).read_to_end(&mut buf) {
            parts.push(format!("[{name} — read error: {e}]"));
            continue;
        }
        if buf.len() > MAX_ENTRY_BYTES {
            parts.push(format!(
                "[{name} — skipped, too large ({}MB)]",
                buf.len() / 1024 / 1024
            ));
            continue;
        }

        let entry_mime = mime_from_ext(&name);
        match process_file_with_vision(&buf, entry_mime, &name, config) {
            FileContent::Text(t) => parts.push(t),
            FileContent::Image(path) => {
                parts.push(format!("<<IMG:{}>>", path.display()));
            }
            FileContent::Video(path) => {
                parts.push(format!("<<VID:{}>>", path.display()));
            }
            FileContent::PdfPages { paths, label } => {
                let path_list: String = paths
                    .iter()
                    .enumerate()
                    .map(|(i, p)| format!("- Page {}: {}", i + 1, p.display()))
                    .collect::<Vec<_>>()
                    .join("\n");
                parts.push(format!(
                    "[{label} from zip] Call `analyze_image` per page as needed:\n{path_list}"
                ));
            }
            FileContent::Unsupported(msg) => {
                parts.push(msg);
            }
        }

        if parts.len() >= MAX_PARTS {
            // Mention the cut-off only when entries were actually left out.
            let remaining = file_count - i - 1;
            if remaining > 0 {
                parts.push(format!("[... and {} more files truncated]", remaining));
            }
            break;
        }
    }

    if parts.is_empty() {
        return FileContent::Unsupported(format!(
            "[ZIP archive: {archive_name} — empty or no processable files]"
        ));
    }

    let combined = if file_count == 1 {
        // A single-entry archive is presented as if the file was sent directly.
        parts
            .into_iter()
            .next()
            .expect("parts was checked to be non-empty above")
    } else {
        format!(
            "[ZIP archive: {archive_name} — {file_count} files]\n{}",
            parts.join("\n---\n")
        )
    };
    FileContent::Text(combined)
}