use std::path::Path;
use crate::error::{Error, Result};
use crate::index::parse::DocumentFormat;
/// File size in bytes (100 MiB) above which `validate_file` attaches a
/// "large file" warning to its result.
const LARGE_FILE_THRESHOLD: usize = 100 * 1024 * 1024;
/// Outcome of validating a document source.
///
/// Carries a pass/fail flag together with the error and warning messages
/// collected while validating.
#[derive(Debug, Clone)]
pub struct SourceValidation {
    /// `true` when the source passed validation.
    pub valid: bool,
    /// Messages describing why validation failed.
    pub errors: Vec<String>,
    /// Non-fatal messages; may accompany either a passing or a failing result.
    pub warnings: Vec<String>,
}

impl SourceValidation {
    /// Creates a passing result with no errors and no warnings.
    fn valid() -> Self {
        Self {
            valid: true,
            errors: Vec::new(),
            warnings: Vec::new(),
        }
    }

    /// Creates a failing result carrying `errors` and no warnings.
    ///
    /// The result is marked invalid unconditionally, even if `errors` is empty.
    fn invalid(errors: Vec<String>) -> Self {
        Self {
            valid: false,
            errors,
            ..Self::valid()
        }
    }

    /// Returns `self` with its warning list replaced by `warnings`.
    ///
    /// Any warnings previously stored are discarded, not appended to.
    fn with_warnings(self, warnings: Vec<String>) -> Self {
        Self { warnings, ..self }
    }
}
/// Checks that `path` points at an existing file in a supported format.
///
/// Problems with the source itself are reported inside the returned
/// [`SourceValidation`] (with `valid == false`) rather than as an `Err`:
///
/// - the path does not exist,
/// - the path is a directory,
/// - the extension is not recognised by `DocumentFormat::from_extension`
///   (a missing or non-UTF-8 extension is treated as the empty string).
///
/// A file larger than `LARGE_FILE_THRESHOLD` bytes gets a warning attached;
/// the warning is kept even when the format check subsequently fails.
///
/// # Errors
///
/// Returns [`Error::Parse`] if the file's metadata cannot be read after the
/// existence check has passed.
pub fn validate_file(path: &Path) -> Result<SourceValidation> {
    if !path.exists() {
        return Ok(SourceValidation::invalid(vec![format!(
            "File not found: {}",
            path.display()
        )]));
    }

    let metadata = std::fs::metadata(path)
        .map_err(|e| Error::Parse(format!("Cannot read file metadata: {}", e)))?;

    // A directory can carry a supported extension (e.g. `notes.md/`); without
    // this check it would be reported as a valid file.
    if metadata.is_dir() {
        return Ok(SourceValidation::invalid(vec![format!(
            "Not a file: {}",
            path.display()
        )]));
    }

    // Keep the size as `u64`: casting it down to `usize` would silently
    // truncate sizes above 4 GiB on 32-bit targets. Widening the threshold
    // instead is lossless.
    let size = metadata.len();
    let mut warnings = Vec::new();
    if size > LARGE_FILE_THRESHOLD as u64 {
        warnings.push(format!(
            "Large file ({}MB) may take longer to index",
            size / (1024 * 1024)
        ));
    }

    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
    if DocumentFormat::from_extension(ext).is_none() {
        return Ok(
            SourceValidation::invalid(vec![format!("Unsupported format: .{}", ext)])
                .with_warnings(warnings),
        );
    }

    Ok(SourceValidation::valid().with_warnings(warnings))
}
/// Validates textual document content.
///
/// The content is rejected with a single `"Content is empty"` error when it
/// is empty or consists only of whitespace; anything else passes. The
/// `_format` argument is currently not consulted.
pub fn validate_content(content: &str, _format: DocumentFormat) -> SourceValidation {
    let is_blank = content.trim().is_empty();
    if is_blank {
        return SourceValidation::invalid(vec!["Content is empty".to_string()]);
    }
    SourceValidation::valid()
}
/// Validates raw document bytes for the given `format`.
///
/// Empty input is rejected for every format. For `DocumentFormat::Pdf`,
/// non-empty input must additionally begin with the bytes `%PDF`. No other
/// format receives a content check here.
pub fn validate_bytes(data: &[u8], format: DocumentFormat) -> SourceValidation {
    // Empty data never reaches the header check, so at most one error is reported.
    if data.is_empty() {
        return SourceValidation::invalid(vec!["Byte data is empty".to_string()]);
    }

    let missing_pdf_header = format == DocumentFormat::Pdf && !data.starts_with(b"%PDF");
    if missing_pdf_header {
        return SourceValidation::invalid(vec![
            "Data does not appear to be a valid PDF (missing %PDF header)".to_string(),
        ]);
    }

    SourceValidation::valid()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A path that does not exist yields an invalid result mentioning "not found".
    #[test]
    fn test_validate_file_missing() {
        let missing = Path::new("./nonexistent.md");
        let outcome = validate_file(missing).unwrap();
        assert!(!outcome.valid);
        assert!(outcome.errors[0].contains("not found"));
    }

    /// An existing file with a `.dat` extension is expected to be rejected
    /// as an unsupported format.
    #[test]
    fn test_validate_file_unsupported_format() {
        let scratch = std::env::temp_dir().join("vectorless_test_validate.dat");
        std::fs::write(&scratch, b"data").unwrap();

        let outcome = validate_file(&scratch).unwrap();
        assert!(!outcome.valid);
        assert!(outcome.errors[0].contains("Unsupported"));

        // Best-effort cleanup: a failure to delete must not fail the test.
        std::fs::remove_file(&scratch).ok();
    }

    /// An existing `.md` file is expected to validate without errors.
    #[test]
    fn test_validate_file_valid() {
        let scratch = std::env::temp_dir().join("vectorless_test_validate.md");
        std::fs::write(&scratch, b"# Hello").unwrap();

        let outcome = validate_file(&scratch).unwrap();
        assert!(outcome.valid);
        assert!(outcome.errors.is_empty());

        // Best-effort cleanup: a failure to delete must not fail the test.
        std::fs::remove_file(&scratch).ok();
    }

    /// Whitespace-only content counts as empty.
    #[test]
    fn test_validate_content_empty() {
        let outcome = validate_content(" \n ", DocumentFormat::Markdown);
        assert!(!outcome.valid);
        assert!(outcome.errors[0].contains("empty"));
    }

    /// Non-blank content passes.
    #[test]
    fn test_validate_content_valid() {
        let outcome = validate_content("# Hello", DocumentFormat::Markdown);
        assert!(outcome.valid);
    }

    /// An empty byte slice is rejected.
    #[test]
    fn test_validate_bytes_empty() {
        let outcome = validate_bytes(b"", DocumentFormat::Pdf);
        assert!(!outcome.valid);
        assert!(outcome.errors[0].contains("empty"));
    }

    /// PDF bytes that lack the `%PDF` prefix are rejected.
    #[test]
    fn test_validate_bytes_invalid_pdf() {
        let outcome = validate_bytes(b"not a pdf", DocumentFormat::Pdf);
        assert!(!outcome.valid);
        assert!(outcome.errors[0].contains("PDF"));
    }

    /// PDF bytes starting with `%PDF` pass.
    #[test]
    fn test_validate_bytes_valid_pdf() {
        let outcome = validate_bytes(b"%PDF-1.4 some content", DocumentFormat::Pdf);
        assert!(outcome.valid);
    }

    /// Non-empty Markdown bytes pass; no header check applies to Markdown.
    #[test]
    fn test_validate_bytes_valid_markdown() {
        let outcome = validate_bytes(b"# Hello", DocumentFormat::Markdown);
        assert!(outcome.valid);
    }
}