use crate::summary_item::SummaryItem;
use serde::{Deserialize, Serialize};
/// Content and metadata recorded for one file: the retained lines plus size, truncation,
/// syntax-error, token and summary information.
///
/// Values are normally assembled with `FileInfo::new` / `FileInfo::with_metadata` followed
/// by the consuming `with_*` builder methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
/// Lines held for this file; `processed_lines()` reports the length of this vector.
pub lines: Vec<String>,
/// Optional second set of lines, set via `with_original_lines`.
/// NOTE(review): presumably the untransformed content — confirm against callers.
pub original_lines: Option<Vec<String>>,
/// Total line count recorded for the file; the denominator of `processing_ratio()`.
pub total_lines: usize,
/// Whether `total_lines` is exact. Both constructors initialise this to `true`.
pub total_lines_exact: bool,
/// Total byte count recorded for the file.
pub total_bytes: usize,
/// Overall truncation flag; while `false`, `truncation_reason()` returns `None`.
pub truncated: bool,
/// Cause flag reported by `truncation_reason()` as `"line limit"`.
pub truncated_by_lines: bool,
/// Cause flag reported by `truncation_reason()` as `"byte limit"`.
pub truncated_by_bytes: bool,
/// Cause flag reported by `truncation_reason()` as `"context fitting"`.
pub truncated_by_context: bool,
/// Language label for the file, if one was supplied.
pub language: Option<String>,
/// Encoding label; `FileInfo::new` uses `"UTF-8"`.
pub encoding: String,
/// Collected syntax-error messages; `is_success()` is `true` only while this is empty.
pub syntax_errors: Vec<String>,
/// Optional list of tokens.
pub tokens: Option<Vec<String>>,
/// Optional total token count. When present it takes precedence over `tokens.len()` in
/// `token_count()`, and `tokens_truncated()` is `true` when it exceeds `tokens.len()`.
pub token_total: Option<usize>,
/// Optional summary items; `summary_line_count()` reports how many there are.
pub summary_lines: Option<Vec<SummaryItem>>,
/// Optional hash string for the file (the hashing scheme is not visible in this file).
pub file_hash: Option<String>,
/// Optional token estimate, set together with `token_model` by
/// `with_estimated_llm_tokens`.
pub estimated_llm_tokens: Option<u64>,
/// Optional model label stored alongside `estimated_llm_tokens`.
/// NOTE(review): presumably the model the estimate was computed for — confirm.
pub token_model: Option<String>,
/// Optional ratio set via `with_compression_ratio`; what it compares is not established
/// in this file.
pub compression_ratio: Option<f64>,
}
impl FileInfo {
    /// Creates an empty `FileInfo`.
    ///
    /// All counters are zero, every truncation flag is `false`, `total_lines_exact` is
    /// `true`, `encoding` is `"UTF-8"`, and every optional field is `None`.
    pub fn new() -> Self {
        // Delegate so the complete field list is spelled out in exactly one place.
        Self::with_metadata(0, 0, None, "UTF-8".to_string())
    }

    /// Creates a `FileInfo` that carries the given file-level metadata and no content.
    ///
    /// Fields not covered by a parameter start out empty: no lines, no syntax errors,
    /// no truncation, `total_lines_exact == true`, and `None` for every optional field.
    pub fn with_metadata(
        total_lines: usize,
        total_bytes: usize,
        language: Option<String>,
        encoding: String,
    ) -> Self {
        Self {
            lines: Vec::new(),
            original_lines: None,
            total_lines,
            total_lines_exact: true,
            total_bytes,
            truncated: false,
            truncated_by_lines: false,
            truncated_by_bytes: false,
            truncated_by_context: false,
            language,
            encoding,
            syntax_errors: Vec::new(),
            tokens: None,
            token_total: None,
            summary_lines: None,
            file_hash: None,
            estimated_llm_tokens: None,
            token_model: None,
            compression_ratio: None,
        }
    }

    /// Replaces the stored lines and returns the updated value.
    pub fn with_lines(mut self, lines: Vec<String>) -> Self {
        self.lines = lines;
        self
    }

    /// Sets the overall truncation flag together with the line-limit and byte-limit causes.
    ///
    /// `truncated_by_context` is left untouched.
    pub fn with_truncation(mut self, truncated: bool, by_lines: bool, by_bytes: bool) -> Self {
        self.truncated = truncated;
        self.truncated_by_lines = by_lines;
        self.truncated_by_bytes = by_bytes;
        self
    }

    /// Records whether the content was truncated by context fitting.
    ///
    /// `truncated_by_context` is set to `truncated`. The overall `truncated` flag becomes
    /// `true` when `truncated` is `true` or when a line or byte limit is already recorded,
    /// and `false` otherwise.
    pub fn with_context_truncation(mut self, truncated: bool) -> Self {
        self.truncated_by_context = truncated;
        // Passing `false` must not erase truncation already attributed to a line or byte
        // limit; otherwise `truncation_reason()` would report `None` for truncated content.
        self.truncated = truncated || self.truncated_by_lines || self.truncated_by_bytes;
        self
    }

    /// Appends one syntax-error message.
    pub fn add_syntax_error(&mut self, error: String) {
        self.syntax_errors.push(error);
    }

    /// Replaces the stored token list.
    pub fn with_tokens(mut self, tokens: Option<Vec<String>>) -> Self {
        self.tokens = tokens;
        self
    }

    /// Replaces the stored total token count.
    pub fn with_token_total(mut self, total: Option<usize>) -> Self {
        self.token_total = total;
        self
    }

    /// Replaces the stored file hash.
    pub fn with_file_hash(mut self, hash: Option<String>) -> Self {
        self.file_hash = hash;
        self
    }

    /// Stores a token estimate and the model label that accompanies it.
    pub fn with_estimated_llm_tokens(mut self, tokens: Option<u64>, model: Option<String>) -> Self {
        self.estimated_llm_tokens = tokens;
        self.token_model = model;
        self
    }

    /// Replaces the stored compression ratio.
    pub fn with_compression_ratio(mut self, ratio: Option<f64>) -> Self {
        self.compression_ratio = ratio;
        self
    }

    /// Replaces the stored summary items.
    pub fn with_summary_lines(mut self, summary_lines: Option<Vec<SummaryItem>>) -> Self {
        self.summary_lines = summary_lines;
        self
    }

    /// Replaces the stored original lines.
    pub fn with_original_lines(mut self, original: Option<Vec<String>>) -> Self {
        self.original_lines = original;
        self
    }

    /// Sets whether `total_lines` is an exact count.
    pub fn with_total_lines_exact(mut self, exact: bool) -> Self {
        self.total_lines_exact = exact;
        self
    }

    /// Returns `true` when no syntax errors have been recorded.
    pub fn is_success(&self) -> bool {
        self.syntax_errors.is_empty()
    }

    /// Returns the number of lines currently stored in `lines`.
    pub fn processed_lines(&self) -> usize {
        self.lines.len()
    }

    /// Returns `processed_lines() / total_lines` as a float.
    ///
    /// Yields `1.0` when `total_lines` is zero, which avoids a division by zero.
    pub fn processing_ratio(&self) -> f64 {
        if self.total_lines == 0 {
            1.0
        } else {
            self.processed_lines() as f64 / self.total_lines as f64
        }
    }

    /// Returns `true` when a token list is present and non-empty.
    pub fn has_tokens(&self) -> bool {
        self.tokens.as_ref().is_some_and(|t| !t.is_empty())
    }

    /// Returns `true` when summary items are present and non-empty.
    pub fn has_summary(&self) -> bool {
        self.summary_lines.as_ref().is_some_and(|s| !s.is_empty())
    }

    /// Returns the token count: `token_total` when set, otherwise the length of `tokens`,
    /// otherwise `0`.
    pub fn token_count(&self) -> usize {
        self.token_total
            .or_else(|| self.tokens.as_ref().map(Vec::len))
            .unwrap_or(0)
    }

    /// Returns `true` when both `token_total` and `tokens` are present and the total
    /// exceeds the number of stored tokens.
    pub fn tokens_truncated(&self) -> bool {
        matches!(
            (&self.token_total, &self.tokens),
            (Some(total), Some(tokens)) if *total > tokens.len()
        )
    }

    /// Returns the number of summary items, or `0` when none are stored.
    pub fn summary_line_count(&self) -> usize {
        self.summary_lines.as_ref().map_or(0, Vec::len)
    }

    /// Describes why the content was truncated.
    ///
    /// Returns `None` when `truncated` is `false`. Otherwise the active causes are listed
    /// in the fixed order line limit, byte limit, context fitting and joined with
    /// `" and "`; if `truncated` is set but no cause flag is, the result is
    /// `"unknown reason"`.
    pub fn truncation_reason(&self) -> Option<String> {
        if !self.truncated {
            return None;
        }
        let mut reasons = Vec::new();
        if self.truncated_by_lines {
            reasons.push("line limit");
        }
        if self.truncated_by_bytes {
            reasons.push("byte limit");
        }
        if self.truncated_by_context {
            reasons.push("context fitting");
        }
        if reasons.is_empty() {
            Some("unknown reason".to_string())
        } else {
            Some(reasons.join(" and "))
        }
    }

    /// Builds a [`ProcessingStats`] snapshot from the current state.
    ///
    /// Counts and flags are computed through the helper methods on this type; `language`
    /// and `encoding` are cloned into the snapshot.
    pub fn get_stats_summary(&self) -> ProcessingStats {
        ProcessingStats {
            total_lines: self.total_lines,
            total_lines_exact: self.total_lines_exact,
            processed_lines: self.processed_lines(),
            total_bytes: self.total_bytes,
            truncated: self.truncated,
            truncation_reason: self.truncation_reason(),
            has_syntax_errors: !self.syntax_errors.is_empty(),
            error_count: self.syntax_errors.len(),
            language: self.language.clone(),
            encoding: self.encoding.clone(),
            token_count: self.token_count(),
            tokens_truncated: self.tokens_truncated(),
            summary_line_count: self.summary_line_count(),
        }
    }
}
impl Default for FileInfo {
fn default() -> Self {
Self::new()
}
}
/// Flat snapshot of a `FileInfo`, as produced by `FileInfo::get_stats_summary`.
///
/// Holds counts and flags only; the line, token and summary contents are not copied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessingStats {
/// Copy of `FileInfo::total_lines`.
pub total_lines: usize,
/// Copy of `FileInfo::total_lines_exact`.
pub total_lines_exact: bool,
/// Number of lines stored in the source `FileInfo` (`processed_lines()`).
pub processed_lines: usize,
/// Copy of `FileInfo::total_bytes`.
pub total_bytes: usize,
/// Copy of `FileInfo::truncated`.
pub truncated: bool,
/// Result of `FileInfo::truncation_reason()`; `None` when not truncated.
pub truncation_reason: Option<String>,
/// `true` when at least one syntax error was recorded.
pub has_syntax_errors: bool,
/// Number of recorded syntax errors.
pub error_count: usize,
/// Clone of `FileInfo::language`.
pub language: Option<String>,
/// Clone of `FileInfo::encoding`.
pub encoding: String,
/// Result of `FileInfo::token_count()`.
pub token_count: usize,
/// Result of `FileInfo::tokens_truncated()`.
pub tokens_truncated: bool,
/// Result of `FileInfo::summary_line_count()`.
pub summary_line_count: usize,
}
#[cfg(test)]
mod tests {
    // `super::*` also brings `SummaryItem` into scope, so no separate import is needed.
    use super::*;

    /// `new()` starts with empty content, zero counts and every optional field unset.
    #[test]
    fn test_new_file_info() {
        let info = FileInfo::new();
        assert_eq!(info.lines.len(), 0);
        assert!(info.original_lines.is_none());
        assert_eq!(info.total_lines, 0);
        assert!(info.total_lines_exact);
        assert_eq!(info.total_bytes, 0);
        assert!(!info.truncated);
        assert!(!info.truncated_by_lines);
        assert!(!info.truncated_by_bytes);
        assert!(!info.truncated_by_context);
        assert_eq!(info.language, None);
        assert_eq!(info.encoding, "UTF-8");
        assert_eq!(info.syntax_errors.len(), 0);
        assert_eq!(info.tokens, None);
        assert!(info.token_total.is_none());
        assert_eq!(info.summary_lines, None);
        assert!(info.file_hash.is_none());
        assert!(info.estimated_llm_tokens.is_none());
        assert!(info.token_model.is_none());
        assert!(info.compression_ratio.is_none());
    }

    /// `Default` and `new()` agree on the observable defaults.
    #[test]
    fn test_default_matches_new() {
        let info = FileInfo::default();
        assert!(info.lines.is_empty());
        assert_eq!(info.total_lines, 0);
        assert!(info.total_lines_exact);
        assert_eq!(info.encoding, "UTF-8");
        assert!(!info.truncated);
        assert!(info.is_success());
    }

    /// `with_metadata` stores exactly the values it is given.
    #[test]
    fn test_with_metadata() {
        let info =
            FileInfo::with_metadata(100, 1024, Some("rust".to_string()), "UTF-8".to_string());
        assert_eq!(info.total_lines, 100);
        assert!(info.total_lines_exact);
        assert_eq!(info.total_bytes, 1024);
        assert_eq!(info.language, Some("rust".to_string()));
        assert_eq!(info.encoding, "UTF-8");
        assert!(info.lines.is_empty());
        assert!(!info.truncated);
    }

    /// Chained builder calls each update their own field.
    #[test]
    fn test_builder_pattern() {
        let lines = vec!["line1".to_string(), "line2".to_string()];
        let tokens = vec!["token1".to_string(), "token2".to_string()];
        let summary = vec![SummaryItem::new("fn main()", 1, Some(3), "function")];
        let info = FileInfo::new()
            .with_lines(lines.clone())
            .with_truncation(true, true, false)
            .with_original_lines(Some(lines.clone()))
            .with_total_lines_exact(false)
            .with_tokens(Some(tokens.clone()))
            .with_summary_lines(Some(summary));
        assert_eq!(info.lines, lines);
        assert_eq!(info.original_lines, Some(lines));
        assert!(info.truncated);
        assert!(info.truncated_by_lines);
        assert!(!info.truncated_by_bytes);
        assert_eq!(info.tokens, Some(tokens));
        assert!(!info.total_lines_exact);
        assert_eq!(info.summary_lines.as_ref().map(Vec::len), Some(1));
    }

    /// The optional metadata builders store their arguments unchanged.
    #[test]
    fn test_optional_metadata_builders() {
        let info = FileInfo::new()
            .with_file_hash(Some("abc123".to_string()))
            .with_estimated_llm_tokens(Some(42), Some("model".to_string()))
            .with_compression_ratio(Some(0.5))
            .with_token_total(Some(7));
        assert_eq!(info.file_hash, Some("abc123".to_string()));
        assert_eq!(info.estimated_llm_tokens, Some(42));
        assert_eq!(info.token_model, Some("model".to_string()));
        assert_eq!(info.compression_ratio, Some(0.5));
        assert_eq!(info.token_total, Some(7));
    }

    /// The ratio is processed/total, with 1.0 for an empty file.
    #[test]
    // Exact comparison is intentional: 50/100 and the literal 1.0 are exactly representable.
    #[allow(clippy::float_cmp)]
    fn test_processing_ratio() {
        let mut info = FileInfo::new();
        info.total_lines = 100;
        info.lines = vec!["line".to_string(); 50];
        assert_eq!(info.processing_ratio(), 0.5);
        info.total_lines = 0;
        assert_eq!(info.processing_ratio(), 1.0);
    }

    /// Line and byte causes are reported individually and joined with " and ".
    #[test]
    fn test_truncation_reason() {
        let mut info = FileInfo::new();
        assert_eq!(info.truncation_reason(), None);
        info.truncated = true;
        info.truncated_by_lines = true;
        assert_eq!(info.truncation_reason(), Some("line limit".to_string()));
        info.truncated_by_bytes = true;
        assert_eq!(
            info.truncation_reason(),
            Some("line limit and byte limit".to_string())
        );
        info.truncated_by_lines = false;
        assert_eq!(info.truncation_reason(), Some("byte limit".to_string()));
    }

    /// Context truncation sets both flags and is reported as "context fitting".
    #[test]
    fn test_context_truncation() {
        let info = FileInfo::new().with_context_truncation(true);
        assert!(info.truncated);
        assert!(info.truncated_by_context);
        assert!(!info.truncated_by_lines);
        assert!(!info.truncated_by_bytes);
        assert_eq!(
            info.truncation_reason(),
            Some("context fitting".to_string())
        );
    }

    /// Covers the fallback text and the fixed ordering when every cause is set.
    #[test]
    fn test_truncation_reason_unknown_and_all() {
        let mut info = FileInfo::new();
        info.truncated = true;
        assert_eq!(info.truncation_reason(), Some("unknown reason".to_string()));
        info.truncated_by_lines = true;
        info.truncated_by_bytes = true;
        info.truncated_by_context = true;
        assert_eq!(
            info.truncation_reason(),
            Some("line limit and byte limit and context fitting".to_string())
        );
    }

    /// Exercises the boolean and counting helpers before and after data is attached.
    #[test]
    fn test_helper_methods() {
        let mut info = FileInfo::new();
        assert!(info.is_success());
        info.add_syntax_error("test error".to_string());
        assert!(!info.is_success());
        assert!(!info.has_tokens());
        assert!(!info.has_summary());
        assert_eq!(info.token_count(), 0);
        assert_eq!(info.summary_line_count(), 0);
        info.tokens = Some(vec!["token".to_string()]);
        info.token_total = Some(5);
        info.summary_lines = Some(vec![SummaryItem::new("summary", 1, None, "other")]);
        assert!(info.has_tokens());
        assert!(info.tokens_truncated());
        assert!(info.has_summary());
        assert_eq!(info.token_count(), 5);
        assert_eq!(info.summary_line_count(), 1);
    }

    /// Without `token_total`, the count comes from the token list and nothing is truncated.
    #[test]
    fn test_token_count_falls_back_to_token_list() {
        let info = FileInfo::new().with_tokens(Some(vec!["a".to_string(), "b".to_string()]));
        assert_eq!(info.token_count(), 2);
        assert!(!info.tokens_truncated());
        let info = info.with_token_total(Some(2));
        assert_eq!(info.token_count(), 2);
        assert!(!info.tokens_truncated());
        // An empty list counts as "no tokens" for `has_tokens`.
        let empty = FileInfo::new().with_tokens(Some(Vec::new()));
        assert!(!empty.has_tokens());
        assert_eq!(empty.token_count(), 0);
    }

    /// The stats snapshot mirrors the helper methods of the source value.
    #[test]
    fn test_stats_summary() {
        let mut info =
            FileInfo::with_metadata(100, 2048, Some("rust".to_string()), "UTF-8".to_string());
        info.lines = vec!["line".to_string(); 50];
        info.truncated = true;
        info.truncated_by_lines = true;
        info.total_lines_exact = false;
        info.add_syntax_error("test error".to_string());
        info.tokens = Some(vec!["token1".to_string(), "token2".to_string()]);
        info.token_total = Some(5);
        let stats = info.get_stats_summary();
        assert_eq!(stats.total_lines, 100);
        assert!(!stats.total_lines_exact);
        assert_eq!(stats.processed_lines, 50);
        assert_eq!(stats.total_bytes, 2048);
        assert!(stats.truncated);
        assert_eq!(stats.truncation_reason, Some("line limit".to_string()));
        assert!(stats.has_syntax_errors);
        assert_eq!(stats.error_count, 1);
        assert_eq!(stats.language, Some("rust".to_string()));
        assert_eq!(stats.encoding, "UTF-8");
        assert_eq!(stats.token_count, 5);
        assert!(stats.tokens_truncated);
        assert_eq!(stats.summary_line_count, 0);
    }
}