pub mod escaping;
mod markdown;
mod toon;
mod xml;
use crate::repomap::RepoMap;
use crate::types::{Repository, TokenizerModel};
use std::io::{self, Write};
pub use markdown::MarkdownFormatter;
pub use toon::ToonFormatter;
pub use xml::XmlFormatter;
/// Output formats that [`OutputFormatter`] can select a formatter for.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum OutputFormat {
/// XML output, rendered by [`XmlFormatter`]; this is the default variant.
#[default]
Xml,
/// Markdown output, rendered by [`MarkdownFormatter`].
Markdown,
/// JSON output, rendered by [`JsonFormatter`].
Json,
/// YAML output, rendered by [`YamlFormatter`].
Yaml,
/// TOON output, rendered by [`ToonFormatter`].
Toon,
/// Plain-text output, rendered by [`PlainFormatter`].
Plain,
}
/// Renders a [`Repository`] into a single in-memory `String`.
pub trait Formatter {
/// Renders `repo` together with its repository `map` and returns the text.
#[must_use]
fn format(&self, repo: &Repository, map: &RepoMap) -> String;
/// Renders `repo` on its own, without a repository map.
#[must_use]
fn format_repo(&self, repo: &Repository) -> String;
/// Returns a short static identifier for the formatter; the implementations
/// in this file return `"json"`, `"plain"` and `"yaml"`.
fn name(&self) -> &'static str;
}
/// Writer-based formatting: output goes to a caller-supplied [`Write`] sink and
/// I/O failures are reported through [`io::Result`].
///
/// NOTE(review): no implementation is visible in this file; presumably the
/// formatters in the submodules implement it — confirm.
pub trait StreamingFormatter {
/// Writes the rendering of `repo` and its repository `map` to `writer`.
fn format_to_writer<W: Write>(
&self,
repo: &Repository,
map: &RepoMap,
writer: &mut W,
) -> io::Result<()>;
/// Writes the rendering of `repo` alone (no repository map) to `writer`.
fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()>;
}
pub struct OutputFormatter;
impl OutputFormatter {
pub fn claude() -> XmlFormatter {
XmlFormatter::new(true).with_model(TokenizerModel::Claude)
}
pub fn gpt() -> MarkdownFormatter {
MarkdownFormatter::new().with_model(TokenizerModel::Claude)
}
pub fn json() -> JsonFormatter {
JsonFormatter
}
pub fn gemini() -> YamlFormatter {
YamlFormatter::new(TokenizerModel::Gemini)
}
pub fn by_format(format: OutputFormat) -> Box<dyn Formatter> {
Self::by_format_with_options(format, true)
}
pub fn by_format_with_options(format: OutputFormat, line_numbers: bool) -> Box<dyn Formatter> {
Self::by_format_with_all_options(format, line_numbers, true)
}
pub fn by_format_with_all_options(
format: OutputFormat,
line_numbers: bool,
show_file_index: bool,
) -> Box<dyn Formatter> {
let model = Self::default_model_for_format(format);
Self::by_format_with_all_options_and_model(format, line_numbers, show_file_index, model)
}
pub fn by_format_with_model(format: OutputFormat, model: TokenizerModel) -> Box<dyn Formatter> {
Self::by_format_with_all_options_and_model(format, true, true, model)
}
pub fn by_format_with_all_options_and_model(
format: OutputFormat,
line_numbers: bool,
show_file_index: bool,
model: TokenizerModel,
) -> Box<dyn Formatter> {
match format {
OutputFormat::Xml => Box::new(
XmlFormatter::new(true)
.with_line_numbers(line_numbers)
.with_file_index(show_file_index)
.with_model(model),
),
OutputFormat::Markdown => Box::new(
MarkdownFormatter::new()
.with_line_numbers(line_numbers)
.with_model(model),
),
OutputFormat::Json => Box::new(JsonFormatter),
OutputFormat::Yaml => Box::new(YamlFormatter::new(model)),
OutputFormat::Toon => Box::new(
ToonFormatter::new()
.with_line_numbers(line_numbers)
.with_file_index(show_file_index)
.with_model(model),
),
OutputFormat::Plain => Box::new(
PlainFormatter::new()
.with_line_numbers(line_numbers)
.with_model(model),
),
}
}
pub fn toon() -> ToonFormatter {
ToonFormatter::new().with_model(TokenizerModel::Claude)
}
fn default_model_for_format(format: OutputFormat) -> TokenizerModel {
match format {
OutputFormat::Yaml => TokenizerModel::Gemini,
_ => TokenizerModel::Claude,
}
}
}
/// Formatter that serializes its input as pretty-printed JSON.
pub struct JsonFormatter;

/// JSON document produced by `format`: the repository plus its map.
#[derive(serde::Serialize)]
struct JsonOutput<'a> {
    repository: &'a Repository,
    map: &'a RepoMap,
}

/// JSON document produced by `format_repo`: the repository only.
#[derive(serde::Serialize)]
struct JsonRepoOutput<'a> {
    repository: &'a Repository,
}

impl Formatter for JsonFormatter {
    /// Serializes `repo` and `map` under the keys `repository` and `map`.
    ///
    /// A serialization failure yields an empty string rather than an error.
    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
        let document = JsonOutput { repository: repo, map };
        match serde_json::to_string_pretty(&document) {
            Ok(json) => json,
            Err(_) => String::new(),
        }
    }

    /// Serializes `repo` under the key `repository`.
    ///
    /// A serialization failure yields an empty string rather than an error.
    fn format_repo(&self, repo: &Repository) -> String {
        let document = JsonRepoOutput { repository: repo };
        match serde_json::to_string_pretty(&document) {
            Ok(json) => json,
            Err(_) => String::new(),
        }
    }

    fn name(&self) -> &'static str {
        "json"
    }
}
/// Formatter producing a plain-text dump of a repository.
///
/// Line numbers are enabled by default and token counts default to
/// `TokenizerModel::Claude`; both can be changed with the builder methods.
pub struct PlainFormatter {
    include_line_numbers: bool,
    token_model: TokenizerModel,
}

impl PlainFormatter {
    /// Creates a formatter with line numbers on and the Claude token model.
    pub fn new() -> Self {
        Self { include_line_numbers: true, token_model: TokenizerModel::Claude }
    }

    /// Enables or disables the line-number prefix on file content.
    pub fn with_line_numbers(mut self, enabled: bool) -> Self {
        self.include_line_numbers = enabled;
        self
    }

    /// Selects which model's token counts are reported.
    pub fn with_model(mut self, model: TokenizerModel) -> Self {
        self.token_model = model;
        self
    }

    /// Appends one file's `content` to `output`.
    ///
    /// With line numbers enabled, every line is prefixed by its 1-based number
    /// right-aligned in four columns. Otherwise the content is copied verbatim
    /// and a final newline is added if it is missing.
    fn push_content(&self, output: &mut String, content: &str) {
        use std::fmt::Write as _;

        if self.include_line_numbers {
            for (index, line) in content.lines().enumerate() {
                // Writing into a `String` cannot fail, so the result is ignored.
                let _ = writeln!(output, "{:4} {}", index + 1, line);
            }
        } else {
            output.push_str(content);
            if !content.ends_with('\n') {
                output.push('\n');
            }
        }
    }
}

impl Default for PlainFormatter {
    fn default() -> Self {
        Self::new()
    }
}

impl Formatter for PlainFormatter {
    /// Renders a header with repository totals, the map summary, the directory
    /// structure (when present) and then every file with its metadata and
    /// content.
    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
        use std::fmt::Write as _;

        // The separator rules are built once instead of once per use.
        let heavy_rule = "=".repeat(60);
        let light_rule = "-".repeat(40);

        let mut output = String::new();
        // Writing into a `String` cannot fail, so `write!` results are ignored.
        let _ = writeln!(output, "Repository: {}", repo.name);
        let _ = writeln!(
            output,
            "Files: {} | Lines: {} | Tokens: {}",
            repo.metadata.total_files,
            repo.metadata.total_lines,
            repo.metadata.total_tokens.get(self.token_model)
        );
        output.push_str(&heavy_rule);
        output.push_str("\n\n");

        output.push_str("REPOSITORY MAP\n");
        output.push_str(&light_rule);
        output.push('\n');
        output.push_str(&map.summary);
        output.push_str("\n\n");

        if let Some(structure) = &repo.metadata.directory_structure {
            output.push_str("DIRECTORY STRUCTURE\n");
            output.push_str(&light_rule);
            output.push('\n');
            output.push_str(structure);
            output.push_str("\n\n");
        }

        output.push_str("FILES\n");
        output.push_str(&heavy_rule);
        output.push('\n');

        for file in &repo.files {
            output.push('\n');
            let _ = writeln!(output, "File: {}", file.relative_path);
            if let Some(lang) = &file.language {
                let _ = writeln!(output, "Language: {}", lang);
            }
            let _ = writeln!(output, "Tokens: {}", file.token_count.get(self.token_model));
            output.push_str(&light_rule);
            output.push('\n');
            if let Some(content) = &file.content {
                self.push_content(&mut output, content);
            }
            output.push_str(&light_rule);
            output.push('\n');
        }
        output
    }

    /// Renders every file as a `=== path ===` banner followed by its content
    /// and a blank line.
    fn format_repo(&self, repo: &Repository) -> String {
        use std::fmt::Write as _;

        let mut output = String::new();
        for file in &repo.files {
            // Writing into a `String` cannot fail, so the result is ignored.
            let _ = writeln!(output, "=== {} ===", file.relative_path);
            if let Some(content) = &file.content {
                self.push_content(&mut output, content);
            }
            output.push('\n');
        }
        output
    }

    fn name(&self) -> &'static str {
        "plain"
    }
}
/// Formatter that lays out a YAML document by hand for `format` and defers to
/// `serde_yaml` for `format_repo`.
pub struct YamlFormatter {
    token_model: TokenizerModel,
}

impl YamlFormatter {
    /// Creates a formatter that reports token counts for `model`.
    pub fn new(model: TokenizerModel) -> Self {
        Self { token_model: model }
    }

    /// Renders `value` as a single-line YAML scalar.
    ///
    /// Values that are safe as plain scalars are emitted unchanged, so typical
    /// names and paths look exactly as before; anything YAML could misread is
    /// emitted double-quoted with escapes.
    ///
    /// NOTE(review): the sibling `escaping` module may already offer an
    /// equivalent helper — consolidate if so.
    fn scalar(value: impl std::fmt::Display) -> String {
        use std::fmt::Write as _;

        let text = value.to_string();
        if !Self::needs_quoting(&text) {
            return text;
        }

        let mut quoted = String::with_capacity(text.len() + 2);
        quoted.push('"');
        for ch in text.chars() {
            match ch {
                '"' => quoted.push_str("\\\""),
                '\\' => quoted.push_str("\\\\"),
                '\n' => quoted.push_str("\\n"),
                '\r' => quoted.push_str("\\r"),
                '\t' => quoted.push_str("\\t"),
                c if c.is_control() || c == '\u{2028}' || c == '\u{2029}' => {
                    // Every character reaching this arm fits in four hex
                    // digits; writing into a `String` cannot fail.
                    let _ = write!(quoted, "\\u{:04X}", c as u32);
                }
                c => quoted.push(c),
            }
        }
        quoted.push('"');
        quoted
    }

    /// Returns `true` when `text` is not safe to emit as a plain YAML scalar.
    ///
    /// Deliberately conservative: quoting a value that did not strictly need
    /// it is harmless, while failing to quote corrupts the document.
    fn needs_quoting(text: &str) -> bool {
        // Characters YAML treats as indicators at the start of a scalar.
        const INDICATORS: &str = "-?:,[]{}#&*!|>'\"%@`";
        // Words a YAML parser may resolve to null, a boolean or a float.
        const RESERVED: &[&str] = &[
            "null", "~", "true", "false", "yes", "no", "on", "off", ".inf", "+.inf", ".nan",
        ];

        let first = match text.chars().next() {
            Some(ch) => ch,
            None => return true,
        };

        INDICATORS.contains(first)
            || text != text.trim()
            || text.ends_with(':')
            || text.contains(": ")
            || text.contains(" #")
            || text
                .chars()
                .any(|ch| ch.is_control() || ch == '\u{2028}' || ch == '\u{2029}')
            || RESERVED.contains(&text.to_ascii_lowercase().as_str())
            // Keep number-like strings typed as strings.
            || text.parse::<f64>().is_ok()
            || text.starts_with("0x")
            || text.starts_with("0o")
    }

    /// Appends `key: |` at `indent` spaces followed by `text` as a literal
    /// block scalar indented two further spaces.
    ///
    /// YAML infers a block scalar's indentation from its first non-empty line,
    /// so text that begins with a space or a blank line would be mis-parsed.
    /// An explicit indentation indicator (`|2`) is added in exactly those
    /// cases; all other text produces the same header as before.
    fn push_block_scalar(output: &mut String, indent: usize, key: &str, text: &str) {
        use std::fmt::Write as _;

        let ambiguous_indent = text.starts_with(|c: char| c == ' ' || c == '\n' || c == '\r');
        let hint = if ambiguous_indent { "2" } else { "" };

        // Writing into a `String` cannot fail, so `write!` results are ignored.
        let _ = writeln!(output, "{:indent$}{}: |{}", "", key, hint, indent = indent);
        for line in text.lines() {
            let _ = writeln!(output, "{:width$}{}", "", line, width = indent + 2);
        }
    }
}

impl Formatter for YamlFormatter {
    /// Renders repository metadata, language statistics, the repository map
    /// and every file as YAML, ending with a `query` placeholder.
    ///
    /// Free-text values (names, paths, symbol summaries) pass through
    /// [`YamlFormatter::scalar`], so paths such as `[slug].tsx` or values
    /// containing `: ` no longer yield invalid YAML.
    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
        use std::fmt::Write as _;

        let mut output = String::new();
        output.push_str("---\n");
        output.push_str("# Repository Context for Gemini\n");
        output.push_str("# Note: Query should be at the END of this context\n\n");

        // Writing into a `String` cannot fail, so `write!` results are ignored.
        output.push_str("metadata:\n");
        let _ = writeln!(output, "  name: {}", Self::scalar(&repo.name));
        let _ = writeln!(output, "  files: {}", repo.metadata.total_files);
        let _ = writeln!(output, "  lines: {}", repo.metadata.total_lines);
        let _ = writeln!(
            output,
            "  tokens: {}",
            repo.metadata.total_tokens.get(self.token_model)
        );
        output.push('\n');

        output.push_str("languages:\n");
        for lang in &repo.metadata.languages {
            let _ = write!(
                output,
                "  - name: {}\n    files: {}\n    percentage: {:.1}%\n",
                Self::scalar(&lang.language),
                lang.files,
                lang.percentage
            );
        }
        output.push('\n');

        output.push_str("repository_map:\n");
        Self::push_block_scalar(&mut output, 2, "summary", &map.summary);
        output.push_str("  key_symbols:\n");
        for sym in &map.key_symbols {
            let _ = write!(
                output,
                "    - name: {}\n      type: {}\n      file: {}\n      rank: {}\n",
                Self::scalar(&sym.name),
                Self::scalar(&sym.kind),
                Self::scalar(&sym.file),
                sym.rank
            );
            if let Some(ref summary) = sym.summary {
                let _ = writeln!(output, "      summary: {}", Self::scalar(summary));
            }
        }
        output.push('\n');

        output.push_str("files:\n");
        for file in &repo.files {
            let _ = writeln!(output, "  - path: {}", Self::scalar(&file.relative_path));
            if let Some(lang) = &file.language {
                let _ = writeln!(output, "    language: {}", Self::scalar(lang));
            }
            let _ = writeln!(
                output,
                "    tokens: {}",
                file.token_count.get(self.token_model)
            );
            if let Some(content) = &file.content {
                Self::push_block_scalar(&mut output, 4, "content", content);
            }
        }

        output.push_str("\n# --- INSERT YOUR QUERY BELOW THIS LINE ---\n");
        output.push_str("query: |\n");
        output.push_str("  [Your question about this repository]\n");
        output
    }

    /// Serializes `repo` with `serde_yaml`; a serialization failure yields an
    /// empty string rather than an error.
    fn format_repo(&self, repo: &Repository) -> String {
        serde_yaml::to_string(repo).unwrap_or_default()
    }

    fn name(&self) -> &'static str {
        "yaml"
    }
}
#[cfg(test)]
// The fixtures below build owned strings with `to_string()` on literals.
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use crate::repomap::RepoMapGenerator;
    use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};

    /// Token counts used both for the fixture file and the repository totals.
    fn sample_token_counts() -> TokenCounts {
        TokenCounts {
            o200k: 48,
            cl100k: 49,
            claude: 50,
            gemini: 47,
            llama: 46,
            mistral: 46,
            deepseek: 46,
            qwen: 46,
            cohere: 47,
            grok: 46,
        }
    }

    /// Builds a repository named `test` containing one two-line Python file.
    fn create_test_repo() -> Repository {
        let main_py = RepoFile {
            path: "/tmp/test/main.py".into(),
            relative_path: "main.py".to_string(),
            language: Some("python".to_string()),
            size_bytes: 100,
            token_count: sample_token_counts(),
            symbols: Vec::new(),
            importance: 0.8,
            content: Some("def main():\n print('hello')".to_string()),
        };

        let python_stats = LanguageStats {
            language: "Python".to_string(),
            files: 1,
            lines: 2,
            percentage: 100.0,
        };

        let metadata = RepoMetadata {
            total_files: 1,
            total_lines: 2,
            total_tokens: sample_token_counts(),
            languages: vec![python_stats],
            framework: None,
            description: None,
            branch: None,
            commit: None,
            directory_structure: Some("main.py\n".to_string()),
            external_dependencies: vec!["requests".to_string()],
            git_history: None,
        };

        Repository {
            name: "test".to_string(),
            path: "/tmp/test".into(),
            files: vec![main_py],
            metadata,
        }
    }

    /// The JSON output carries the repository name and a `files` key.
    #[test]
    fn test_json_formatter() {
        let repo = create_test_repo();
        let map = RepoMapGenerator::new(1000).generate(&repo);
        let rendered = OutputFormatter::json().format(&repo, &map);
        assert!(rendered.contains("\"name\": \"test\""));
        assert!(rendered.contains("\"files\""));
    }

    /// The YAML output carries the repository name and the query marker.
    #[test]
    fn test_yaml_formatter() {
        let repo = create_test_repo();
        let map = RepoMapGenerator::new(1000).generate(&repo);
        let rendered = OutputFormatter::gemini().format(&repo, &map);
        assert!(rendered.contains("name: test"));
        assert!(rendered.contains("# --- INSERT YOUR QUERY"));
    }
}