use crate::output::{Formatter, StreamingFormatter};
use crate::repomap::RepoMap;
use crate::types::{Repository, TokenizerModel};
use std::io::{self, Write};
/// Formatter that renders a repository (and optionally its repo map) as
/// TOON-style text.
///
/// Instances are configured through the `with_*` builder methods.
pub struct ToonFormatter {
/// When `true`, file content lines are emitted with a `N:` line-number prefix.
include_line_numbers: bool,
/// Set by `with_tabular`.
/// NOTE(review): no method in this file reads this flag — confirm whether it
/// is still needed.
use_tabular: bool,
/// When `true`, the `file_index[...]` table is emitted before the file bodies.
show_file_index: bool,
/// Tokenizer model used to pick which token count is reported.
token_model: TokenizerModel,
}
impl ToonFormatter {
/// Creates a formatter with every feature flag switched on and token counts
/// reported for `TokenizerModel::Claude`.
pub fn new() -> Self {
    let token_model = TokenizerModel::Claude;
    Self {
        token_model,
        show_file_index: true,
        use_tabular: true,
        include_line_numbers: true,
    }
}
pub fn with_line_numbers(mut self, enabled: bool) -> Self {
self.include_line_numbers = enabled;
self
}
pub fn with_tabular(mut self, enabled: bool) -> Self {
self.use_tabular = enabled;
self
}
pub fn with_file_index(mut self, enabled: bool) -> Self {
self.show_file_index = enabled;
self
}
pub fn with_model(mut self, model: TokenizerModel) -> Self {
self.token_model = model;
self
}
/// Rough byte-size guess for the rendered output, used to pre-size the
/// output buffer.
///
/// The estimate is a fixed allowance, plus a fixed allowance per file, plus
/// the byte length of every file body that is present.
fn estimate_output_size(repo: &Repository) -> usize {
    const FIXED_OVERHEAD: usize = 500;
    const PER_FILE_OVERHEAD: usize = 300;

    let per_file_total = repo.files.len() * PER_FILE_OVERHEAD;

    let mut body_bytes: usize = 0;
    for file in &repo.files {
        if let Some(text) = file.content.as_ref() {
            body_bytes += text.len();
        }
    }

    FIXED_OVERHEAD + per_file_total + body_bytes
}
/// Writes the `metadata:` section followed by a blank line.
///
/// Always emits the repository name and the file, line and token totals
/// (the token total is taken for the configured tokenizer model). The
/// description, branch and commit entries are emitted only when present.
///
/// Every free-form string value (name, description, branch, commit) is passed
/// through `escape_toon`, so values that would otherwise read as a number,
/// boolean, `null`, or that contain structural characters are quoted.
///
/// # Errors
/// Returns the first I/O error reported by `w`.
fn stream_metadata<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
    let meta = &repo.metadata;

    writeln!(w, "metadata:")?;
    // The name comes from outside the formatter and may contain `:` or look
    // like a literal, so it is escaped just like the description.
    writeln!(w, " name: {}", escape_toon(&repo.name))?;
    writeln!(w, " files: {}", meta.total_files)?;
    writeln!(w, " lines: {}", meta.total_lines)?;
    writeln!(w, " tokens: {}", meta.total_tokens.get(self.token_model))?;

    if let Some(ref desc) = meta.description {
        writeln!(w, " description: {}", escape_toon(desc))?;
    }
    if let Some(ref branch) = meta.branch {
        // A branch literally named `true` or `1.0` must not decode as a
        // boolean or number.
        writeln!(w, " branch: {}", escape_toon(branch))?;
    }
    if let Some(ref commit) = meta.commit {
        // An abbreviated hash can consist solely of digits (or look like
        // `1e5`), which would otherwise decode as a number.
        writeln!(w, " commit: {}", escape_toon(commit))?;
    }

    writeln!(w)
}
/// Writes the `languages[N]{name,files,percentage}:` table and a trailing
/// blank line.
///
/// Nothing at all is written when the repository has no language entries.
/// Percentages are rendered with one decimal place.
///
/// # Errors
/// Returns the first I/O error reported by `w`.
fn stream_languages<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
    let languages = &repo.metadata.languages;
    if !languages.is_empty() {
        writeln!(w, "languages[{}]{{name,files,percentage}}:", languages.len())?;
        for entry in languages {
            writeln!(w, " {},{},{:.1}", entry.language, entry.files, entry.percentage)?;
        }
        writeln!(w)?;
    }
    Ok(())
}
fn stream_directory_structure<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
if let Some(ref structure) = repo.metadata.directory_structure {
writeln!(w, "directory_structure: |")?;
for line in structure.lines() {
writeln!(w, " {}", line)?;
}
writeln!(w)?;
}
Ok(())
}
/// Writes the `dependencies[N]:` list, one escaped entry per line, followed
/// by a blank line.
///
/// Nothing is written when there are no external dependencies.
///
/// # Errors
/// Returns the first I/O error reported by `w`.
fn stream_dependencies<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
    let deps = &repo.metadata.external_dependencies;
    if deps.is_empty() {
        return Ok(());
    }

    writeln!(w, "dependencies[{}]:", deps.len())?;
    deps.iter()
        .try_for_each(|dep| writeln!(w, " {}", escape_toon(dep)))?;
    writeln!(w)
}
/// Writes the `repository_map:` section followed by a blank line.
///
/// The section always contains the token budget and the summary (as a `|`
/// block, one output line per summary line). The `symbols[...]` and
/// `modules[...]` tables are emitted only when they have at least one row.
/// String cells are escaped; numeric cells are written as-is.
///
/// # Errors
/// Returns the first I/O error reported by `w`.
fn stream_repomap<W: Write>(&self, w: &mut W, map: &RepoMap) -> io::Result<()> {
    writeln!(w, "repository_map:")?;
    writeln!(w, " token_budget: {}", map.token_count)?;

    writeln!(w, " summary: |")?;
    for summary_line in map.summary.lines() {
        writeln!(w, " {}", summary_line)?;
    }

    let symbols = &map.key_symbols;
    if !symbols.is_empty() {
        writeln!(
            w,
            " symbols[{}]{{name,type,file,line,rank,summary}}:",
            symbols.len()
        )?;
        for symbol in symbols {
            let name = escape_toon(&symbol.name);
            let kind = escape_toon(&symbol.kind);
            let file = escape_toon(&symbol.file);
            // A missing summary is rendered as an (escaped) empty string.
            let summary = escape_toon(symbol.summary.as_deref().unwrap_or(""));
            writeln!(
                w,
                " {},{},{},{},{},{}",
                name, kind, file, symbol.line, symbol.rank, summary
            )?;
        }
    }

    let modules = &map.module_graph.nodes;
    if !modules.is_empty() {
        writeln!(w, " modules[{}]{{name,files,tokens}}:", modules.len())?;
        for node in modules {
            let name = escape_toon(&node.name);
            writeln!(w, " {},{},{}", name, node.files, node.tokens)?;
        }
    }

    writeln!(w)
}
/// Writes the `file_index[N]{path,tokens,importance}:` table followed by a
/// blank line.
///
/// Each row holds the escaped relative path, the token count for the
/// configured tokenizer model, and an importance label derived from the
/// file's importance score:
/// above 0.8 is `critical`, above 0.6 is `high`, above 0.3 is `normal`,
/// anything else is `low`.
///
/// Nothing is written when the repository has no files.
///
/// # Errors
/// Returns the first I/O error reported by `w`.
fn stream_file_index<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
    let files = &repo.files;
    if files.is_empty() {
        return Ok(());
    }

    writeln!(w, "file_index[{}]{{path,tokens,importance}}:", files.len())?;
    for entry in files {
        // Thresholds are strict: a score exactly on a boundary falls into
        // the lower bucket.
        let label = match entry.importance {
            score if score > 0.8 => "critical",
            score if score > 0.6 => "high",
            score if score > 0.3 => "normal",
            _ => "low",
        };
        let path = escape_toon(&entry.relative_path);
        let tokens = entry.token_count.get(self.token_model);
        writeln!(w, " {},{},{}", path, tokens, label)?;
    }
    writeln!(w)
}
fn stream_files<W: Write>(&self, w: &mut W, repo: &Repository) -> io::Result<()> {
writeln!(w, "files:")?;
for file in &repo.files {
if let Some(ref content) = file.content {
let lang = file.language.as_deref().unwrap_or("?");
writeln!(
w,
"- {}|{}|{}:",
escape_toon(&file.relative_path),
lang,
file.token_count.get(self.token_model)
)?;
if self.include_line_numbers {
let first_line = content.lines().next().unwrap_or("");
let has_embedded_line_nums = first_line.contains(':')
&& first_line
.split(':')
.next()
.is_some_and(|s| s.parse::<u32>().is_ok());
if has_embedded_line_nums {
for line in content.lines() {
if let Some((num_str, rest)) = line.split_once(':') {
if let Ok(line_num) = num_str.parse::<u32>() {
writeln!(w, " {}:{}", line_num, rest)?;
} else {
writeln!(w, " {}", line)?;
}
} else {
writeln!(w, " {}", line)?;
}
}
} else {
for (i, line) in content.lines().enumerate() {
writeln!(w, " {}:{}", i + 1, line)?;
}
}
} else {
for line in content.lines() {
writeln!(w, " {}", line)?;
}
}
}
}
Ok(())
}
}
impl Default for ToonFormatter {
fn default() -> Self {
Self::new()
}
}
/// In-memory rendering: each method streams into a pre-sized byte buffer and
/// converts the bytes to a `String`.
impl Formatter for ToonFormatter {
    /// Renders the repository together with its repo map.
    ///
    /// Any error returned by the streaming writer is discarded. If the bytes
    /// are not valid UTF-8 they are converted lossily instead.
    fn format(&self, repo: &Repository, map: &RepoMap) -> String {
        let mut buffer = Vec::with_capacity(Self::estimate_output_size(repo));
        drop(self.format_to_writer(repo, map, &mut buffer));
        match String::from_utf8(buffer) {
            Ok(text) => text,
            Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
        }
    }

    /// Renders the repository without a repo map section.
    ///
    /// Any error returned by the streaming writer is discarded. If the bytes
    /// are not valid UTF-8 they are converted lossily instead.
    fn format_repo(&self, repo: &Repository) -> String {
        let mut buffer = Vec::with_capacity(Self::estimate_output_size(repo));
        drop(self.format_repo_to_writer(repo, &mut buffer));
        match String::from_utf8(buffer) {
            Ok(text) => text,
            Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
        }
    }

    /// Identifier of this output format.
    fn name(&self) -> &'static str {
        "toon"
    }
}
/// Streaming rendering: sections are written to the caller's writer in a
/// fixed order, stopping at the first I/O error.
impl StreamingFormatter for ToonFormatter {
    /// Writes the full document: banner, metadata, languages, directory
    /// structure, dependencies, repository map, optional file index, files.
    ///
    /// # Errors
    /// Returns the first I/O error reported by `writer`.
    fn format_to_writer<W: Write>(
        &self,
        repo: &Repository,
        map: &RepoMap,
        writer: &mut W,
    ) -> io::Result<()> {
        // Two comment lines, then a blank separator.
        writeln!(writer, "# Infiniloom Repository Context (TOON format)")?;
        writeln!(writer, "# Format: https://github.com/toon-format/toon")?;
        writeln!(writer)?;

        self.stream_metadata(writer, repo)?;
        self.stream_languages(writer, repo)?;
        self.stream_directory_structure(writer, repo)?;
        self.stream_dependencies(writer, repo)?;
        self.stream_repomap(writer, map)?;

        if self.show_file_index {
            self.stream_file_index(writer, repo)?;
        }

        self.stream_files(writer, repo)
    }

    /// Writes the document without the repository map section. The banner
    /// here is a single comment line followed by a blank separator.
    ///
    /// # Errors
    /// Returns the first I/O error reported by `writer`.
    fn format_repo_to_writer<W: Write>(&self, repo: &Repository, writer: &mut W) -> io::Result<()> {
        writeln!(writer, "# Infiniloom Repository Context (TOON format)")?;
        writeln!(writer)?;

        self.stream_metadata(writer, repo)?;
        self.stream_languages(writer, repo)?;
        self.stream_directory_structure(writer, repo)?;
        self.stream_dependencies(writer, repo)?;

        if self.show_file_index {
            self.stream_file_index(writer, repo)?;
        }

        self.stream_files(writer, repo)
    }
}
/// Returns `s` in a form that is safe to embed as a TOON string value.
///
/// The input is returned unchanged when it is unambiguous as a bare string.
/// It is wrapped in double quotes (with `\\`, `\"`, `\n`, `\r` and `\t`
/// escaped) when any of the following holds:
///
/// * it is empty, or starts or ends with a space;
/// * it starts with `-`, which could be read as a list-item marker;
/// * it equals `true`, `false` or `null`, or parses as a number;
/// * it contains a delimiter or structural character
///   (`:`, `,`, `|`, `[`, `]`, `{`, `}`), a double quote, a backslash, or a
///   newline, carriage return or tab.
///
/// Quoting more than strictly necessary is harmless: a quoted string decodes
/// to the same value.
fn escape_toon(s: &str) -> String {
    let has_structural_char = s.contains(|c: char| {
        matches!(
            c,
            ':' | ',' | '|' | '[' | ']' | '{' | '}' | '"' | '\\' | '\n' | '\r' | '\t'
        )
    });

    let needs_quotes = s.is_empty()
        || s.starts_with(' ')
        || s.ends_with(' ')
        || s.starts_with('-')
        || matches!(s, "true" | "false" | "null")
        || s.parse::<f64>().is_ok()
        || has_structural_char;

    if !needs_quotes {
        return s.to_owned();
    }

    // Single pass over the input; +2 accounts for the surrounding quotes.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            '\t' => quoted.push_str("\\t"),
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}
#[cfg(test)]
#[allow(clippy::str_to_string)]
mod tests {
use super::*;
use crate::repomap::RepoMapGenerator;
use crate::types::{LanguageStats, RepoFile, RepoMetadata, TokenCounts};
fn create_test_repo() -> Repository {
Repository {
name: "test".to_string(),
path: "/tmp/test".into(),
files: vec![RepoFile {
path: "/tmp/test/main.py".into(),
relative_path: "main.py".to_string(),
language: Some("python".to_string()),
size_bytes: 100,
token_count: TokenCounts {
o200k: 48,
cl100k: 49,
claude: 50,
gemini: 47,
llama: 46,
mistral: 46,
deepseek: 46,
qwen: 46,
cohere: 47,
grok: 46,
},
symbols: Vec::new(),
importance: 0.8,
content: Some("def main():\n print('hello')".to_string()),
}],
metadata: RepoMetadata {
total_files: 1,
total_lines: 2,
total_tokens: TokenCounts {
o200k: 48,
cl100k: 49,
claude: 50,
gemini: 47,
llama: 46,
mistral: 46,
deepseek: 46,
qwen: 46,
cohere: 47,
grok: 46,
},
languages: vec![LanguageStats {
language: "Python".to_string(),
files: 1,
lines: 2,
percentage: 100.0,
}],
framework: None,
description: None,
branch: None,
commit: None,
directory_structure: Some("main.py\n".to_string()),
external_dependencies: vec!["requests".to_string(), "numpy".to_string()],
git_history: None,
},
}
}
/// The full render (with repo map) contains the banner, the metadata
/// entries, the table headers and the per-file header.
#[test]
fn test_toon_output() {
    let repo = create_test_repo();
    let map = RepoMapGenerator::new(1000).generate(&repo);
    let output = ToonFormatter::new().format(&repo, &map);

    let expected_fragments = [
        "# Infiniloom Repository Context",
        "metadata:",
        "name: test",
        "files: 1",
        "languages[1]{name,files,percentage}:",
        "directory_structure: |",
        "main.py|python|50:",
    ];
    for fragment in expected_fragments {
        assert!(output.contains(fragment));
    }
}
/// `escape_toon` leaves plain identifiers bare and quotes/escapes everything
/// that would be ambiguous.
#[test]
fn test_toon_escaping() {
    // (input, expected output)
    let cases: [(&str, &str); 22] = [
        // Plain strings stay bare.
        ("hello", "hello"),
        ("hello_world", "hello_world"),
        ("CamelCase", "CamelCase"),
        // Empty string and literal look-alikes.
        ("", "\"\""),
        ("true", "\"true\""),
        ("false", "\"false\""),
        ("null", "\"null\""),
        // Number look-alikes.
        ("123", "\"123\""),
        ("3.14", "\"3.14\""),
        ("-42", "\"-42\""),
        ("0", "\"0\""),
        // Delimiters.
        ("a,b", "\"a,b\""),
        ("a|b", "\"a|b\""),
        ("key:value", "\"key:value\""),
        // Control characters.
        ("line\nbreak", "\"line\\nbreak\""),
        ("tab\there", "\"tab\\there\""),
        ("cr\rhere", "\"cr\\rhere\""),
        // Quotes and backslashes.
        ("say \"hello\"", "\"say \\\"hello\\\"\""),
        ("path\\to\\file", "\"path\\\\to\\\\file\""),
        // Leading / trailing spaces.
        (" leading", "\" leading\""),
        ("trailing ", "\"trailing \""),
        (" both ", "\" both \""),
    ];

    for (input, expected) in cases {
        assert_eq!(escape_toon(input), expected);
    }
}
/// The map-less render contains both tabular section headers.
#[test]
fn test_toon_tabular_format() {
    let output = ToonFormatter::new().format_repo(&create_test_repo());

    let expected_headers = [
        "languages[1]{name,files,percentage}:",
        "file_index[1]{path,tokens,importance}:",
    ];
    for header in expected_headers {
        assert!(output.contains(header));
    }
}
}