use crate::configuration::Code2PromptConfig;
use crate::filter::{build_globset, should_include_file};
use crate::sort::{FileSortMethod, sort_files, sort_tree};
use crate::tokenizer::count_tokens;
use crate::util::strip_utf8_bom;
use anyhow::Result;
use ignore::WalkBuilder;
use log::debug;
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::fmt::Write as _;
use std::fs;
use std::path::Path;
use termtree::Tree;
15
16#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
17pub struct EntryMetadata {
18 pub is_dir: bool,
19 pub is_symlink: bool,
20}
21
22impl From<&std::fs::Metadata> for EntryMetadata {
23 fn from(meta: &std::fs::Metadata) -> Self {
24 Self {
25 is_dir: meta.is_dir(),
26 is_symlink: meta.is_symlink(),
27 }
28 }
29}
30
31pub fn traverse_directory(
46 config: &Code2PromptConfig,
47 mut selection_engine: Option<&mut crate::selection::SelectionEngine>,
48) -> Result<(String, Vec<serde_json::Value>)> {
49 let mut files = Vec::new();
51 let canonical_root_path = config.path.canonicalize()?;
52 let parent_directory = label(&canonical_root_path);
53
54 let include_globset = build_globset(&config.include_patterns);
55 let exclude_globset = build_globset(&config.exclude_patterns);
56
57 let walker = WalkBuilder::new(&canonical_root_path)
59 .hidden(!config.hidden)
60 .git_ignore(!config.no_ignore)
61 .follow_links(config.follow_symlinks)
62 .build()
63 .filter_map(|entry| entry.ok());
64
65 let mut tree = Tree::new(parent_directory.to_owned());
67
68 for entry in walker {
69 let path = entry.path();
70 if let Ok(relative_path) = path.strip_prefix(&canonical_root_path) {
71 let entry_match = if let Some(engine) = selection_engine.as_mut() {
73 engine.is_selected(relative_path)
75 } else {
76 should_include_file(relative_path, &include_globset, &exclude_globset)
78 };
79
80 let include_in_tree = config.full_directory_tree || entry_match;
82
83 if include_in_tree {
84 let mut current_tree = &mut tree;
85 for component in relative_path.components() {
86 let component_str = component.as_os_str().to_string_lossy().to_string();
87 current_tree = if let Some(pos) = current_tree
88 .leaves
89 .iter_mut()
90 .position(|child| child.root == component_str)
91 {
92 &mut current_tree.leaves[pos]
93 } else {
94 let new_tree = Tree::new(component_str.clone());
95 current_tree.leaves.push(new_tree);
96 current_tree.leaves.last_mut().unwrap()
97 };
98 }
99 }
100
101 if path.is_file()
103 && entry_match
104 && let Ok(metadata) = entry.metadata()
105 {
106 if let Ok(code_bytes) = fs::read(path) {
107 let clean_bytes = strip_utf8_bom(&code_bytes);
108 let code = String::from_utf8_lossy(clean_bytes);
109
110 let code_block = wrap_code_block(
111 &code,
112 path.extension().and_then(|ext| ext.to_str()).unwrap_or(""),
113 config.line_numbers,
114 config.no_codeblock,
115 );
116
117 if !code.trim().is_empty() && !code.contains(char::REPLACEMENT_CHARACTER) {
118 let file_path = if config.absolute_path {
120 path.to_string_lossy().to_string()
121 } else {
122 relative_path.to_string_lossy().to_string()
123 };
124
125 let mut file_entry = serde_json::Map::new();
127 file_entry.insert("path".to_string(), json!(file_path));
128 file_entry.insert(
129 "extension".to_string(),
130 json!(path.extension().and_then(|ext| ext.to_str()).unwrap_or("")),
131 );
132 file_entry.insert("code".to_string(), json!(code_block));
133
134 let entry_meta = EntryMetadata::from(&metadata);
136 file_entry
137 .insert("metadata".to_string(), serde_json::to_value(entry_meta)?);
138
139 if config.token_map_enabled {
141 let token_count = count_tokens(&code, &config.encoding);
142 file_entry.insert("token_count".to_string(), json!(token_count));
143 }
144
145 if let Some(method) = config.sort_method
147 && (method == FileSortMethod::DateAsc
148 || method == FileSortMethod::DateDesc)
149 {
150 let mod_time = metadata
151 .modified()
152 .ok()
153 .and_then(|mtime| {
154 mtime.duration_since(std::time::SystemTime::UNIX_EPOCH).ok()
155 })
156 .map(|d| d.as_secs())
157 .unwrap_or(0);
158 file_entry.insert("mod_time".to_string(), json!(mod_time));
159 }
160 files.push(serde_json::Value::Object(file_entry));
161 debug!(target: "included_files", "Included file: {}", file_path);
162 } else {
163 debug!("Excluded file (empty or invalid UTF-8): {}", path.display());
164 }
165 } else {
166 debug!("Failed to read file: {}", path.display());
167 }
168 }
169 }
170 }
171
172 sort_tree(&mut tree, config.sort_method);
174 sort_files(&mut files, config.sort_method);
175
176 Ok((tree.to_string(), files))
177}
178
/// Returns a display label for `p`: its final path component.
///
/// When the path has no final component (for example `/` or a path ending in
/// `..`), the name of the current working directory is used instead.
///
/// Fallbacks:
/// - a final component that is not valid UTF-8 yields an empty string;
/// - if the current working directory cannot be determined, has no final
///   component, or its name is not valid UTF-8, `"."` is returned.
///
/// This function never panics.
pub fn label<P: AsRef<Path>>(p: P) -> String {
    match p.as_ref().file_name() {
        Some(name) => name.to_str().unwrap_or("").to_owned(),
        None => {
            // `current_dir` can fail (e.g. the directory was removed); an empty
            // default path has no file name and so falls through to ".".
            let current_dir = std::env::current_dir().unwrap_or_default();
            current_dir
                .file_name()
                .and_then(|name| name.to_str())
                .unwrap_or(".")
                .to_owned()
        }
    }
}
204
/// Formats `code` for output, optionally numbering lines and fencing it.
///
/// - `line_numbers`: when `true`, every line is prefixed with its 1-based
///   number right-aligned to width 4 followed by ` | `, and each line
///   (including the last) ends with `\n`. When `false`, `code` is used as is.
/// - `no_codeblock`: when `true`, the (possibly numbered) text is returned
///   without any fence. When `false`, it is wrapped as
///   `<fence><extension>\n<text>\n<fence>` where the fence is three backticks.
fn wrap_code_block(code: &str, extension: &str, line_numbers: bool, no_codeblock: bool) -> String {
    let body = if line_numbers {
        // The numbered text is at least as long as the input.
        let mut numbered = String::with_capacity(code.len());
        for (index, line) in code.lines().enumerate() {
            // Formatting straight into the buffer avoids one temporary
            // `String` per line; writing to a `String` cannot fail.
            let _ = writeln!(numbered, "{:4} | {}", index + 1, line);
        }
        numbered
    } else {
        code.to_owned()
    };

    if no_codeblock {
        return body;
    }

    // Built at runtime so this source never contains a literal fence —
    // presumably so the file can itself be embedded in a fenced block.
    let delimiter = "`".repeat(3);
    format!("{}{}\n{}\n{}", delimiter, extension, body, delimiter)
}