code2prompt_core/
path.rs

1//! This module contains the functions for traversing the directory and processing the files.
2use crate::configuration::Code2PromptConfig;
3use crate::filter::{build_globset, should_include_file};
4use crate::sort::{FileSortMethod, sort_files, sort_tree};
5use crate::tokenizer::count_tokens;
6use crate::util::strip_utf8_bom;
7use anyhow::Result;
8use ignore::WalkBuilder;
9use log::debug;
10use serde::{Deserialize, Serialize};
11use serde_json::json;
12use std::fs;
13use std::path::Path;
14use termtree::Tree;
15
/// Minimal, serializable description of a filesystem entry's type.
///
/// Built from [`std::fs::Metadata`] and stored under the `"metadata"` key of each
/// file's JSON representation produced by [`traverse_directory`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct EntryMetadata {
    /// `true` when the source metadata reports a directory.
    pub is_dir: bool,
    /// `true` when the source metadata reports a symbolic link.
    pub is_symlink: bool,
}
21
22impl From<&std::fs::Metadata> for EntryMetadata {
23    fn from(meta: &std::fs::Metadata) -> Self {
24        Self {
25            is_dir: meta.is_dir(),
26            is_symlink: meta.is_symlink(),
27        }
28    }
29}
30
31/// Traverses the directory and returns the string representation of the tree and the vector of JSON file representations.
32///
33/// This function uses the provided configuration to determine which files to include, how to format them,
34/// and how to structure the directory tree.
35///
36/// # Arguments
37///
38/// * `config` - Configuration object containing path, include/exclude patterns, and other settings
39/// * `selection_engine` - Optional SelectionEngine for advanced file selection with user actions
40///
41/// # Returns
42///
43/// * `Result<(String, Vec<serde_json::Value>)>` - A tuple containing the string representation of the directory
44///   tree and a vector of JSON representations of the files
45pub fn traverse_directory(
46    config: &Code2PromptConfig,
47    mut selection_engine: Option<&mut crate::selection::SelectionEngine>,
48) -> Result<(String, Vec<serde_json::Value>)> {
49    // ~~~ Initialization ~~~
50    let mut files = Vec::new();
51    let canonical_root_path = config.path.canonicalize()?;
52    let parent_directory = label(&canonical_root_path);
53
54    let include_globset = build_globset(&config.include_patterns);
55    let exclude_globset = build_globset(&config.exclude_patterns);
56
57    // ~~~ Build the Walker ~~~
58    let walker = WalkBuilder::new(&canonical_root_path)
59        .hidden(!config.hidden)
60        .git_ignore(!config.no_ignore)
61        .follow_links(config.follow_symlinks)
62        .build()
63        .filter_map(|entry| entry.ok());
64
65    // ~~~ Build the Tree ~~~
66    let mut tree = Tree::new(parent_directory.to_owned());
67
68    for entry in walker {
69        let path = entry.path();
70        if let Ok(relative_path) = path.strip_prefix(&canonical_root_path) {
71            // Use SelectionEngine if available, otherwise fall back to pattern matching
72            let entry_match = if let Some(engine) = selection_engine.as_mut() {
73                // New logic: use SelectionEngine (which integrates with FilterEngine)
74                engine.is_selected(relative_path)
75            } else {
76                // Existing logic: use direct pattern matching for compatibility
77                should_include_file(relative_path, &include_globset, &exclude_globset)
78            };
79
80            // ~~~ Directory Tree ~~~
81            let include_in_tree = config.full_directory_tree || entry_match;
82
83            if include_in_tree {
84                let mut current_tree = &mut tree;
85                for component in relative_path.components() {
86                    let component_str = component.as_os_str().to_string_lossy().to_string();
87                    current_tree = if let Some(pos) = current_tree
88                        .leaves
89                        .iter_mut()
90                        .position(|child| child.root == component_str)
91                    {
92                        &mut current_tree.leaves[pos]
93                    } else {
94                        let new_tree = Tree::new(component_str.clone());
95                        current_tree.leaves.push(new_tree);
96                        current_tree.leaves.last_mut().unwrap()
97                    };
98                }
99            }
100
101            // ~~~ Processing File ~~~
102            if path.is_file()
103                && entry_match
104                && let Ok(metadata) = entry.metadata()
105            {
106                if let Ok(code_bytes) = fs::read(path) {
107                    let clean_bytes = strip_utf8_bom(&code_bytes);
108                    let code = String::from_utf8_lossy(clean_bytes);
109
110                    let code_block = wrap_code_block(
111                        &code,
112                        path.extension().and_then(|ext| ext.to_str()).unwrap_or(""),
113                        config.line_numbers,
114                        config.no_codeblock,
115                    );
116
117                    if !code.trim().is_empty() && !code.contains(char::REPLACEMENT_CHARACTER) {
118                        // ~~~ Filepath ~~~
119                        let file_path = if config.absolute_path {
120                            path.to_string_lossy().to_string()
121                        } else {
122                            relative_path.to_string_lossy().to_string()
123                        };
124
125                        // ~~~ File JSON Representation ~~~
126                        let mut file_entry = serde_json::Map::new();
127                        file_entry.insert("path".to_string(), json!(file_path));
128                        file_entry.insert(
129                            "extension".to_string(),
130                            json!(path.extension().and_then(|ext| ext.to_str()).unwrap_or("")),
131                        );
132                        file_entry.insert("code".to_string(), json!(code_block));
133
134                        // Store metadata
135                        let entry_meta = EntryMetadata::from(&metadata);
136                        file_entry
137                            .insert("metadata".to_string(), serde_json::to_value(entry_meta)?);
138
139                        // Add token count for the file only if token map is enabled
140                        if config.token_map_enabled {
141                            let token_count = count_tokens(&code, &config.encoding);
142                            file_entry.insert("token_count".to_string(), json!(token_count));
143                        }
144
145                        // If date sorting is requested, record the file modification time.
146                        if let Some(method) = config.sort_method
147                            && (method == FileSortMethod::DateAsc
148                                || method == FileSortMethod::DateDesc)
149                        {
150                            let mod_time = metadata
151                                .modified()
152                                .ok()
153                                .and_then(|mtime| {
154                                    mtime.duration_since(std::time::SystemTime::UNIX_EPOCH).ok()
155                                })
156                                .map(|d| d.as_secs())
157                                .unwrap_or(0);
158                            file_entry.insert("mod_time".to_string(), json!(mod_time));
159                        }
160                        files.push(serde_json::Value::Object(file_entry));
161                        debug!(target: "included_files", "Included file: {}", file_path);
162                    } else {
163                        debug!("Excluded file (empty or invalid UTF-8): {}", path.display());
164                    }
165                } else {
166                    debug!("Failed to read file: {}", path.display());
167                }
168            }
169        }
170    }
171
172    // ~~~ Sorting ~~~
173    sort_tree(&mut tree, config.sort_method);
174    sort_files(&mut files, config.sort_method);
175
176    Ok((tree.to_string(), files))
177}
178
/// Returns the file name or the string representation of the path.
///
/// When the path has a final component, that component is returned (or an empty string if it
/// is not valid UTF-8). When it has none (for example `/` or `..`), the name of the current
/// working directory is used instead, falling back to `"."` if the working directory cannot be
/// determined, has no final component, or its name is not valid UTF-8.
///
/// # Arguments
///
/// * `p` - The path to label.
///
/// # Returns
///
/// * `String` - The file name or string representation of the path.
pub fn label<P: AsRef<Path>>(p: P) -> String {
    match p.as_ref().file_name() {
        // A non-UTF-8 name cannot be represented in the returned `String`.
        Some(name) => name.to_str().unwrap_or("").to_owned(),
        None => {
            // The working directory may have been removed or be unreadable; treat that the
            // same as an unnamed directory instead of panicking.
            let current_dir = std::env::current_dir().ok();
            current_dir
                .as_deref()
                .and_then(Path::file_name)
                .and_then(|name| name.to_str())
                .unwrap_or(".")
                .to_owned()
        }
    }
}
204
/// Wraps the code block with a delimiter and adds line numbers if required.
///
/// With `line_numbers` set, every line of `code` is rewritten as `"{:4} | {line}\n"` using
/// 1-based numbering, so the numbered text always ends with a newline. Otherwise `code` is used
/// unchanged. Unless `no_codeblock` is set, the text is then placed between two triple-backtick
/// fences, with `extension` appended to the opening fence.
///
/// # Arguments
///
/// * `code` - The code block to wrap.
/// * `extension` - The file extension of the code block.
/// * `line_numbers` - Whether to add line numbers to the code.
/// * `no_codeblock` - Whether to not wrap the code block with a delimiter.
///
/// # Returns
///
/// * `String` - The wrapped code block.
fn wrap_code_block(code: &str, extension: &str, line_numbers: bool, no_codeblock: bool) -> String {
    use std::borrow::Cow;
    use std::fmt::Write as _;

    const FENCE: &str = "```";

    // Borrow the input when it is used verbatim; only the numbered variant needs a new buffer.
    let body: Cow<'_, str> = if line_numbers {
        // `code.len()` is a lower bound: every line also gains a numeric prefix.
        let mut numbered = String::with_capacity(code.len());
        for (index, line) in code.lines().enumerate() {
            // Formatting straight into the buffer avoids one temporary `String` per line.
            // Writing to a `String` cannot fail, so the result carries no information.
            let _ = write!(numbered, "{:4} | {}\n", index + 1, line);
        }
        Cow::Owned(numbered)
    } else {
        Cow::Borrowed(code)
    };

    if no_codeblock {
        body.into_owned()
    } else {
        format!("{}{}\n{}\n{}", FENCE, extension, body, FENCE)
    }
}
237}