features_cli/
file_scanner.rs
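//! Scans a directory tree for feature directories, reads their README front
//! matter and decision records, and (optionally) attaches git change history
//! and derived statistics to each discovered `Feature`.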

use anyhow::{Context, Result};
use git2::Repository;
use std::collections::HashMap;
use std::fs;
use std::path::Path;

use crate::git_helper::get_all_commits_by_path;
use crate::models::{Change, Feature, Stats};
use crate::readme_parser::read_readme_info;

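/// Check if a directory is itself a documentation directory (`docs`, `__docs__`, or `.docs`).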
fn is_documentation_directory(dir_path: &Path) -> bool {
    let dir_name = dir_path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    // Common documentation directory names
    let doc_dirs = ["docs", "__docs__", ".docs"];

    doc_dirs.contains(&dir_name.to_lowercase().as_str())
}

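/// Check if any ancestor of the directory is a documentation directory.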
fn is_inside_documentation_directory(dir_path: &Path) -> bool {
    // Check if any parent directory is a documentation directory
    for ancestor in dir_path.ancestors().skip(1) {
        if is_documentation_directory(ancestor) {
            return true;
        }
    }
    false
}

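/// Check if the directory is a direct child of a folder named `features`.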
fn is_direct_subfolder_of_features(dir_path: &Path) -> bool {
    if let Some(parent) = dir_path.parent()
        && let Some(parent_name) = parent.file_name().and_then(|name| name.to_str())
    {
        return parent_name == "features";
    }
    false
}

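/// Find a `README.md` or `README.mdx` file directly inside the directory.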
fn find_readme_file(dir_path: &Path) -> Option<std::path::PathBuf> {
    let readme_candidates = ["README.md", "README.mdx"];

    for candidate in &readme_candidates {
        let readme_path = dir_path.join(candidate);
        if readme_path.exists() {
            return Some(readme_path);
        }
    }

    None
}

/// Check if a directory has a README with `feature: true` in front matter
fn has_feature_flag_in_readme(dir_path: &Path) -> bool {
    if let Some(readme_path) = find_readme_file(dir_path)
        && let Ok(content) = fs::read_to_string(&readme_path)
    {
        // Check if content starts with YAML front matter (---)
        if let Some(stripped) = content.strip_prefix("---\n")
            && let Some(end_pos) = stripped.find("\n---\n")
        {
            let yaml_content = &stripped[..end_pos];

            // Parse YAML front matter
            if let Ok(yaml_value) = serde_yaml::from_str::<serde_yaml::Value>(yaml_content)
                && let Some(mapping) = yaml_value.as_mapping()
            {
                // Check for feature: true
                if let Some(feature_value) =
                    mapping.get(serde_yaml::Value::String("feature".to_string()))
                {
                    return feature_value.as_bool() == Some(true);
                }
            }
        }
    }
    false
}

/// Check if a directory should be treated as a feature
fn is_feature_directory(dir_path: &Path) -> bool {
    // Skip documentation directories
    if is_documentation_directory(dir_path) || is_inside_documentation_directory(dir_path) {
        return false;
    }

    // Check if it's a direct subfolder of "features" (existing behavior)
    if is_direct_subfolder_of_features(dir_path) {
        return true;
    }

    // Check if the directory has a README with feature: true
    has_feature_flag_in_readme(dir_path)
}

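/// Scan a directory tree for features without collecting git change history.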
pub fn list_files_recursive(dir: &Path) -> Result<Vec<Feature>> {
    list_files_recursive_impl(dir, None)
}

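/// Scan a directory tree for features, attaching git change history to each one.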
pub fn list_files_recursive_with_changes(dir: &Path) -> Result<Vec<Feature>> {
    // Get all commits once at the beginning for efficiency
    let all_commits = get_all_commits_by_path(dir).unwrap_or_default();
    list_files_recursive_impl(dir, Some(&all_commits))
}

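/// Read the contents of all decision files (`*.md`, excluding `README.md`) under the feature's documentation directory.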
fn read_decision_files(feature_path: &Path) -> Result<Vec<String>> {
    let mut decisions = Vec::new();

    // Look for a "decisions" folder under either documentation directory
    let decision_paths = [
        feature_path.join(".docs").join("decisions"),
        feature_path.join("__docs__").join("decisions"),
    ];

    for decisions_dir in &decision_paths {
        if decisions_dir.exists() && decisions_dir.is_dir() {
            let entries = fs::read_dir(decisions_dir).with_context(|| {
                format!(
                    "could not read decisions directory `{}`",
                    decisions_dir.display()
                )
            })?;

            for entry in entries {
                let entry = entry?;
                let path = entry.path();

                // Skip README.md files and only process .md files
                if path.is_file()
                    && let Some(file_name) = path.file_name()
                {
                    let file_name_str = file_name.to_string_lossy();
                    if file_name_str.ends_with(".md") && file_name_str != "README.md" {
                        let content = fs::read_to_string(&path).with_context(|| {
                            format!("could not read decision file `{}`", path.display())
                        })?;
                        decisions.push(content);
                    }
                }
            }
            break; // If we found one of the directories, don't check the other
        }
    }

    Ok(decisions)
}

/// Count the number of files in a feature directory (excluding documentation)
fn count_files(feature_path: &Path, nested_feature_paths: &[String]) -> usize {
    let mut file_count = 0;

    if let Ok(entries) = fs::read_dir(feature_path) {
        for entry in entries.flatten() {
            let path = entry.path();
            let path_str = path.to_string_lossy().to_string();

            // Skip documentation directories
            if is_documentation_directory(&path) {
                continue;
            }

            // Skip nested feature directories
            if nested_feature_paths
                .iter()
                .any(|nfp| path_str.starts_with(nfp))
            {
                continue;
            }

            if path.is_file() {
                file_count += 1;
            } else if path.is_dir() {
                // Recursively count files in subdirectories
                file_count += count_files(&path, nested_feature_paths);
            }
        }
    }

    file_count
}

/// Count the total number of lines in all files in a feature directory (excluding documentation)
fn count_lines(feature_path: &Path, nested_feature_paths: &[String]) -> usize {
    let mut line_count = 0;

    if let Ok(entries) = fs::read_dir(feature_path) {
        for entry in entries.flatten() {
            let path = entry.path();
            let path_str = path.to_string_lossy().to_string();

            // Skip documentation directories
            if is_documentation_directory(&path) {
                continue;
            }

            // Skip nested feature directories
            if nested_feature_paths
                .iter()
                .any(|nfp| path_str.starts_with(nfp))
            {
                continue;
            }

            if path.is_file() {
                // Try to read the file and count lines
                if let Ok(content) = fs::read_to_string(&path) {
                    line_count += content.lines().count();
                }
            } else if path.is_dir() {
                // Recursively count lines in subdirectories
                line_count += count_lines(&path, nested_feature_paths);
            }
        }
    }

    line_count
}

/// Count the total number of TODO markers in all files in a feature directory (excluding documentation)
fn count_todos(feature_path: &Path, nested_feature_paths: &[String]) -> usize {
    let mut todo_count = 0;

    if let Ok(entries) = fs::read_dir(feature_path) {
        for entry in entries.flatten() {
            let path = entry.path();
            let path_str = path.to_string_lossy().to_string();

            // Skip documentation directories
            if is_documentation_directory(&path) {
                continue;
            }

            // Skip nested feature directories
            if nested_feature_paths
                .iter()
                .any(|nfp| path_str.starts_with(nfp))
            {
                continue;
            }

            if path.is_file() {
                // Try to read the file and count lines containing TODO
                if let Ok(content) = fs::read_to_string(&path) {
                    for line in content.lines() {
                        // Look for TODO anywhere in the line (case-insensitive)
                        let line_upper = line.to_uppercase();
                        if line_upper.contains("TODO") {
                            todo_count += 1;
                        }
                    }
                }
            } else if path.is_dir() {
                // Recursively count TODOs in subdirectories
                todo_count += count_todos(&path, nested_feature_paths);
            }
        }
    }

    todo_count
}

/// Get the paths affected by a specific commit
fn get_commit_affected_paths(repo: &Repository, commit_hash: &str) -> Vec<String> {
    let Ok(oid) = git2::Oid::from_str(commit_hash) else {
        return Vec::new();
    };

    let Ok(commit) = repo.find_commit(oid) else {
        return Vec::new();
    };

    let mut paths = Vec::new();

    // For the first commit (no parents), get all files in the tree
    if commit.parent_count() == 0 {
        if let Ok(tree) = commit.tree() {
            collect_all_tree_paths(repo, &tree, "", &mut paths);
        }
        return paths;
    }

    // For commits with parents, check the diff
    let Ok(tree) = commit.tree() else {
        return Vec::new();
    };

    let Ok(parent) = commit.parent(0) else {
        return Vec::new();
    };

    let Ok(parent_tree) = parent.tree() else {
        return Vec::new();
    };

    if let Ok(diff) = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None) {
        let _ = diff.foreach(
            &mut |delta, _| {
                if let Some(path) = delta.new_file().path()
                    && let Some(path_str) = path.to_str()
                {
                    paths.push(path_str.to_string());
                }
                if let Some(path) = delta.old_file().path()
                    && let Some(path_str) = path.to_str()
                    && !paths.contains(&path_str.to_string())
                {
                    paths.push(path_str.to_string());
                }
                true
            },
            None,
            None,
            None,
        );
    }

    paths
}

/// Collect all file paths in a tree (helper for get_commit_affected_paths)
fn collect_all_tree_paths(
    repo: &Repository,
    tree: &git2::Tree,
    prefix: &str,
    paths: &mut Vec<String>,
) {
    for entry in tree.iter() {
        if let Some(name) = entry.name() {
            let path = if prefix.is_empty() {
                name.to_string()
            } else {
                format!("{}/{}", prefix, name)
            };

            paths.push(path.clone());

            if entry.kind() == Some(git2::ObjectType::Tree)
                && let Ok(obj) = entry.to_object(repo)
                && let Ok(subtree) = obj.peel_to_tree()
            {
                collect_all_tree_paths(repo, &subtree, &path, paths);
            }
        }
    }
}

/// Compute statistics from changes for a feature
fn compute_stats_from_changes(
    changes: &[Change],
    feature_path: &Path,
    nested_features: &[Feature],
) -> Option<Stats> {
    if changes.is_empty() {
        return None;
    }

    // Collect paths of nested features to exclude from commit counts
    let nested_feature_paths: Vec<String> =
        nested_features.iter().map(|f| f.path.clone()).collect();

    // Get repository to check commit details
    let repo = Repository::discover(feature_path).ok();

    // Get the feature's relative path from repo root
    let feature_relative_path = if let Some(ref r) = repo {
        if let Ok(canonical_path) = std::fs::canonicalize(feature_path) {
            if let Some(workdir) = r.workdir() {
                canonical_path
                    .strip_prefix(workdir)
                    .ok()
                    .map(|p| p.to_string_lossy().to_string())
            } else {
                None
            }
        } else {
            None
        }
    } else {
        None
    };

    // Filter changes to only include those that affect files in this feature
    // (not exclusively in nested features)
    let filtered_changes: Vec<&Change> = changes
        .iter()
        .filter(|change| {
            // If we don't have repo access, include all changes
            let Some(ref r) = repo else {
                return true;
            };

            let Some(ref feature_rel_path) = feature_relative_path else {
                return true;
            };

            // Get the files affected by this commit
            let affected_files = get_commit_affected_paths(r, &change.hash);

            // Check if any affected file is in this feature but not in a nested feature
            affected_files.iter().any(|file_path| {
                // File must be in this feature
                let in_feature = file_path.starts_with(feature_rel_path);

                // File must not be exclusively in a nested feature
                let in_nested = nested_feature_paths.iter().any(|nested_path| {
                    // Convert nested absolute path to relative path
                    if let Ok(nested_canonical) = std::fs::canonicalize(nested_path)
                        && let Some(workdir) = r.workdir()
                        && let Ok(nested_rel) = nested_canonical.strip_prefix(workdir)
                    {
                        let nested_rel_str = nested_rel.to_string_lossy();
                        return file_path.starts_with(nested_rel_str.as_ref());
                    }
                    false
                });

                in_feature && !in_nested
            })
        })
        .collect();

    let mut commits = HashMap::new();

    // Add total commit count
    commits.insert(
        "total_commits".to_string(),
        serde_json::json!(filtered_changes.len()),
    );

    // Count commits by author
    let mut authors_count: HashMap<String, usize> = HashMap::new();
    for change in &filtered_changes {
        *authors_count.entry(change.author_name.clone()).or_insert(0) += 1;
    }
    commits.insert(
        "authors_count".to_string(),
        serde_json::json!(authors_count),
    );

    // Count commits by conventional commit type
    let mut count_by_type: HashMap<String, usize> = HashMap::new();
    for change in &filtered_changes {
        let commit_type = extract_commit_type(&change.title);
        *count_by_type.entry(commit_type).or_insert(0) += 1;
    }
    commits.insert(
        "count_by_type".to_string(),
        serde_json::json!(count_by_type),
    );

    // Get first and last commit dates
    if let Some(first) = filtered_changes.first() {
        commits.insert(
            "first_commit_date".to_string(),
            serde_json::json!(first.date.clone()),
        );
    }
    if let Some(last) = filtered_changes.last() {
        commits.insert(
            "last_commit_date".to_string(),
            serde_json::json!(last.date.clone()),
        );
    }

    // Count files and lines in the feature directory (excluding nested features)
    let files_count = count_files(feature_path, &nested_feature_paths);
    let lines_count = count_lines(feature_path, &nested_feature_paths);
    let todos_count = count_todos(feature_path, &nested_feature_paths);

    Some(Stats {
        files_count: Some(files_count),
        lines_count: Some(lines_count),
        todos_count: Some(todos_count),
        commits,
        coverage: None,
    })
}

/// Extract the commit type from a conventional commit title
fn extract_commit_type(title: &str) -> String {
    // Common conventional commit types
    let known_types = [
        "feat", "fix", "docs", "style", "refactor", "perf", "test", "build", "ci", "chore",
        "revert",
    ];

    // Check if the title follows conventional commit format (type: description or type(scope): description)
    if let Some(colon_pos) = title.find(':') {
        let prefix = &title[..colon_pos];

        // Remove scope if present (e.g., "feat(auth)" -> "feat")
        let type_part = if let Some(paren_pos) = prefix.find('(') {
            &prefix[..paren_pos]
        } else {
            prefix
        };

        let type_part = type_part.trim().to_lowercase();

        // Check if it's a known conventional commit type
        if known_types.contains(&type_part.as_str()) {
            return type_part;
        }
    }

    // If not a conventional commit, return "other"
    "other".to_string()
}

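/// Build a `Feature` from a directory: README metadata, change history, decisions, nested features, and stats.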
fn process_feature_directory(
    path: &Path,
    name: &str,
    changes_map: Option<&HashMap<String, Vec<Change>>>,
) -> Result<Feature> {
    // Try to find and read README file, use defaults if not found
    let mut readme_info = if let Some(readme_path) = find_readme_file(path) {
        read_readme_info(&readme_path)?
    } else {
        use crate::readme_parser::ReadmeInfo;
        ReadmeInfo {
            title: None,
            owner: "Unknown".to_string(),
            description: "".to_string(),
            meta: std::collections::HashMap::new(),
        }
    };

    // Remove the 'feature' key from meta if it exists (it's redundant since we know it's a feature)
    readme_info.meta.remove("feature");

    let changes = if let Some(map) = changes_map {
        // Convert the absolute path to a repo-relative path and look up changes
        get_changes_for_path(path, map).unwrap_or_default()
    } else {
        Vec::new()
    };

    // Always include decisions regardless of include_changes flag
    let decisions = read_decision_files(path).unwrap_or_default();

    // Check if this feature has nested features in a 'features' subdirectory
    let nested_features_path = path.join("features");
    let mut nested_features = if nested_features_path.exists() && nested_features_path.is_dir() {
        list_files_recursive_impl(&nested_features_path, changes_map).unwrap_or_default()
    } else {
        Vec::new()
    };

    // Also check for nested features marked with feature: true in subdirectories
    let entries = fs::read_dir(path)
        .with_context(|| format!("could not read directory `{}`", path.display()))?;

    let mut entries: Vec<_> = entries.collect::<Result<_, _>>()?;
    entries.sort_by_key(|entry| entry.path());

    for entry in entries {
        let entry_path = entry.path();
        let entry_name = entry_path.file_name().unwrap().to_string_lossy();

        if entry_path.is_dir()
            && entry_name != "features" // Don't process 'features' folder twice
            && !is_documentation_directory(&entry_path)
        {
            if has_feature_flag_in_readme(&entry_path) {
                // This directory is a feature itself
                let nested_feature =
                    process_feature_directory(&entry_path, &entry_name, changes_map)?;
                nested_features.push(nested_feature);
            } else {
                // This directory is not a feature, but might contain features
                // Recursively search for features inside it
                let deeper_features = list_files_recursive_impl(&entry_path, changes_map)?;
                nested_features.extend(deeper_features);
            }
        }
    }

    // Compute stats from changes if available
    let stats = compute_stats_from_changes(&changes, path, &nested_features);

    Ok(Feature {
        name: readme_info.title.unwrap_or_else(|| name.to_string()),
        description: readme_info.description,
        owner: readme_info.owner,
        path: path.to_string_lossy().to_string(),
        features: nested_features,
        meta: readme_info.meta,
        changes,
        decisions,
        stats,
    })
}

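/// Shared implementation for the scan: walks the tree, collecting feature directories and recursing into non-documentation folders.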
fn list_files_recursive_impl(
    dir: &Path,
    changes_map: Option<&HashMap<String, Vec<Change>>>,
) -> Result<Vec<Feature>> {
    let entries = fs::read_dir(dir)
        .with_context(|| format!("could not read directory `{}`", dir.display()))?;

    let mut entries: Vec<_> = entries.collect::<Result<_, _>>()?;
    entries.sort_by_key(|entry| entry.path());

    let mut features: Vec<Feature> = Vec::new();

    for entry in entries {
        let path = entry.path();
        let name = path.file_name().unwrap().to_string_lossy();

        if path.is_dir() {
            if is_feature_directory(&path) {
                let feature = process_feature_directory(&path, &name, changes_map)?;
                features.push(feature);
            } else if !is_documentation_directory(&path)
                && !is_inside_documentation_directory(&path)
            {
                // Recursively search for features in non-documentation subdirectories
                let new_features = list_files_recursive_impl(&path, changes_map)?;
                features.extend(new_features);
            }
        }
    }

    Ok(features)
}

/// Get changes for a specific path from the pre-computed changes map
fn get_changes_for_path(
    path: &Path,
    changes_map: &HashMap<String, Vec<Change>>,
) -> Result<Vec<Change>> {
    // Canonicalize the path
    let canonical_path = std::fs::canonicalize(path)?;

    // Find the repository and get the working directory
    let repo = Repository::discover(path)?;
    let repo_workdir = repo
        .workdir()
        .context("repository has no working directory")?;

    // Convert to relative path from repo root
    let relative_path = canonical_path
        .strip_prefix(repo_workdir)
        .context("path is not within repository")?;

    let relative_path_str = relative_path.to_string_lossy().to_string();

    // Look up the changes in the map
    Ok(changes_map
        .get(&relative_path_str)
        .cloned()
        .unwrap_or_default())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_commit_type() {
        // Test standard conventional commit types
        assert_eq!(extract_commit_type("feat: add new feature"), "feat");
        assert_eq!(extract_commit_type("fix: resolve bug"), "fix");
        assert_eq!(extract_commit_type("docs: update README"), "docs");
        assert_eq!(extract_commit_type("style: format code"), "style");
        assert_eq!(
            extract_commit_type("refactor: improve structure"),
            "refactor"
        );
        assert_eq!(extract_commit_type("perf: optimize performance"), "perf");
        assert_eq!(extract_commit_type("test: add unit tests"), "test");
        assert_eq!(extract_commit_type("build: update dependencies"), "build");
        assert_eq!(extract_commit_type("ci: fix CI pipeline"), "ci");
        assert_eq!(extract_commit_type("chore: update gitignore"), "chore");
        assert_eq!(
            extract_commit_type("revert: undo previous commit"),
            "revert"
        );

        // Test with scope
        assert_eq!(extract_commit_type("feat(auth): add login"), "feat");
        assert_eq!(
            extract_commit_type("fix(api): resolve endpoint issue"),
            "fix"
        );
        assert_eq!(
            extract_commit_type("docs(readme): update instructions"),
            "docs"
        );

        // Test case insensitivity
        assert_eq!(extract_commit_type("FEAT: uppercase type"), "feat");
        assert_eq!(extract_commit_type("Fix: mixed case"), "fix");
        assert_eq!(extract_commit_type("DOCS: all caps"), "docs");

        // Test non-conventional commits
        assert_eq!(extract_commit_type("random commit message"), "other");
        assert_eq!(extract_commit_type("update: not conventional"), "other");
        assert_eq!(
            extract_commit_type("feature: close but not standard"),
            "other"
        );
        assert_eq!(extract_commit_type("no colon here"), "other");
        assert_eq!(extract_commit_type(""), "other");

        // Test edge cases
        assert_eq!(extract_commit_type("feat:no space after colon"), "feat");
        assert_eq!(extract_commit_type("feat  : extra spaces"), "feat");
        assert_eq!(
            extract_commit_type("feat(scope)(weird): nested parens"),
            "feat"
        );
    }
}