features_cli/
file_scanner.rs

1use anyhow::{Context, Result};
2use git2::Repository;
3use std::collections::HashMap;
4use std::fs;
5use std::path::Path;
6
7use crate::git_helper::get_all_commits_by_path;
8use crate::models::{Change, Feature, Stats};
9use crate::readme_parser::read_readme_info;
10
/// Returns `true` when the final component of `dir_path` is one of the
/// recognised documentation folder names (`docs`, `__docs__`, `.docs`),
/// compared case-insensitively.
///
/// Paths without a final component, or whose final component is not valid
/// UTF-8, are never considered documentation directories.
fn is_documentation_directory(dir_path: &Path) -> bool {
    const DOC_DIR_NAMES: [&str; 3] = ["docs", "__docs__", ".docs"];

    dir_path
        .file_name()
        .and_then(|component| component.to_str())
        .map(str::to_lowercase)
        .is_some_and(|lowered| DOC_DIR_NAMES.iter().any(|known| *known == lowered))
}
22
23fn is_inside_documentation_directory(dir_path: &Path) -> bool {
24    // Check if any parent directory is a documentation directory
25    for ancestor in dir_path.ancestors().skip(1) {
26        if is_documentation_directory(ancestor) {
27            return true;
28        }
29    }
30    false
31}
32
/// Returns `true` when the immediate parent of `dir_path` is a directory
/// named exactly `features` (case-sensitive).
fn is_direct_subfolder_of_features(dir_path: &Path) -> bool {
    let parent_name = dir_path
        .parent()
        .and_then(|parent| parent.file_name())
        .and_then(|component| component.to_str());

    matches!(parent_name, Some("features"))
}
41
/// Locates the README of `dir_path`.
///
/// `README.md` is preferred over `README.mdx`; the first candidate that
/// exists on disk is returned, or `None` when neither is present.
fn find_readme_file(dir_path: &Path) -> Option<std::path::PathBuf> {
    ["README.md", "README.mdx"]
        .iter()
        .map(|file_name| dir_path.join(file_name))
        .find(|candidate| candidate.exists())
}
54
55/// Check if a directory has a README with `feature: true` in front matter
56fn has_feature_flag_in_readme(dir_path: &Path) -> bool {
57    if let Some(readme_path) = find_readme_file(dir_path)
58        && let Ok(content) = fs::read_to_string(&readme_path)
59    {
60        // Check if content starts with YAML front matter (---)
61        if let Some(stripped) = content.strip_prefix("---\n")
62            && let Some(end_pos) = stripped.find("\n---\n")
63        {
64            let yaml_content = &stripped[..end_pos];
65
66            // Parse YAML front matter
67            if let Ok(yaml_value) = serde_yaml::from_str::<serde_yaml::Value>(yaml_content)
68                && let Some(mapping) = yaml_value.as_mapping()
69            {
70                // Check for feature: true
71                if let Some(feature_value) =
72                    mapping.get(serde_yaml::Value::String("feature".to_string()))
73                {
74                    return feature_value.as_bool() == Some(true);
75                }
76            }
77        }
78    }
79    false
80}
81
82/// Check if a directory should be treated as a feature
83fn is_feature_directory(dir_path: &Path) -> bool {
84    // Skip documentation directories
85    if is_documentation_directory(dir_path) || is_inside_documentation_directory(dir_path) {
86        return false;
87    }
88
89    // Check if it's a direct subfolder of "features" (existing behavior)
90    if is_direct_subfolder_of_features(dir_path) {
91        return true;
92    }
93
94    // Check if the directory has a README with feature: true
95    has_feature_flag_in_readme(dir_path)
96}
97
98pub fn list_files_recursive(dir: &Path) -> Result<Vec<Feature>> {
99    list_files_recursive_impl(dir, None)
100}
101
102pub fn list_files_recursive_with_changes(dir: &Path) -> Result<Vec<Feature>> {
103    // Get all commits once at the beginning for efficiency
104    let all_commits = get_all_commits_by_path(dir).unwrap_or_default();
105    list_files_recursive_impl(dir, Some(&all_commits))
106}
107
108fn read_decision_files(feature_path: &Path) -> Result<Vec<String>> {
109    let mut decisions = Vec::new();
110
111    // Check both "decision" and "decisions" folder names
112    let decision_paths = [
113        feature_path.join(".docs").join("decisions"),
114        feature_path.join("__docs__").join("decisions"),
115    ];
116
117    for decisions_dir in &decision_paths {
118        if decisions_dir.exists() && decisions_dir.is_dir() {
119            let entries = fs::read_dir(decisions_dir).with_context(|| {
120                format!(
121                    "could not read decisions directory `{}`",
122                    decisions_dir.display()
123                )
124            })?;
125
126            for entry in entries {
127                let entry = entry?;
128                let path = entry.path();
129
130                // Skip README.md files and only process .md files
131                if path.is_file()
132                    && let Some(file_name) = path.file_name()
133                {
134                    let file_name_str = file_name.to_string_lossy();
135                    if file_name_str.ends_with(".md") && file_name_str != "README.md" {
136                        let content = fs::read_to_string(&path).with_context(|| {
137                            format!("could not read decision file `{}`", path.display())
138                        })?;
139                        decisions.push(content);
140                    }
141                }
142            }
143            break; // If we found one of the directories, don't check the other
144        }
145    }
146
147    Ok(decisions)
148}
149
150/// Count the number of files in a feature directory (excluding documentation)
151fn count_files(feature_path: &Path, nested_feature_paths: &[String]) -> usize {
152    let mut file_count = 0;
153
154    if let Ok(entries) = fs::read_dir(feature_path) {
155        for entry in entries.flatten() {
156            let path = entry.path();
157            let path_str = path.to_string_lossy().to_string();
158
159            // Skip documentation directories
160            if is_documentation_directory(&path) {
161                continue;
162            }
163
164            // Skip nested feature directories
165            if nested_feature_paths
166                .iter()
167                .any(|nfp| path_str.starts_with(nfp))
168            {
169                continue;
170            }
171
172            if path.is_file() {
173                file_count += 1;
174            } else if path.is_dir() {
175                // Recursively count files in subdirectories
176                file_count += count_files(&path, nested_feature_paths);
177            }
178        }
179    }
180
181    file_count
182}
183
184/// Count the total number of lines in all files in a feature directory (excluding documentation)
185fn count_lines(feature_path: &Path, nested_feature_paths: &[String]) -> usize {
186    let mut line_count = 0;
187
188    if let Ok(entries) = fs::read_dir(feature_path) {
189        for entry in entries.flatten() {
190            let path = entry.path();
191            let path_str = path.to_string_lossy().to_string();
192
193            // Skip documentation directories
194            if is_documentation_directory(&path) {
195                continue;
196            }
197
198            // Skip nested feature directories
199            if nested_feature_paths
200                .iter()
201                .any(|nfp| path_str.starts_with(nfp))
202            {
203                continue;
204            }
205
206            if path.is_file() {
207                // Try to read the file and count lines
208                if let Ok(content) = fs::read_to_string(&path) {
209                    line_count += content.lines().count();
210                }
211            } else if path.is_dir() {
212                // Recursively count lines in subdirectories
213                line_count += count_lines(&path, nested_feature_paths);
214            }
215        }
216    }
217
218    line_count
219}
220
221/// Count the total number of TODO comments in all files in a feature directory (excluding documentation)
222fn count_todos(feature_path: &Path, nested_feature_paths: &[String]) -> usize {
223    let mut todo_count = 0;
224
225    if let Ok(entries) = fs::read_dir(feature_path) {
226        for entry in entries.flatten() {
227            let path = entry.path();
228            let path_str = path.to_string_lossy().to_string();
229
230            // Skip documentation directories
231            if is_documentation_directory(&path) {
232                continue;
233            }
234
235            // Skip nested feature directories
236            if nested_feature_paths
237                .iter()
238                .any(|nfp| path_str.starts_with(nfp))
239            {
240                continue;
241            }
242
243            if path.is_file() {
244                // Try to read the file and count TODO comments
245                if let Ok(content) = fs::read_to_string(&path) {
246                    for line in content.lines() {
247                        // Look for TODO in comments (case-insensitive)
248                        let line_upper = line.to_uppercase();
249                        if line_upper.contains("TODO") {
250                            todo_count += 1;
251                        }
252                    }
253                }
254            } else if path.is_dir() {
255                // Recursively count TODOs in subdirectories
256                todo_count += count_todos(&path, nested_feature_paths);
257            }
258        }
259    }
260
261    todo_count
262}
263
264/// Get the paths affected by a specific commit
265fn get_commit_affected_paths(repo: &Repository, commit_hash: &str) -> Vec<String> {
266    let Ok(oid) = git2::Oid::from_str(commit_hash) else {
267        return Vec::new();
268    };
269
270    let Ok(commit) = repo.find_commit(oid) else {
271        return Vec::new();
272    };
273
274    let mut paths = Vec::new();
275
276    // For the first commit (no parents), get all files in the tree
277    if commit.parent_count() == 0 {
278        if let Ok(tree) = commit.tree() {
279            collect_all_tree_paths(repo, &tree, "", &mut paths);
280        }
281        return paths;
282    }
283
284    // For commits with parents, check the diff
285    let Ok(tree) = commit.tree() else {
286        return Vec::new();
287    };
288
289    let Ok(parent) = commit.parent(0) else {
290        return Vec::new();
291    };
292
293    let Ok(parent_tree) = parent.tree() else {
294        return Vec::new();
295    };
296
297    if let Ok(diff) = repo.diff_tree_to_tree(Some(&parent_tree), Some(&tree), None) {
298        let _ = diff.foreach(
299            &mut |delta, _| {
300                if let Some(path) = delta.new_file().path()
301                    && let Some(path_str) = path.to_str()
302                {
303                    paths.push(path_str.to_string());
304                }
305                if let Some(path) = delta.old_file().path()
306                    && let Some(path_str) = path.to_str()
307                    && !paths.contains(&path_str.to_string())
308                {
309                    paths.push(path_str.to_string());
310                }
311                true
312            },
313            None,
314            None,
315            None,
316        );
317    }
318
319    paths
320}
321
322/// Collect all file paths in a tree (helper for get_commit_affected_paths)
323fn collect_all_tree_paths(
324    repo: &Repository,
325    tree: &git2::Tree,
326    prefix: &str,
327    paths: &mut Vec<String>,
328) {
329    for entry in tree.iter() {
330        if let Some(name) = entry.name() {
331            let path = if prefix.is_empty() {
332                name.to_string()
333            } else {
334                format!("{}/{}", prefix, name)
335            };
336
337            paths.push(path.clone());
338
339            if entry.kind() == Some(git2::ObjectType::Tree)
340                && let Ok(obj) = entry.to_object(repo)
341                && let Ok(subtree) = obj.peel_to_tree()
342            {
343                collect_all_tree_paths(repo, &subtree, &path, paths);
344            }
345        }
346    }
347}
348
349/// Compute statistics from changes for a feature
350fn compute_stats_from_changes(
351    changes: &[Change],
352    feature_path: &Path,
353    nested_features: &[Feature],
354) -> Option<Stats> {
355    if changes.is_empty() {
356        return None;
357    }
358
359    // Collect paths of nested features to exclude from commit counts
360    let nested_feature_paths: Vec<String> =
361        nested_features.iter().map(|f| f.path.clone()).collect();
362
363    // Get repository to check commit details
364    let repo = Repository::discover(feature_path).ok();
365
366    // Get the feature's relative path from repo root
367    let feature_relative_path = if let Some(ref r) = repo {
368        if let Ok(canonical_path) = std::fs::canonicalize(feature_path) {
369            if let Some(workdir) = r.workdir() {
370                canonical_path
371                    .strip_prefix(workdir)
372                    .ok()
373                    .map(|p| p.to_string_lossy().to_string())
374            } else {
375                None
376            }
377        } else {
378            None
379        }
380    } else {
381        None
382    };
383
384    // Filter changes to only include those that affect files in this feature
385    // (not exclusively in nested features)
386    let filtered_changes: Vec<&Change> = changes
387        .iter()
388        .filter(|change| {
389            // If we don't have repo access, include all changes
390            let Some(ref r) = repo else {
391                return true;
392            };
393
394            let Some(ref feature_rel_path) = feature_relative_path else {
395                return true;
396            };
397
398            // Get the files affected by this commit
399            let affected_files = get_commit_affected_paths(r, &change.hash);
400
401            // Check if any affected file is in this feature but not in a nested feature
402            affected_files.iter().any(|file_path| {
403                // File must be in this feature
404                let in_feature = file_path.starts_with(feature_rel_path);
405
406                // File must not be exclusively in a nested feature
407                let in_nested = nested_feature_paths.iter().any(|nested_path| {
408                    // Convert nested absolute path to relative path
409                    if let Ok(nested_canonical) = std::fs::canonicalize(nested_path)
410                        && let Some(workdir) = r.workdir()
411                        && let Ok(nested_rel) = nested_canonical.strip_prefix(workdir)
412                    {
413                        let nested_rel_str = nested_rel.to_string_lossy();
414                        return file_path.starts_with(nested_rel_str.as_ref());
415                    }
416                    false
417                });
418
419                in_feature && !in_nested
420            })
421        })
422        .collect();
423
424    let mut commits = HashMap::new();
425
426    // Add total commit count
427    commits.insert(
428        "total_commits".to_string(),
429        serde_json::json!(filtered_changes.len()),
430    );
431
432    // Count commits by author
433    let mut authors_count: HashMap<String, usize> = HashMap::new();
434    for change in &filtered_changes {
435        *authors_count.entry(change.author_name.clone()).or_insert(0) += 1;
436    }
437    commits.insert(
438        "authors_count".to_string(),
439        serde_json::json!(authors_count),
440    );
441
442    // Count commits by conventional commit type
443    let mut count_by_type: HashMap<String, usize> = HashMap::new();
444    for change in &filtered_changes {
445        let commit_type = extract_commit_type(&change.title);
446        *count_by_type.entry(commit_type).or_insert(0) += 1;
447    }
448    commits.insert(
449        "count_by_type".to_string(),
450        serde_json::json!(count_by_type),
451    );
452
453    // Get first and last commit dates
454    if let Some(first) = filtered_changes.first() {
455        commits.insert(
456            "first_commit_date".to_string(),
457            serde_json::json!(first.date.clone()),
458        );
459    }
460    if let Some(last) = filtered_changes.last() {
461        commits.insert(
462            "last_commit_date".to_string(),
463            serde_json::json!(last.date.clone()),
464        );
465    }
466
467    // Count files and lines in the feature directory (excluding nested features)
468    let files_count = count_files(feature_path, &nested_feature_paths);
469    let lines_count = count_lines(feature_path, &nested_feature_paths);
470    let todos_count = count_todos(feature_path, &nested_feature_paths);
471
472    Some(Stats {
473        files_count: Some(files_count),
474        lines_count: Some(lines_count),
475        todos_count: Some(todos_count),
476        commits,
477    })
478}
479
/// Extract the commit type from a conventional commit title
///
/// Recognises `type: description`, `type(scope): description` and the
/// breaking-change forms `type!: description` / `type(scope)!: description`.
/// The type is matched case-insensitively, ignoring surrounding whitespace,
/// and is returned in lowercase.
///
/// Returns `"other"` when the title has no colon or the text before the
/// first colon is not one of the known conventional commit types.
fn extract_commit_type(title: &str) -> String {
    // Common conventional commit types
    const KNOWN_TYPES: [&str; 11] = [
        "feat", "fix", "docs", "style", "refactor", "perf", "test", "build", "ci", "chore",
        "revert",
    ];

    let commit_type = title.split_once(':').map(|(prefix, _description)| {
        // Remove scope if present (e.g., "feat(auth)" -> "feat")
        let without_scope = prefix.split('(').next().unwrap_or(prefix);

        // Drop the breaking-change marker (e.g., "feat!" -> "feat")
        without_scope
            .trim()
            .trim_end_matches('!')
            .trim_end()
            .to_lowercase()
    });

    match commit_type {
        Some(found) if KNOWN_TYPES.contains(&found.as_str()) => found,
        // If not a conventional commit, return "other"
        _ => "other".to_string(),
    }
}
510
511fn process_feature_directory(
512    path: &Path,
513    name: &str,
514    changes_map: Option<&HashMap<String, Vec<Change>>>,
515) -> Result<Feature> {
516    // Try to find and read README file, use defaults if not found
517    let mut readme_info = if let Some(readme_path) = find_readme_file(path) {
518        read_readme_info(&readme_path)?
519    } else {
520        use crate::readme_parser::ReadmeInfo;
521        ReadmeInfo {
522            title: None,
523            owner: "Unknown".to_string(),
524            description: "".to_string(),
525            meta: std::collections::HashMap::new(),
526        }
527    };
528
529    // Remove the 'feature' key from meta if it exists (it's redundant since we know it's a feature)
530    readme_info.meta.remove("feature");
531
532    let changes = if let Some(map) = changes_map {
533        // Convert the absolute path to a repo-relative path and look up changes
534        get_changes_for_path(path, map).unwrap_or_default()
535    } else {
536        Vec::new()
537    };
538
539    // Always include decisions regardless of include_changes flag
540    let decisions = read_decision_files(path).unwrap_or_default();
541
542    // Check if this feature has nested features in a 'features' subdirectory
543    let nested_features_path = path.join("features");
544    let mut nested_features = if nested_features_path.exists() && nested_features_path.is_dir() {
545        list_files_recursive_impl(&nested_features_path, changes_map).unwrap_or_default()
546    } else {
547        Vec::new()
548    };
549
550    // Also check for nested features marked with feature: true in subdirectories
551    let entries = fs::read_dir(path)
552        .with_context(|| format!("could not read directory `{}`", path.display()))?;
553
554    let mut entries: Vec<_> = entries.collect::<Result<_, _>>()?;
555    entries.sort_by_key(|entry| entry.path());
556
557    for entry in entries {
558        let entry_path = entry.path();
559        let entry_name = entry_path.file_name().unwrap().to_string_lossy();
560
561        if entry_path.is_dir()
562            && entry_name != "features" // Don't process 'features' folder twice
563            && !is_documentation_directory(&entry_path)
564        {
565            if has_feature_flag_in_readme(&entry_path) {
566                // This directory is a feature itself
567                let nested_feature =
568                    process_feature_directory(&entry_path, &entry_name, changes_map)?;
569                nested_features.push(nested_feature);
570            } else {
571                // This directory is not a feature, but might contain features
572                // Recursively search for features inside it
573                let deeper_features = list_files_recursive_impl(&entry_path, changes_map)?;
574                nested_features.extend(deeper_features);
575            }
576        }
577    }
578
579    // Compute stats from changes if available
580    let stats = compute_stats_from_changes(&changes, path, &nested_features);
581
582    Ok(Feature {
583        name: readme_info.title.unwrap_or_else(|| name.to_string()),
584        description: readme_info.description,
585        owner: readme_info.owner,
586        path: path.to_string_lossy().to_string(),
587        features: nested_features,
588        meta: readme_info.meta,
589        changes,
590        decisions,
591        stats,
592    })
593}
594
595fn list_files_recursive_impl(
596    dir: &Path,
597    changes_map: Option<&HashMap<String, Vec<Change>>>,
598) -> Result<Vec<Feature>> {
599    let entries = fs::read_dir(dir)
600        .with_context(|| format!("could not read directory `{}`", dir.display()))?;
601
602    let mut entries: Vec<_> = entries.collect::<Result<_, _>>()?;
603    entries.sort_by_key(|entry| entry.path());
604
605    let mut features: Vec<Feature> = Vec::new();
606
607    for entry in entries {
608        let path = entry.path();
609        let name = path.file_name().unwrap().to_string_lossy();
610
611        if path.is_dir() {
612            if is_feature_directory(&path) {
613                let feature = process_feature_directory(&path, &name, changes_map)?;
614                features.push(feature);
615            } else if !is_documentation_directory(&path)
616                && !is_inside_documentation_directory(&path)
617            {
618                // Recursively search for features in non-documentation subdirectories
619                let new_features = list_files_recursive_impl(&path, changes_map)?;
620                features.extend(new_features);
621            }
622        }
623    }
624
625    Ok(features)
626}
627
628/// Get changes for a specific path from the pre-computed changes map
629fn get_changes_for_path(
630    path: &Path,
631    changes_map: &HashMap<String, Vec<Change>>,
632) -> Result<Vec<Change>> {
633    // Canonicalize the path
634    let canonical_path = std::fs::canonicalize(path)?;
635
636    // Find the repository and get the working directory
637    let repo = Repository::discover(path)?;
638    let repo_workdir = repo
639        .workdir()
640        .context("repository has no working directory")?;
641
642    // Convert to relative path from repo root
643    let relative_path = canonical_path
644        .strip_prefix(repo_workdir)
645        .context("path is not within repository")?;
646
647    let relative_path_str = relative_path.to_string_lossy().to_string();
648
649    // Look up the changes in the map
650    Ok(changes_map
651        .get(&relative_path_str)
652        .cloned()
653        .unwrap_or_default())
654}
655
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each `(title, expected type)` pair is classified as expected.
    fn assert_commit_types(cases: &[(&str, &str)]) {
        for (title, expected) in cases {
            assert_eq!(
                extract_commit_type(title),
                *expected,
                "unexpected type for title {:?}",
                title
            );
        }
    }

    #[test]
    fn test_extract_commit_type() {
        // Standard conventional commit types
        assert_commit_types(&[
            ("feat: add new feature", "feat"),
            ("fix: resolve bug", "fix"),
            ("docs: update README", "docs"),
            ("style: format code", "style"),
            ("refactor: improve structure", "refactor"),
            ("perf: optimize performance", "perf"),
            ("test: add unit tests", "test"),
            ("build: update dependencies", "build"),
            ("ci: fix CI pipeline", "ci"),
            ("chore: update gitignore", "chore"),
            ("revert: undo previous commit", "revert"),
        ]);

        // With scope
        assert_commit_types(&[
            ("feat(auth): add login", "feat"),
            ("fix(api): resolve endpoint issue", "fix"),
            ("docs(readme): update instructions", "docs"),
        ]);

        // Case insensitivity
        assert_commit_types(&[
            ("FEAT: uppercase type", "feat"),
            ("Fix: mixed case", "fix"),
            ("DOCS: all caps", "docs"),
        ]);

        // Non-conventional commits
        assert_commit_types(&[
            ("random commit message", "other"),
            ("update: not conventional", "other"),
            ("feature: close but not standard", "other"),
            ("no colon here", "other"),
            ("", "other"),
        ]);

        // Edge cases
        assert_commit_types(&[
            ("feat:no space after colon", "feat"),
            ("feat  : extra spaces", "feat"),
            ("feat(scope)(weird): nested parens", "feat"),
        ]);
    }
}
715}