import subprocess
import re
import sys
from pathlib import Path
from typing import List, Dict, Tuple
def run_git_command(cmd: List[str]) -> str:
    """Run ``git`` with the given arguments and return its stripped stdout.

    Args:
        cmd: Arguments placed after the ``git`` executable, e.g.
            ``['diff', '--cached']``.

    Returns:
        Standard output with leading/trailing whitespace removed, or an
        empty string when git exits with a non-zero status or cannot be
        started at all. Failures are reported on stderr, never raised.
    """
    try:
        result = subprocess.run(
            ['git', *cmd],
            capture_output=True,
            text=True,
            # Diffs may contain bytes that are invalid in the locale encoding;
            # substitute them instead of raising UnicodeDecodeError.
            errors='replace',
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error running git command: {e}", file=sys.stderr)
        # The exception text only carries the exit status; git's own message
        # (captured in e.stderr) is what explains the failure.
        details = (e.stderr or "").strip()
        if details:
            print(details, file=sys.stderr)
        return ""
    except OSError as e:
        # Raised when the git executable is missing or not runnable; keep the
        # same "empty string on failure" contract as for a failed command.
        print(f"Error running git command: {e}", file=sys.stderr)
        return ""
    return result.stdout.strip()
def get_staged_files() -> List[str]:
    """Return the paths reported by ``git diff --cached --name-only``.

    Returns:
        One entry per output line, or an empty list when the command
        produced no output.
    """
    listing = run_git_command(['diff', '--cached', '--name-only'])
    if not listing:
        return []
    return listing.split('\n')
def get_unstaged_files() -> List[str]:
    """Return the paths reported by ``git diff --name-only``.

    Returns:
        One entry per output line, or an empty list when the command
        produced no output.
    """
    listing = run_git_command(['diff', '--name-only'])
    if not listing:
        return []
    return listing.split('\n')
def get_diff_summary(files: List[str], staged: bool = True) -> str:
    """Return the ``git diff --stat`` summary for the given files.

    Args:
        files: Paths to restrict the diff to.
        staged: When true, pass ``--cached`` so the staged changes are
            summarized; otherwise the plain ``git diff`` is used.

    Returns:
        The output of the git command, or an empty string when ``files``
        is empty.
    """
    if not files:
        return ""
    cmd = ['diff']
    if staged:
        cmd.append('--cached')
    cmd.append('--stat')
    # '--' makes git treat everything after it as a path. Without it, a path
    # that no longer exists in the working tree (e.g. a staged deletion) is
    # rejected as an ambiguous argument, and a path that looks like a revision
    # or an option is misinterpreted.
    cmd.append('--')
    cmd.extend(files)
    return run_git_command(cmd)
def analyze_file_changes(diff_output: str) -> Dict[str, int]:
categories = {
'feat': 0,
'fix': 0,
'docs': 0,
'style': 0,
'refactor': 0,
'test': 0,
'chore': 0,
'perf': 0,
'ci': 0,
'build': 0
}
patterns = {
'feat': [
r'new.*function',
r'add.*feature',
r'implement',
r'\+def.*new',
r'\+class.*\w+',
r'\+async.*def'
],
'fix': [
r'bug.*fix',
r'fix.*issue',
r'resolve.*error',
r'correct.*logic',
r'patch.*bug',
r'-.*\w*error'
],
'docs': [
r'\.md',
r'readme',
r'documentation',
r'comment.*update',
r'docstring'
],
'style': [
r'format',
r'lint',
r'indent',
r'whitespace',
r'semicolon',
r'coding.*style'
],
'refactor': [
r'refactor',
r'restructure',
r'reorganize',
r'extract.*method',
r'rename.*\w+',
r'move.*\w+'
],
'test': [
r'test',
r'spec',
r'assert',
r'mock',
r'fixture',
r'coverage'
],
'chore': [
r'dependenc',
r'package\.json',
r'requirements\.txt',
r'cargo\.toml',
r'pipfile',
r'yarn\.lock'
],
'perf': [
r'performance',
r'optimize',
r'cache',
r'parallel',
r'async.*await',
r'lazy.*load'
],
'ci': [
r'\.github',
r'gitlab-ci',
r'travis',
r'jenkins',
r'workflow',
r'action'
],
'build': [
r'build',
r'compile',
r'webpack',
r'babel',
r'typescript',
r'rustc'
]
}
diff_lower = diff_output.lower()
for category, category_patterns in patterns.items():
for pattern in category_patterns:
matches = len(re.findall(pattern, diff_lower))
categories[category] += matches
return categories
def extract_scope(files: List[str]) -> str:
    """Derive a commit scope from the changed file paths.

    The first path component is used when every non-empty path shares it.
    Otherwise, if all non-empty paths have the same non-empty extension,
    that extension (without the leading dot) is used.

    Args:
        files: Changed file paths; empty strings are ignored.

    Returns:
        The scope string, or an empty string when neither rule applies.
    """
    if not files:
        return ""

    split_paths = [Path(name).parts for name in files if name]
    if not split_paths:
        return ""

    # zip() yields nothing if any path has no components, which matches the
    # "no common prefix" outcome for that case.
    leading = next(zip(*split_paths), None)
    if leading is not None and len(set(leading)) == 1:
        return leading[0]

    suffixes = {Path(name).suffix for name in files if name}
    if len(suffixes) == 1:
        (only_suffix,) = suffixes
        if only_suffix:
            return only_suffix[1:]
    return ""
def detect_breaking_changes(diff_output: str) -> List[str]:
    """Find phrases in a diff that hint at a breaking change.

    The diff is lower-cased and searched with a fixed list of regular
    expressions (removal/deletion wording, "breaking", "deprecated",
    API/interface changes, backward incompatibility).

    Args:
        diff_output: Raw diff text to scan.

    Returns:
        The distinct matched substrings, ordered by pattern and then by
        position of first occurrence, so repeated runs on the same diff
        always yield the same list.
    """
    breaking_patterns = [
        r'remove.*\w+',
        r'delete.*\w+',
        r'break.*change',
        r'breaking',
        r'deprecated',
        r'api.*change',
        r'interface.*change',
        r'backward.*incompatible',
    ]
    diff_lower = diff_output.lower()
    found: List[str] = []
    for pattern in breaking_patterns:
        found.extend(re.findall(pattern, diff_lower))
    # dict.fromkeys de-duplicates while keeping insertion order; a set would
    # make the order (and any slice a caller takes) vary between runs because
    # of string hash randomization.
    return list(dict.fromkeys(found))
def suggest_commit_message(files: List[str], staged: bool = True) -> str:
    """Build a conventional-commit style message for the given files.

    The diff of ``files`` is scored per category, the highest-scoring
    category becomes the commit type (``chore`` when nothing matched), and a
    scope and description are derived from the file paths. A
    ``BREAKING CHANGE`` footer is added when breaking-change phrases are
    found, and a file list is appended for diffs longer than 50 lines.

    Args:
        files: Paths to restrict the diff to.
        staged: When true, diff the staged changes (``--cached``); otherwise
            use the plain ``git diff``.

    Returns:
        The suggested commit message, or ``"No changes detected"`` when the
        git command produced no output.
    """
    cmd = ['diff']
    if staged:
        cmd.append('--cached')
    # '--' makes git treat everything after it as a path, so staged deletions
    # and paths that resemble revisions or options are handled correctly.
    cmd.append('--')
    cmd.extend(files)
    diff_output = run_git_command(cmd)
    if not diff_output:
        return "No changes detected"

    categories = analyze_file_changes(diff_output)
    scope = extract_scope(files)
    breaking_changes = detect_breaking_changes(diff_output)

    # max() returns the first category with the top score; a top score of
    # zero means no pattern matched at all, so fall back to 'chore'.
    primary_category = max(categories, key=categories.get)
    if categories[primary_category] == 0:
        primary_category = 'chore'

    description = generate_description(primary_category, files, diff_output)
    header = f"{primary_category}({scope})" if scope else f"{primary_category}"
    parts = [f"{header}: {description}"]

    if breaking_changes:
        # Only the first two findings are listed to keep the footer short.
        parts.append(f"\n\nBREAKING CHANGE: {', '.join(breaking_changes[:2])}")

    if len(diff_output.split('\n')) > 50:
        parts.append("\n\nMultiple files changed:\n")
        parts.extend(f"- {name}\n" for name in files[:5])
        if len(files) > 5:
            parts.append(f"- and {len(files) - 5} more files\n")

    return "".join(parts)
def generate_description(category: str, files: List[str], diff_output: str) -> str:
    """Produce the short description part of a commit message.

    The first phrase registered for ``category`` is used as the base text
    (``"update code"`` for an unknown category). If any file has a
    recognised extension, a language hint is appended; Python takes
    precedence over JavaScript/TypeScript, then Rust, then Markdown.

    Args:
        category: Commit type such as ``feat`` or ``fix``.
        files: Changed file paths, used only for their extensions.
        diff_output: Diff text; not used by this function.

    Returns:
        The description string.
    """
    phrases_by_category = {
        'feat': [
            "add new functionality",
            "implement feature",
            "add capability",
            "introduce feature",
        ],
        'fix': [
            "fix bug",
            "resolve issue",
            "correct error",
            "patch bug",
        ],
        'docs': [
            "update documentation",
            "improve docs",
            "add documentation",
            "clarify documentation",
        ],
        'style': [
            "improve formatting",
            "fix code style",
            "update formatting",
            "apply linting fixes",
        ],
        'refactor': [
            "refactor code",
            "improve structure",
            "reorganize code",
            "optimize structure",
        ],
        'test': [
            "add tests",
            "improve test coverage",
            "update tests",
            "fix tests",
        ],
        'chore': [
            "update dependencies",
            "perform maintenance",
            "update configuration",
            "chore task",
        ],
        'perf': [
            "improve performance",
            "optimize code",
            "speed up operations",
            "reduce overhead",
        ],
        'ci': [
            "update CI configuration",
            "improve build process",
            "update workflow",
            "fix pipeline",
        ],
        'build': [
            "update build configuration",
            "fix build issues",
            "improve build process",
            "update compilation",
        ],
    }
    # Checked top to bottom; the first entry with a matching extension wins.
    language_hints = (
        (('.py',), " in Python code"),
        (('.js', '.ts'), " in JavaScript/TypeScript"),
        (('.rs',), " in Rust code"),
        (('.md',), " in documentation"),
    )

    phrases = phrases_by_category.get(category, ["update code"])
    base_text = phrases[0]
    if not files:
        return base_text

    suffixes = {Path(name).suffix for name in files}
    for extensions, hint in language_hints:
        if any(ext in suffixes for ext in extensions):
            return base_text + hint
    return base_text
def main():
    """Command-line entry point: pick the files to analyze and print a message.

    With ``--files`` the given paths are used and ``--staged`` decides which
    diff is taken. Without ``--files`` the file list comes from git: unstaged
    files only when ``--unstaged`` is given without ``--staged``, staged
    files otherwise. Exits with status 1 when there are no files.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Analyze changes and suggest commit messages")
    parser.add_argument("--staged", action="store_true", help="Analyze staged changes")
    parser.add_argument("--unstaged", action="store_true", help="Analyze unstaged changes")
    parser.add_argument("--files", nargs="*", help="Specific files to analyze")
    args = parser.parse_args()

    if args.files:
        files, staged = args.files, args.staged
    elif args.unstaged and not args.staged:
        files, staged = get_unstaged_files(), False
    else:
        # Staged changes are the default, and --staged wins over --unstaged.
        files, staged = get_staged_files(), True

    if not files:
        print("No files to analyze")
        sys.exit(1)

    print(suggest_commit_message(files, staged))


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()