1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
use crate::error::header_error::HeaderError;
use regex::Regex;
use std::sync::LazyLock;
/// Components extracted from a commit header of the form `type(scope)!: subject`.
///
/// `PartialEq`/`Eq` are derived so parsed headers can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedHeader {
    /// Commit type: the text before the optional scope, `!` marker and colon.
    pub r#type: String,
    /// Scope written in parentheses right after the type, if one was given.
    pub scope: Option<String>,
    /// Number of whitespace characters found directly after the colon.
    pub spaces_after_colon: usize,
    /// Subject text that follows the colon.
    pub subject: String,
    /// Whether this is a breaking change, indicated by `!` right before the colon.
    ///
    /// Example: `refactor(parser)!: change public API`
    ///
    /// No need to define in config file.
    pub breaking: bool,
}
/// Parses a commit header of the form `type(scope)?(!)?: subject`.
///
/// - `type`: one or more characters other than whitespace, `(`, `)`, `:` and `!`
///   (so Unicode and emoji are accepted)
/// - `scope`: optional; one or more characters other than `)`, wrapped in parentheses
/// - `!`: optional breaking-change marker directly before the colon
/// - `subject`: everything after the colon and the whitespace that follows it,
///   returned with surrounding whitespace trimmed
///
/// # Errors
///
/// - `HeaderError::FullWidthColonNotAllowed` if a full-width colon (U+FF1A)
///   appears anywhere in `header`.
/// - `HeaderError::MissingColon` if `header` has no half-width colon.
/// - `HeaderError::FullWidthSpaceNotAllowed` if the whitespace captured right
///   after the colon contains a full-width space (U+3000).
/// - `HeaderError::EmptySubject` if the subject is empty once trimmed, e.g. `"feat: "`.
/// - For any other header that does not match the pattern, the error chosen by
///   `analyze_header_failure`.
pub fn parse_header(header: &str) -> Result<ParsedHeader, HeaderError> {
    // Full-width colon (U+FF1A) is not allowed. Written as an escape because the
    // literal is visually almost identical to the ASCII colon checked below.
    if header.contains('\u{FF1A}') {
        return Err(HeaderError::FullWidthColonNotAllowed);
    }
    // Must contain a half-width colon
    if !header.contains(':') {
        return Err(HeaderError::MissingColon);
    }

    // Regex to match and extract headers following Conventional Commits style,
    // capturing type, scope, breaking marker, whitespace after the colon, and subject.
    //
    // `expect` is allowed here because the pattern is a constant: compiling it can
    // only fail through a programming error, never because of user input.
    #[allow(clippy::expect_used)]
    static RE: LazyLock<Regex> = LazyLock::new(|| {
        Regex::new(
            r"^(?P<type>[^():!\s]+)(?:\((?P<scope>[^)]+)\))?(?P<breaking>!)?:(?P<spaces>\s*)(?P<subject>.+)$"
        ).expect("regex is valid")
    });

    let Some(caps) = RE.captures(header) else {
        // No match: analyze the header to report a specific, actionable error
        return Err(analyze_header_failure(header));
    };

    let kind = caps
        .name("type")
        .map(|m| m.as_str().to_string())
        .ok_or(HeaderError::EmptyType)?;
    let scope = caps.name("scope").map(|m| m.as_str().to_string());

    // Whitespace captured between the colon and the subject
    let spaces = caps
        .name("spaces")
        .map(|m| m.as_str())
        .ok_or(HeaderError::MissingSpacesGroup)?;
    // Full-width space (U+3000) after the colon is not allowed
    if spaces.contains('\u{3000}') {
        return Err(HeaderError::FullWidthSpaceNotAllowed);
    }

    // For a header such as "feat: " the pattern still matches: the `spaces` group
    // gives its whitespace back so that `.+` has something to consume. The subject
    // is then empty after trimming and must be rejected rather than returned.
    let subject = caps
        .name("subject")
        .map(|m| m.as_str().trim())
        .filter(|s| !s.is_empty())
        .ok_or(HeaderError::EmptySubject)?;

    Ok(ParsedHeader {
        r#type: kind,
        scope,
        spaces_after_colon: spaces.chars().count(),
        subject: subject.to_string(),
        breaking: caps.name("breaking").is_some(),
    })
}
/// Picks the most specific `HeaderError` for a header that could not be parsed.
///
/// The header is split at its first half-width colon and the checks run in this
/// order; the first one that applies determines the result:
///
/// 1. no colon at all -> `MissingColon`
/// 2. nothing before the colon -> `EmptyType`
/// 3. `()` before the colon -> `EmptyScopeWithParen`
/// 4. nothing after the colon -> `EmptySubject`
/// 5. `(` without `)` -> `MissingRightParen`; `)` without `(` -> `MissingLeftParen`
/// 6. a `!` that is not the last character before the colon -> `InvalidBreakingPosition`
/// 7. whitespace inside the part before the colon -> `InvalidType`
/// 8. anything else -> `InvalidHeaderFormat`
fn analyze_header_failure(header: &str) -> HeaderError {
    // `before` is the raw `type(scope)!` part, `after` the raw subject part
    let Some((before, after)) = header.split_once(':') else {
        return HeaderError::MissingColon;
    };
    let prefix = before.trim();
    let subject = after.trim();

    // Empty type
    if prefix.is_empty() {
        return HeaderError::EmptyType;
    }

    // Empty scope: feat():
    if prefix.contains("()") {
        return HeaderError::EmptyScopeWithParen {
            left: prefix.to_string(),
        };
    }

    // Empty subject
    if subject.is_empty() {
        return HeaderError::EmptySubject;
    }

    // Unbalanced scope parentheses (only the part before the colon is inspected)
    match (prefix.contains('('), prefix.contains(')')) {
        (true, false) => {
            return HeaderError::MissingRightParen {
                left: prefix.to_string(),
            }
        }
        (false, true) => {
            return HeaderError::MissingLeftParen {
                left: prefix.to_string(),
            }
        }
        _ => {}
    }

    // Invalid breaking marker position: the first `!` must be the very last byte
    // before the colon. This deliberately looks at the untrimmed text, so a space
    // between `!` and `:` also counts as a misplaced marker.
    match before.find('!') {
        Some(bang) if bang + 1 != before.len() => return HeaderError::InvalidBreakingPosition,
        _ => {}
    }

    // Type contains invalid characters. Only the part before the colon is checked,
    // never the subject. The colon comparison cannot fire, because `prefix` ends
    // before the first colon; it is kept as a guard.
    let has_invalid_char = prefix.chars().any(|ch| ch == ':' || ch.is_whitespace());
    if has_invalid_char {
        return HeaderError::InvalidType(prefix.to_string());
    }

    // Fallback
    HeaderError::InvalidHeaderFormat(header.to_string())
}