1use std::collections::HashMap;
6
7use crate::error::{CommitGenError, Result};
8
/// Turns literal escape sequences (a backslash followed by `n`, `r` or `t`)
/// into the whitespace they stand for, but only when the text as a whole looks
/// escaped.
///
/// The text counts as escaped when literal `\n` sequences strictly outnumber
/// real newline characters. On a tie (or when real newlines dominate) the text
/// is returned untouched, so a stray backslash sequence inside otherwise
/// well-formed multi-line text (e.g. the path `C:\nope`) is preserved.
///
/// When converting, `\r\n`, `\n` and `\r` each become a single newline and
/// `\t` becomes a tab.
fn normalize_escaped_whitespace(text: &str) -> String {
    let real = text.matches('\n').count();
    let literal = text.matches("\\n").count();

    // `literal <= real` also covers `literal == 0`, since `real` is never negative.
    if literal <= real {
        return text.to_string();
    }

    // `\r\n` must be handled first so it collapses to one newline, not two.
    text.replace("\\r\\n", "\n")
        .replace("\\n", "\n")
        .replace("\\r", "\n")
        .replace("\\t", "\t")
}
31
32fn strip_fences(text: &str) -> String {
36 let normalized = normalize_escaped_whitespace(text);
37 let t = normalized.trim();
38 if let Some(after_fence) = t.strip_prefix("```") {
40 let after_open = after_fence.split_once('\n').map_or("", |x| x.1);
42 let body = match after_open.rfind("```") {
43 Some(end) => &after_open[..end],
44 None => after_open,
45 };
46 return body.trim().to_string();
47 }
48 t.lines()
50 .filter(|l| l.trim_start().trim_end() != "```" && !l.trim_start().starts_with("```"))
51 .collect::<Vec<_>>()
52 .join("\n")
53 .trim()
54 .to_string()
55}
56
/// Trims `s` and removes one layer of matching quotes around it, if present.
///
/// Recognized pairs are straight double quotes, straight single quotes,
/// backticks, and curly double/single quotes. The text inside the quotes is
/// trimmed again. Input without a matching pair is returned trimmed but
/// otherwise unchanged.
fn strip_wrapping_quotes(s: &str) -> String {
    const QUOTE_PAIRS: [(char, char); 5] =
        [('"', '"'), ('\'', '\''), ('`', '`'), ('“', '”'), ('‘', '’')];

    let trimmed = s.trim();
    let unquoted = QUOTE_PAIRS
        .iter()
        .find_map(|&(open, close)| trimmed.strip_prefix(open)?.strip_suffix(close));

    match unquoted {
        Some(inner) => inner.trim().to_string(),
        None => trimmed.to_string(),
    }
}
74
/// Drops a leading `Title:`, `Summary:`, `Description:` or `Result:` label
/// (case-insensitive, surrounding whitespace ignored) and returns the trimmed
/// text after the first colon.
///
/// When the text before the first colon is not one of those labels, or there
/// is no colon at all, `s` is returned unchanged.
fn strip_label_prefix(s: &str) -> String {
    const LABELS: [&str; 4] = ["title", "summary", "description", "result"];

    match s.split_once(':') {
        Some((label, value)) if LABELS.contains(&label.trim().to_lowercase().as_str()) => {
            value.trim().to_string()
        }
        _ => s.to_string(),
    }
}
85
86fn strip_heading_markers(s: &str) -> String {
88 let mut t = s.trim();
89 t = t.trim_start_matches('#').trim_start();
91 for marker in ["**", "*", "__", "_"] {
93 if t.starts_with(marker) && t.ends_with(marker) && t.len() > 2 * marker.len() {
94 t = t[marker.len()..t.len() - marker.len()].trim();
95 }
96 }
97 t.to_string()
98}
99
/// Returns the trimmed text of a list item, or `None` when `line` is not one.
///
/// A list item is a line that, after leading whitespace, starts with one of
/// the glyphs `-`, `*`, `•`, `–` or `+` followed by a space.
fn bullet_content(line: &str) -> Option<&str> {
    const GLYPHS: [&str; 5] = ["- ", "* ", "• ", "– ", "+ "];

    let body = line.trim_start();
    GLYPHS
        .iter()
        .find_map(|glyph| body.strip_prefix(*glyph))
        .map(str::trim)
}
110
/// Leniently extracts the text inside the first `<tag ...>` element of `text`.
///
/// Matching of the opening tag is ASCII case-insensitive and tolerates
/// attributes: anything up to the next `>` is skipped. The content runs until
/// the next `</` (any closing tag is accepted) or to the end of the text when
/// no closing tag exists, and is returned trimmed.
///
/// Returns `None` when no opening tag is found or the opening tag is never
/// closed with `>`.
fn extract_tag_lenient(text: &str, tag: &str) -> Option<String> {
    // ASCII-only case folding keeps every byte offset identical between
    // `haystack` and `text`. Full Unicode lowercasing can change the byte
    // length of some characters, which would make offsets found in the
    // lowered copy invalid (or even out of bounds) for slicing `text`.
    let haystack = text.to_ascii_lowercase();
    let needle = format!("<{}", tag.to_ascii_lowercase());

    let open_pos = haystack.find(&needle)?;
    let content_start = open_pos + text[open_pos..].find('>')? + 1;

    let rest = &text[content_start..];
    let end = rest.find("</").unwrap_or(rest.len());
    Some(rest[..end].trim().to_string())
}
126
127pub fn parse_conventional_analysis(text: &str) -> Result<serde_json::Value> {
133 let unfenced = strip_fences(text);
134 let lines: Vec<&str> = unfenced.lines().collect();
135
136 let mut heading_idx = None;
138 let mut parsed_heading = None;
139 for (i, line) in lines.iter().enumerate() {
140 let candidate = strip_heading_markers(line);
141 if let Some(h) = parse_heading(&candidate) {
142 heading_idx = Some(i);
143 parsed_heading = Some(h);
144 break;
145 }
146 if i >= 5 {
148 break;
149 }
150 }
151
152 let (commit_type, scope, summary) = parsed_heading.ok_or_else(|| {
153 CommitGenError::Other(
154 "markdown analysis: no `type(scope): summary` heading found".to_string(),
155 )
156 })?;
157 let start = heading_idx.unwrap_or(0) + 1;
158
159 let mut details = Vec::new();
160 let mut issue_refs = Vec::new();
161
162 for line in &lines[start..] {
163 let trimmed_line = line.trim();
164 let lower = trimmed_line.to_lowercase();
165
166 if let Some(detail) = bullet_content(trimmed_line) {
167 if !detail.is_empty() {
168 details.push(serde_json::json!({ "text": detail }));
169 }
170 } else if let Some(rest) = lower
171 .strip_prefix("fixes:")
172 .or_else(|| lower.strip_prefix("closes:"))
173 .or_else(|| lower.strip_prefix("resolves:"))
174 {
175 let orig = &trimmed_line[trimmed_line.len() - rest.len()..];
177 for ref_str in orig.split(',') {
178 let r = ref_str.trim();
179 if !r.is_empty() {
180 issue_refs.push(r.to_string());
181 }
182 }
183 }
184 }
185
186 Ok(serde_json::json!({
187 "type": commit_type,
188 "scope": scope,
189 "summary": summary,
190 "details": details,
191 "issue_refs": issue_refs
192 }))
193}
194
/// Parses a conventional-commit heading of the form `type(scope): summary`.
///
/// Returns `(type, scope, summary)` where `scope` is `None` when the
/// parentheses are missing or empty. A breaking-change marker (`!`) directly
/// before the colon is accepted and dropped, both as `type!:` and as
/// `type(scope)!:`.
///
/// Returns `None` when:
/// * the line contains no colon,
/// * the part before or after the first colon is empty,
/// * an opening parenthesis has no closing one, or the closing one comes first,
/// * the type is empty or contains anything other than ASCII letters.
fn parse_heading(line: &str) -> Option<(String, Option<String>, String)> {
    let (head, summary) = line.split_once(':')?;
    let summary = summary.trim();

    // Drop the optional breaking-change marker so `feat!:` is treated the same
    // way as `feat(scope)!:`, whose trailing `!` was already ignored.
    let head = head.trim();
    let type_scope = head.strip_suffix('!').map_or(head, str::trim_end);

    if type_scope.is_empty() || summary.is_empty() {
        return None;
    }

    let (ty, scope) = match type_scope.find('(') {
        Some(open) => {
            let close = type_scope.find(')')?;
            if close < open {
                return None;
            }
            let scope = type_scope[open + 1..close].trim();
            let scope = if scope.is_empty() {
                None
            } else {
                Some(scope.to_string())
            };
            (type_scope[..open].trim(), scope)
        }
        None => (type_scope, None),
    };

    // ASCII letters only; this also rules out whitespace inside the type.
    if ty.is_empty() || !ty.chars().all(|c| c.is_ascii_alphabetic()) {
        return None;
    }

    Some((ty.to_string(), scope, summary.to_string()))
}
223
224pub fn parse_summary_output(text: &str) -> Result<serde_json::Value> {
230 let unfenced = strip_fences(text);
231
232 let raw = extract_tag_lenient(&unfenced, "summary").unwrap_or_else(|| unfenced.clone());
234
235 let stripped = strip_heading_markers(&raw);
237 let stripped = strip_label_prefix(&stripped);
238 let stripped = strip_wrapping_quotes(&stripped);
239 let summary_text = stripped.split_whitespace().collect::<Vec<_>>().join(" ");
240
241 if summary_text.is_empty() {
242 return Err(CommitGenError::Other("markdown summary: empty summary text".to_string()));
243 }
244
245 Ok(serde_json::json!({ "summary": summary_text }))
246}
247
248pub fn parse_changelog_response(text: &str) -> Result<serde_json::Value> {
254 const KNOWN: [&str; 7] =
255 ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security", "Breaking"];
256
257 let unfenced = strip_fences(text);
258
259 let mut entries: HashMap<String, Vec<String>> = HashMap::new();
260 let mut current_category: Option<String> = None;
261
262 let canonical = |name: &str| -> Option<String> {
263 let n = name.trim().trim_end_matches(':').trim();
264 KNOWN
265 .iter()
266 .find(|k| k.eq_ignore_ascii_case(n))
267 .map(|k| (*k).to_string())
268 };
269
270 for line in unfenced.lines() {
271 let trimmed_line = line.trim();
272 if trimmed_line.is_empty() {
273 continue; }
275
276 let header = if trimmed_line.starts_with('#') {
281 let h = trimmed_line.trim_start_matches('#').trim().trim_end_matches(':').trim();
282 Some(canonical(h).unwrap_or_else(|| h.to_string()))
283 } else { canonical(trimmed_line) };
284
285 if let Some(h) = header {
286 current_category = Some(h);
287 continue;
288 }
289
290 let entry = bullet_content(trimmed_line).unwrap_or(trimmed_line).trim();
292 if let Some(cat) = ¤t_category
293 && !entry.is_empty() {
294 entries.entry(cat.clone()).or_default().push(entry.to_string());
295 }
296 }
297
298 if entries.is_empty() {
299 return Err(CommitGenError::Other(
300 "markdown changelog: no entries found (format: ## Category\\n- entry)".to_string(),
301 ));
302 }
303
304 Ok(serde_json::json!({ "entries": entries }))
305}
306
307pub fn parse_compose_intent(text: &str) -> Result<serde_json::Value> {
312 let trimmed = strip_fences(text);
313
314 let mut groups = Vec::new();
315 let mut group_map: HashMap<String, usize> = HashMap::new();
316
317 for line in trimmed.lines() {
319 let trimmed_line = line.trim();
320 if let Some(assign_pos) = trimmed_line.find(":=") {
321 let gid = trimmed_line[..assign_pos].trim().to_string();
322 let rest = &trimmed_line[assign_pos + 2..].trim();
323
324 if let Some(colon_pos) = rest.find(':') {
325 let type_scope = &rest[..colon_pos].trim();
326 let rationale = rest[colon_pos + 1..].trim().to_string();
327
328 let (gtype, scope) = if let Some(paren_start) = type_scope.find('(') {
329 if let Some(paren_end) = type_scope.find(')') {
330 let t = type_scope[..paren_start].trim();
331 let s = type_scope[paren_start + 1..paren_end].trim();
332 (t.to_string(), Some(s.to_string()))
333 } else {
334 (type_scope.to_string(), None)
335 }
336 } else {
337 (type_scope.to_string(), None)
338 };
339
340 group_map.insert(gid.clone(), groups.len());
341
342 let group_obj = serde_json::json!({
343 "group_id": gid,
344 "type": normalize_commit_type(>ype),
345 "scope": scope,
346 "rationale": rationale,
347 "file_ids": Vec::<String>::new(),
348 "dependencies": Vec::<String>::new()
349 });
350 groups.push(group_obj);
351 }
352 }
353 }
354
355 for line in trimmed.lines() {
357 let trimmed_line = line.trim();
358 if let Some(dep_pos) = trimmed_line.find("<-") {
359 let gid = trimmed_line[..dep_pos].trim().to_string();
360 let deps_str = trimmed_line[dep_pos + 2..].trim();
361
362 if let Some(idx) = group_map.get(&gid) {
363 let mut dependencies = Vec::new();
364 for dep_id in deps_str.split(',') {
365 let trimmed_dep = dep_id.trim();
366 if !trimmed_dep.is_empty() {
367 dependencies.push(trimmed_dep.to_string());
368 }
369 }
370 if let Some(group_obj) = groups.get_mut(*idx) {
371 group_obj["dependencies"] = serde_json::Value::Array(
372 dependencies.into_iter().map(serde_json::Value::String).collect(),
373 );
374 }
375 }
376 }
377 }
378
379 let mut in_files_section = false;
381 for line in trimmed.lines() {
382 let trimmed_line = line.trim();
383
384 if trimmed_line.to_lowercase().starts_with("files:") {
385 in_files_section = true;
386 continue;
387 }
388
389 if in_files_section && let Some(bullet) = bullet_content(trimmed_line)
390 && let Some(colon_pos) = bullet.find(':') {
391 let gid = bullet[..colon_pos].trim().to_string();
392 let files_str = bullet[colon_pos + 1..].trim();
393
394 if let Some(idx) = group_map.get(&gid)
395 && let Some(group_obj) = groups.get_mut(*idx) {
396 group_obj["file_ids"] = serde_json::Value::Array(
397 files_str.split(',').map(|f| serde_json::Value::String(f.trim().to_string())).collect(),
398 );
399 }
400 }
401 }
402
403 if groups.is_empty() {
404 return Err(CommitGenError::Other(
405 "markdown compose intent: no groups found (format: G1 := type(scope): rationale)".to_string(),
406 ));
407 }
408
409 Ok(serde_json::json!({
410 "groups": groups
411 }))
412}
413
414pub fn parse_compose_binding(text: &str) -> Result<serde_json::Value> {
419 let trimmed = strip_fences(text);
420
421 let mut assignments = Vec::new();
422 let mut current_group: Option<String> = None;
423 let mut current_hunks: Vec<String> = Vec::new();
424
425 for line in trimmed.lines() {
426 let trimmed_line = line.trim();
427
428 if trimmed_line.starts_with('#') {
429 if let Some(gid) = current_group.take() {
431 assignments.push(serde_json::json!({
432 "group_id": gid,
433 "hunk_ids": std::mem::take(&mut current_hunks)
434 }));
435 }
436 let new_gid = trimmed_line
438 .trim_start_matches('#')
439 .trim()
440 .trim_end_matches(':')
441 .trim()
442 .to_string();
443 current_group = Some(new_gid);
444 } else if let Some(hunk_id) = bullet_content(trimmed_line) {
445 current_hunks.push(hunk_id.to_string());
446 }
447 }
448
449 if let Some(gid) = current_group.take() {
451 assignments.push(serde_json::json!({
452 "group_id": gid,
453 "hunk_ids": std::mem::take(&mut current_hunks)
454 }));
455 }
456
457 if assignments.is_empty() {
458 return Err(CommitGenError::Other(
459 "markdown compose binding: no assignments found (format: # group_id\\n- hunk_id)".to_string(),
460 ));
461 }
462
463 Ok(serde_json::json!({
464 "assignments": assignments
465 }))
466}
467
468pub fn parse_batch_observations(text: &str) -> Result<serde_json::Value> {
475 let unfenced = strip_fences(text);
476
477 let mut files: Vec<serde_json::Value> = Vec::new();
478 let mut current_path: Option<String> = None;
479 let mut current_obs: Vec<String> = Vec::new();
480
481 for line in unfenced.lines() {
482 let t = line.trim();
483 if t.is_empty() {
484 continue;
485 }
486
487 if t.starts_with('#') {
488 if let Some(path) = current_path.take() {
490 files.push(serde_json::json!({
491 "path": path,
492 "observations": std::mem::take(&mut current_obs),
493 }));
494 }
495 current_path = Some(t.trim_start_matches('#').trim().to_string());
496 } else if current_path.is_some() {
497 let obs = bullet_content(t).unwrap_or(t).trim();
499 if !obs.is_empty() {
500 current_obs.push(obs.to_string());
501 }
502 }
503 }
504
505 if let Some(path) = current_path.take() {
506 files.push(serde_json::json!({
507 "path": path,
508 "observations": current_obs,
509 }));
510 }
511
512 if files.is_empty() {
513 return Err(CommitGenError::Other(
514 "markdown observations: no file sections found (format: ## path\\n- observation)"
515 .to_string(),
516 ));
517 }
518
519 Ok(serde_json::json!({ "files": files }))
520}
521
/// Maps a commit type or one of its known aliases to the canonical short form.
///
/// The input is lowercased before matching (it is not trimmed). Values that
/// are not in the alias table are returned lowercased and otherwise unchanged.
fn normalize_commit_type(s: &str) -> String {
    let lowered = s.to_lowercase();

    let canonical = match lowered.as_str() {
        "feat" | "feature" => "feat",
        "fix" | "bugfix" => "fix",
        "docs" | "documentation" => "docs",
        "style" | "formatting" => "style",
        "refactor" | "refactoring" => "refactor",
        "perf" | "performance" => "perf",
        "test" | "tests" => "test",
        "build" | "builder" => "build",
        "ci" | "cicd" => "ci",
        "chore" | "maintenance" => "chore",
        "revert" | "reversion" => "revert",
        "deps" | "dependencies" | "dependency" => "deps",
        "security" | "sec" => "security",
        "config" | "configuration" => "config",
        "ux" | "ergonomics" => "ux",
        "release" | "version" => "release",
        "hotfix" => "hotfix",
        "infra" | "infrastructure" => "infra",
        "init" | "initialization" => "init",
        "merge" | "merging" => "merge",
        "hack" | "hacky" => "hack",
        "wip" | "work-in-progress" => "wip",
        // Unknown types pass through in lowercase.
        other => other,
    };

    canonical.to_string()
}
550
#[cfg(test)]
mod tests {
    use super::*;

    /// Length of a JSON array value; panics when `value` is not an array.
    fn array_len(value: &serde_json::Value) -> usize {
        value.as_array().unwrap().len()
    }

    // ---- conventional analysis ----

    #[test]
    fn test_conventional_analysis() {
        let input = "# feat(api): add user authentication endpoint\n\n\
                     - Added POST /auth/login endpoint\n\
                     - Implemented bcrypt password hashing\n\nFixes: #123";
        let parsed = parse_conventional_analysis(input).unwrap();
        assert_eq!(parsed["type"], "feat");
        assert_eq!(parsed["scope"], "api");
        assert_eq!(array_len(&parsed["details"]), 2);
        assert_eq!(parsed["issue_refs"][0], "#123");
    }

    #[test]
    fn test_analysis_lenient_variations() {
        // Fenced, bold heading, `*` bullets and a `Closes:` footer.
        let input = "```md\n**fix(core): corrected null deref**\n\n* fixed a crash\n\
                     * guarded the pointer\n\nCloses: #7, #8\n```";
        let parsed = parse_conventional_analysis(input).unwrap();
        assert_eq!(parsed["type"], "fix");
        assert_eq!(parsed["scope"], "core");
        assert_eq!(array_len(&parsed["details"]), 2);
        assert_eq!(array_len(&parsed["issue_refs"]), 2);
    }

    #[test]
    fn test_analysis_no_scope_and_leading_blank_lines() {
        let input = "\n\n\n# chore: bumped version\n";
        let parsed = parse_conventional_analysis(input).unwrap();
        assert_eq!(parsed["type"], "chore");
        assert!(parsed["scope"].is_null());
    }

    // ---- summary ----

    #[test]
    fn test_summary_variations() {
        let cases = [
            "<summary>Added JWT auth</summary>",
            "Added JWT auth",
            "\"Added JWT auth\"",
            "<summary>\"Added JWT auth\"</title>",
            "```md\n<summary>\nAdded JWT auth\n</summary>\n```",
            "Title: Added JWT auth",
            "# Added JWT auth",
            "\n\n Added JWT auth \n\n",
        ];
        for c in cases {
            let parsed = parse_summary_output(c).unwrap();
            assert_eq!(parsed["summary"], "Added JWT auth", "input was: {c:?}");
        }
    }

    // ---- changelog ----

    #[test]
    fn test_changelog_hash_and_dash() {
        let input = "# Added\n- POST /auth/login endpoint\n\n# Fixed\n- Race condition";
        let parsed = parse_changelog_response(input).unwrap();
        let by_category = parsed["entries"].as_object().unwrap();
        assert_eq!(array_len(&by_category["Added"]), 1);
        assert_eq!(array_len(&by_category["Fixed"]), 1);
    }

    #[test]
    fn test_changelog_lenient_mixed() {
        let input =
            "## Added\n- one\n* two\n\n\nFixed:\nthree\n- four\n\n# Security\n\n five ";
        let parsed = parse_changelog_response(input).unwrap();
        let by_category = parsed["entries"].as_object().unwrap();
        assert_eq!(array_len(&by_category["Added"]), 2, "Added");
        assert_eq!(array_len(&by_category["Fixed"]), 2, "Fixed (bare + dash)");
        assert_eq!(array_len(&by_category["Security"]), 1, "Security (bare item)");
    }

    #[test]
    fn test_changelog_bare_category_not_confused_with_item() {
        // The entry starts with a category word but must stay an entry.
        let input = "# Security\n- Added rate limiting on auth endpoints";
        let parsed = parse_changelog_response(input).unwrap();
        let by_category = parsed["entries"].as_object().unwrap();
        assert!(by_category.contains_key("Security"));
        assert!(!by_category.contains_key("Added"));
        assert_eq!(by_category["Security"][0], "Added rate limiting on auth endpoints");
    }

    #[test]
    fn test_changelog_fenced() {
        let input = "```\n# Added\n- thing\n```";
        let parsed = parse_changelog_response(input).unwrap();
        assert_eq!(parsed["entries"]["Added"][0], "thing");
    }

    // ---- escaped newlines ----

    #[test]
    fn test_literal_backslash_n_analysis() {
        let input = "# feat(api): add auth\\n\\n- did a thing\\n- did another\\n\\nFixes: #1";
        let parsed = parse_conventional_analysis(input).unwrap();
        assert_eq!(parsed["type"], "feat");
        assert_eq!(parsed["scope"], "api");
        assert_eq!(array_len(&parsed["details"]), 2);
        assert_eq!(parsed["issue_refs"][0], "#1");
    }

    #[test]
    fn test_literal_backslash_n_changelog() {
        let input = "# Added\\n- one\\n- two\\n# Fixed\\n- three";
        let parsed = parse_changelog_response(input).unwrap();
        let by_category = parsed["entries"].as_object().unwrap();
        assert_eq!(array_len(&by_category["Added"]), 2);
        assert_eq!(array_len(&by_category["Fixed"]), 1);
    }

    #[test]
    fn test_real_newlines_with_stray_backslash_preserved() {
        let input = "# docs: explain C:\\\\path usage\n- noted the path C:\\nope is literal";
        let parsed = parse_conventional_analysis(input).unwrap();
        assert_eq!(parsed["type"], "docs");
        assert_eq!(array_len(&parsed["details"]), 1);
    }

    // ---- compose ----

    #[test]
    fn test_compose_intent_fenced() {
        let input = "```\nG1 := feat(api): add endpoints\nG2 := test(api): add tests\n\n\
                     G2 <- G1\n\nFiles:\n- G1: a.rs, b.rs\n* G2: c.test.ts\n```";
        let parsed = parse_compose_intent(input).unwrap();
        let groups = parsed["groups"].as_array().unwrap();
        assert_eq!(groups.len(), 2);
        assert_eq!(array_len(&groups[0]["file_ids"]), 2);
        assert_eq!(groups[1]["dependencies"][0], "G1");
        assert_eq!(groups[1]["file_ids"][0], "c.test.ts");
    }

    #[test]
    fn test_compose_binding_lenient() {
        let input = "```\n## G1:\n- h1\n* h2\n# G2\n- h3\n```";
        let parsed = parse_compose_binding(input).unwrap();
        let assignments = parsed["assignments"].as_array().unwrap();
        assert_eq!(assignments.len(), 2);
        assert_eq!(assignments[0]["group_id"], "G1");
        assert_eq!(array_len(&assignments[0]["hunk_ids"]), 2);
    }

    // ---- batch observations ----

    #[test]
    fn test_batch_observations() {
        let input = "## src/config.rs\n- added TOML loading\n- changed timeout\n\n\
                     ## src/main.rs\n- wired CLI flag\n\n## src/empty.rs";
        let parsed = parse_batch_observations(input).unwrap();
        let files = parsed["files"].as_array().unwrap();
        assert_eq!(files.len(), 3);
        assert_eq!(files[0]["path"], "src/config.rs");
        assert_eq!(array_len(&files[0]["observations"]), 2);
        assert_eq!(array_len(&files[1]["observations"]), 1);
        assert_eq!(array_len(&files[2]["observations"]), 0);
    }

    #[test]
    fn test_batch_observations_fenced_and_literal_newlines() {
        let input = "```\\n## a.rs\\n- did x\\n* did y\\n## b.rs\\n- did z\\n```";
        let parsed = parse_batch_observations(input).unwrap();
        let files = parsed["files"].as_array().unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0]["path"], "a.rs");
        assert_eq!(array_len(&files[0]["observations"]), 2);
    }
}