llm_git/
markdown_output.rs

//! Markdown format parsers for structured LLM outputs
//!
//! Provides parsers for markdown-formatted responses as an alternative to JSON tool calls.
4
5use std::collections::HashMap;
6
7use crate::{
8   error::{CommitGenError, Result},
9   types::CommitType,
10};
11
// ===== Leniency helpers =====
// Models wrap the same content many ways: code fences, quotes, mismatched or
// missing tags, bullet glyph variations. These helpers normalize all of that
// before structured parsing so the parsers stay tolerant.
16
/// Turn literal escape sequences (`\r\n`, `\n`, `\r`, `\t`) into real whitespace.
///
/// Some models answer with one physical line in which every line break is
/// spelled as a backslash followed by `n`. The rewrite only happens when the
/// text contains at least one literal `\n` and at least as many literal `\n`
/// sequences as real newlines; otherwise the input is returned unchanged so
/// an occasional backslash in ordinary multi-line text is left alone.
fn normalize_escaped_whitespace(text: &str) -> String {
   // Applied in order: the four-character `\r\n` spelling has to be consumed
   // before its two halves are looked at individually.
   const ESCAPES: [(&str, &str); 4] =
      [("\\r\\n", "\n"), ("\\n", "\n"), ("\\r", "\n"), ("\\t", "\t")];

   let literal_breaks = text.matches("\\n").count();
   let real_breaks = text.matches('\n').count();
   let should_rewrite = literal_breaks > 0 && literal_breaks >= real_breaks;
   if !should_rewrite {
      return text.to_string();
   }

   ESCAPES
      .iter()
      .fold(text.to_string(), |acc, &(escaped, actual)| acc.replace(escaped, actual))
}
34
35/// Strip surrounding Markdown code fences (```lang ... ```), if present.
36/// Also normalizes literal `\n`/`\t` escapes first, so every parser that
37/// routes through here inherits both behaviors.
38fn strip_fences(text: &str) -> String {
39   let normalized = normalize_escaped_whitespace(text);
40   let t = normalized.trim();
41   // Whole-block fence: starts with ``` and ends with ```
42   if let Some(after_fence) = t.strip_prefix("```") {
43      // Drop the opening fence line (may carry a language tag like ```md).
44      let after_open = after_fence.split_once('\n').map_or("", |x| x.1);
45      let body = match after_open.rfind("```") {
46         Some(end) => &after_open[..end],
47         None => after_open,
48      };
49      return body.trim().to_string();
50   }
51   // No leading fence: just remove any stray ``` lines.
52   t.lines()
53      .filter(|l| l.trim_start().trim_end() != "```" && !l.trim_start().starts_with("```"))
54      .collect::<Vec<_>>()
55      .join("\n")
56      .trim()
57      .to_string()
58}
59
/// Peel one layer of matching quotes off a string.
///
/// Recognized pairs are straight double, straight single, backtick, curly
/// double and curly single quotes. The input is trimmed first; when a pair
/// wraps the whole value the inner text is returned (trimmed again),
/// otherwise the trimmed input comes back as is.
fn strip_wrapping_quotes(s: &str) -> String {
   const QUOTE_PAIRS: [(char, char); 5] =
      [('"', '"'), ('\'', '\''), ('`', '`'), ('“', '”'), ('‘', '’')];

   let candidate = s.trim();
   QUOTE_PAIRS
      .iter()
      // Stripping the opener first guarantees opener and closer are two
      // distinct characters, so a lone quote is never "unwrapped".
      .find_map(|&(open, close)| candidate.strip_prefix(open)?.strip_suffix(close))
      .map_or_else(|| candidate.to_string(), |inner| inner.trim().to_string())
}
77
/// Drop a leading `Label:` prefix when the label is one of `title`,
/// `summary`, `description` or `result` (compared case-insensitively after
/// trimming).
///
/// On a match the text after the first colon is returned trimmed. Any other
/// input is returned unchanged, including its surrounding whitespace.
fn strip_label_prefix(s: &str) -> String {
   const LABELS: [&str; 4] = ["title", "summary", "description", "result"];

   match s.split_once(':') {
      Some((label, value)) if LABELS.contains(&label.trim().to_lowercase().as_str()) => {
         value.trim().to_string()
      },
      _ => s.to_string(),
   }
}
88
/// Remove leading Markdown heading hashes and whole-line emphasis markers.
///
/// After trimming and dropping every leading `#`, the markers `**`, `*`, `__`
/// and `_` are tried in that order. A marker is peeled off only when it both
/// opens and closes the text and leaves something in between; each
/// successful peel trims the remainder before the next marker is tried.
fn strip_heading_markers(s: &str) -> String {
   const EMPHASIS: [&str; 4] = ["**", "*", "__", "_"];

   let without_hashes = s.trim().trim_start_matches('#').trim_start();
   let core = EMPHASIS.iter().fold(without_hashes, |current, &marker| {
      current
         .strip_prefix(marker)
         .and_then(|rest| rest.strip_suffix(marker))
         // Text consisting of nothing but the two markers is left alone.
         .filter(|inner| !inner.is_empty())
         .map_or(current, str::trim)
   });
   core.to_string()
}
102
/// Return the trimmed text of a bullet line, or `None` if the line is not a
/// bullet.
///
/// A bullet is one of the glyphs `-`, `*`, `•`, `–` or `+` followed by a
/// single space, optionally preceded by indentation.
fn bullet_content(line: &str) -> Option<&str> {
   const GLYPHS: [&str; 5] = ["- ", "* ", "• ", "– ", "+ "];

   let candidate = line.trim_start();
   GLYPHS
      .iter()
      .find_map(|&glyph| candidate.strip_prefix(glyph))
      .map(str::trim)
}
113
/// Extract the content between the first `<tag ...>` and the next closing
/// `</...>` of any name.
///
/// Tolerates a mismatched closing tag (e.g. `<summary>X</title>`) and a
/// missing close (the remainder of the text is taken). The tag name is
/// matched ASCII case-insensitively on both the text and the `tag` argument.
///
/// Returns `None` when the opening tag is absent or is never terminated by
/// `>`; otherwise the trimmed content.
fn extract_tag_lenient(text: &str, tag: &str) -> Option<String> {
   // ASCII folding keeps every byte offset identical between `haystack` and
   // `text`. Full Unicode lowercasing can change byte lengths (e.g. U+212A
   // KELVIN SIGN, `İ`), which made the offset below invalid for `text` and
   // could panic on a non-boundary slice.
   let haystack = text.to_ascii_lowercase();
   let needle = format!("<{}", tag.to_ascii_lowercase());
   let open_pos = haystack.find(&needle)?;

   // Skip to just past the `>` that ends the opening tag.
   let content_start = open_pos + text[open_pos..].find('>')? + 1;
   let rest = &text[content_start..];

   // Stop at the next closing tag of ANY name, or take everything.
   let end = rest.find("</").unwrap_or(rest.len());
   Some(rest[..end].trim().to_string())
}
129
/// Raw pieces of a parsed `type(scope): summary` analysis block.
///
/// Produced by `parse_analysis_parts` and kept as plain strings so each
/// public parser can shape them for its own target JSON structure.
#[derive(Debug, Clone, PartialEq, Eq)]
struct AnalysisParts {
   /// Commit type token taken from the heading line.
   commit_type: String,
   /// Scope from the heading's parentheses; `None` when absent or empty.
   scope:       Option<String>,
   /// Text after the heading's colon.
   summary:     String,
   /// Non-empty bullet lines that follow the heading, in order.
   details:     Vec<String>,
   /// Comma-separated references collected from `Fixes:`/`Closes:`/`Resolves:` lines.
   issue_refs:  Vec<String>,
}
139
140fn parse_analysis_parts(text: &str) -> Result<AnalysisParts> {
141   let unfenced = strip_fences(text);
142   let lines: Vec<&str> = unfenced.lines().collect();
143
144   // Find the heading line: the first line that parses as `type(scope)?: summary`.
145   let mut heading_idx = None;
146   let mut parsed_heading = None;
147   for (i, line) in lines.iter().enumerate() {
148      let candidate = strip_heading_markers(line);
149      if let Some(h) = parse_heading(&candidate) {
150         heading_idx = Some(i);
151         parsed_heading = Some(h);
152         break;
153      }
154      // Only scan the first few lines for the heading.
155      if i >= 5 {
156         break;
157      }
158   }
159
160   let (commit_type, scope, summary) = parsed_heading.ok_or_else(|| {
161      CommitGenError::Other(
162         "markdown analysis: no `type(scope): summary` heading found".to_string(),
163      )
164   })?;
165   let start = heading_idx.unwrap_or(0) + 1;
166
167   let mut details = Vec::new();
168   let mut issue_refs = Vec::new();
169
170   for line in &lines[start..] {
171      let trimmed_line = line.trim();
172      let lower = trimmed_line.to_lowercase();
173
174      if let Some(detail) = bullet_content(trimmed_line) {
175         if !detail.is_empty() {
176            details.push(detail.to_string());
177         }
178      } else if let Some(rest) = lower
179         .strip_prefix("fixes:")
180         .or_else(|| lower.strip_prefix("closes:"))
181         .or_else(|| lower.strip_prefix("resolves:"))
182      {
183         // Use the original-case slice for the refs themselves.
184         let orig = &trimmed_line[trimmed_line.len() - rest.len()..];
185         for ref_str in orig.split(',') {
186            let r = ref_str.trim();
187            if !r.is_empty() {
188               issue_refs.push(r.to_string());
189            }
190         }
191      }
192   }
193
194   Ok(AnalysisParts { commit_type, scope, summary, details, issue_refs })
195}
196
197/// Parse markdown conventional analysis format (details as `{text}` objects,
198/// matching `ConventionalAnalysis`).
199///
200/// Lenient: tolerates code fences, headings with/without `#`, bold emphasis,
201/// the `type(scope): summary` line appearing on any of the first lines, bullet
202/// glyph variations, and `Fixes:`/`Closes:`/`Resolves:` footers.
203pub fn parse_conventional_analysis(text: &str) -> Result<serde_json::Value> {
204   let p = parse_analysis_parts(text)?;
205   let details: Vec<serde_json::Value> =
206      p.details.into_iter().map(|t| serde_json::json!({ "text": t })).collect();
207   Ok(serde_json::json!({
208      "type": p.commit_type,
209      "scope": p.scope,
210      "summary": p.summary,
211      "details": details,
212      "issue_refs": p.issue_refs
213   }))
214}
215
216/// Parse markdown fast-commit format (details as plain strings, matching
217/// `FastCommitOutput`). Same heading/bullet grammar as the analysis parser.
218pub fn parse_fast_commit(text: &str) -> Result<serde_json::Value> {
219   let p = parse_analysis_parts(text)?;
220   Ok(serde_json::json!({
221      "type": p.commit_type,
222      "scope": p.scope,
223      "summary": p.summary,
224      "details": p.details
225   }))
226}
227
228/// Parse a `type(scope): summary` or `type: summary` heading line.
229/// Returns (type, optional scope, summary). None if it doesn't look like one.
230fn parse_heading(line: &str) -> Option<(String, Option<String>, String)> {
231   let colon = line.find(':')?;
232   let type_scope = line[..colon].trim();
233   let summary = line[colon + 1..].trim().to_string();
234   if type_scope.is_empty() || summary.is_empty() {
235      return None;
236   }
237
238   let (ty, scope) = if let Some(p_start) = type_scope.find('(') {
239      let p_end = type_scope.find(')')?;
240      if p_end < p_start {
241         return None;
242      }
243      let ty = type_scope[..p_start].trim().to_string();
244      let sc = type_scope[p_start + 1..p_end].trim();
245      (ty, if sc.is_empty() { None } else { Some(sc.to_string()) })
246   } else {
247      (type_scope.to_string(), None)
248   };
249
250   // The type token must be a *valid conventional commit type*. This is what
251   // prevents stray `key: value` lines (e.g. `type: "refactor",` from a JSON
252   // blob, or `summary: ...`) from being misread as a heading — which would
253   // otherwise produce garbage like {"type":"type"} that caches and then fails
254   // downstream validation. Only a real type makes a real heading.
255   if CommitType::new(&ty).is_err() {
256      return None;
257   }
258   Some((ty, scope, summary))
259}
260
261/// Parse markdown summary format.
262///
263/// Lenient: accepts `<summary>X</summary>`, mismatched/missing close tags,
264/// bare text, quoted text, `Title:`-labeled text, and code fences. Collapses
265/// internal whitespace so multiline tag bodies become a single line.
266pub fn parse_summary_output(text: &str) -> Result<serde_json::Value> {
267   let unfenced = strip_fences(text);
268
269   // Prefer an explicit <summary> tag if present (tolerating bad/missing close).
270   let raw = extract_tag_lenient(&unfenced, "summary").unwrap_or_else(|| unfenced.clone());
271
272   // Normalize: drop heading markers, label prefixes, quotes; collapse whitespace.
273   let stripped = strip_heading_markers(&raw);
274   let stripped = strip_label_prefix(&stripped);
275   let stripped = strip_wrapping_quotes(&stripped);
276   let summary_text = stripped.split_whitespace().collect::<Vec<_>>().join(" ");
277
278   if summary_text.is_empty() {
279      return Err(CommitGenError::Other("markdown summary: empty summary text".to_string()));
280   }
281
282   Ok(serde_json::json!({ "summary": summary_text }))
283}
284
285/// Parse markdown changelog format.
286///
287/// Lenient: tolerates code fences, headers as `#`/`##`/`###` or bare
288/// `Category:` lines, and bullet glyph variations. Recognized categories are
289/// matched case-insensitively; unknown `#` headers are still accepted verbatim.
290pub fn parse_changelog_response(text: &str) -> Result<serde_json::Value> {
291   const KNOWN: [&str; 7] =
292      ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security", "Breaking"];
293
294   let unfenced = strip_fences(text);
295
296   let mut entries: HashMap<String, Vec<String>> = HashMap::new();
297   let mut current_category: Option<String> = None;
298
299   let canonical = |name: &str| -> Option<String> {
300      let n = name.trim().trim_end_matches(':').trim();
301      KNOWN
302         .iter()
303         .find(|k| k.eq_ignore_ascii_case(n))
304         .map(|k| (*k).to_string())
305   };
306
307   for line in unfenced.lines() {
308      let trimmed_line = line.trim();
309      if trimmed_line.is_empty() {
310         continue; // tolerate any number of blank/whitespace lines
311      }
312
313      // Header detection:
314      //  - `#`/`##`/`###` prefixed line (any text), or
315      //  - a bare line that *exactly* equals a known category (with optional
316      //    trailing `:`), e.g. `Added`, `Added:` — but NOT `Added rate limiting`.
317      let header = if trimmed_line.starts_with('#') {
318         let h = trimmed_line.trim_start_matches('#').trim().trim_end_matches(':').trim();
319         Some(canonical(h).unwrap_or_else(|| h.to_string()))
320      } else { canonical(trimmed_line) };
321
322      if let Some(h) = header {
323         current_category = Some(h);
324         continue;
325      }
326
327      // Otherwise it's an entry. Accept bulleted (`-`, `*`, `•`, …) or bare lines.
328      let entry = bullet_content(trimmed_line).unwrap_or(trimmed_line).trim();
329      if let Some(cat) = &current_category
330         && !entry.is_empty() {
331            entries.entry(cat.clone()).or_default().push(entry.to_string());
332         }
333   }
334
335   if entries.is_empty() {
336      return Err(CommitGenError::Other(
337         "markdown changelog: no entries found (format: ## Category\\n- entry)".to_string(),
338      ));
339   }
340
341   Ok(serde_json::json!({ "entries": entries }))
342}
343
344/// Parse markdown compose intent format.
345///
346/// Lenient: strips code fences before parsing the `G1 := type(scope): rationale`,
347/// `G2 <- G1`, and `Files:` sections; bullet glyphs in the files section vary.
348pub fn parse_compose_intent(text: &str) -> Result<serde_json::Value> {
349   let trimmed = strip_fences(text);
350
351   let mut groups = Vec::new();
352   let mut group_map: HashMap<String, usize> = HashMap::new();
353
354   // First pass: collect group definitions (G1 := type(scope): rationale)
355   for line in trimmed.lines() {
356      let trimmed_line = line.trim();
357      if let Some(assign_pos) = trimmed_line.find(":=") {
358         let gid = trimmed_line[..assign_pos].trim().to_string();
359         let rest = &trimmed_line[assign_pos + 2..].trim();
360
361         if let Some(colon_pos) = rest.find(':') {
362            let type_scope = &rest[..colon_pos].trim();
363            let rationale = rest[colon_pos + 1..].trim().to_string();
364
365            let (gtype, scope) = if let Some(paren_start) = type_scope.find('(') {
366               if let Some(paren_end) = type_scope.find(')') {
367                  let t = type_scope[..paren_start].trim();
368                  let s = type_scope[paren_start + 1..paren_end].trim();
369                  (t.to_string(), Some(s.to_string()))
370               } else {
371                  (type_scope.to_string(), None)
372               }
373            } else {
374               (type_scope.to_string(), None)
375            };
376
377            group_map.insert(gid.clone(), groups.len());
378
379            let group_obj = serde_json::json!({
380               "group_id": gid,
381               "type": normalize_commit_type(&gtype),
382               "scope": scope,
383               "rationale": rationale,
384               "file_ids": Vec::<String>::new(),
385               "dependencies": Vec::<String>::new()
386            });
387            groups.push(group_obj);
388         }
389      }
390   }
391
392   // Second pass: parse dependencies (G2 <- G1)
393   for line in trimmed.lines() {
394      let trimmed_line = line.trim();
395      if let Some(dep_pos) = trimmed_line.find("<-") {
396         let gid = trimmed_line[..dep_pos].trim().to_string();
397         let deps_str = trimmed_line[dep_pos + 2..].trim();
398
399         if let Some(idx) = group_map.get(&gid) {
400            let mut dependencies = Vec::new();
401            for dep_id in deps_str.split(',') {
402               let trimmed_dep = dep_id.trim();
403               if !trimmed_dep.is_empty() {
404                  dependencies.push(trimmed_dep.to_string());
405               }
406            }
407            if let Some(group_obj) = groups.get_mut(*idx) {
408               group_obj["dependencies"] = serde_json::Value::Array(
409                  dependencies.into_iter().map(serde_json::Value::String).collect(),
410               );
411            }
412         }
413      }
414   }
415
416   // Third pass: parse file assignments (- G1: file1, file2)
417   let mut in_files_section = false;
418   for line in trimmed.lines() {
419      let trimmed_line = line.trim();
420
421      if trimmed_line.to_lowercase().starts_with("files:") {
422         in_files_section = true;
423         continue;
424      }
425
426      if in_files_section && let Some(bullet) = bullet_content(trimmed_line)
427         && let Some(colon_pos) = bullet.find(':') {
428            let gid = bullet[..colon_pos].trim().to_string();
429            let files_str = bullet[colon_pos + 1..].trim();
430
431            if let Some(idx) = group_map.get(&gid)
432               && let Some(group_obj) = groups.get_mut(*idx) {
433               group_obj["file_ids"] = serde_json::Value::Array(
434                  files_str.split(',').map(|f| serde_json::Value::String(f.trim().to_string())).collect(),
435               );
436            }
437         }
438   }
439
440   if groups.is_empty() {
441      return Err(CommitGenError::Other(
442         "markdown compose intent: no groups found (format: G1 := type(scope): rationale)".to_string(),
443      ));
444   }
445
446   Ok(serde_json::json!({
447      "groups": groups
448   }))
449}
450
451/// Parse markdown compose binding format.
452///
453/// Lenient: strips code fences; group headers accept `#`/`##` (with or without
454/// trailing colon); hunk bullets accept varied glyphs.
455pub fn parse_compose_binding(text: &str) -> Result<serde_json::Value> {
456   let trimmed = strip_fences(text);
457
458   let mut assignments = Vec::new();
459   let mut current_group: Option<String> = None;
460   let mut current_hunks: Vec<String> = Vec::new();
461
462   for line in trimmed.lines() {
463      let trimmed_line = line.trim();
464
465      if trimmed_line.starts_with('#') {
466         // Save previous group if any
467         if let Some(gid) = current_group.take() {
468            assignments.push(serde_json::json!({
469               "group_id": gid,
470               "hunk_ids": std::mem::take(&mut current_hunks)
471            }));
472         }
473         // Start new group (strip hashes and any trailing colon)
474         let new_gid = trimmed_line
475            .trim_start_matches('#')
476            .trim()
477            .trim_end_matches(':')
478            .trim()
479            .to_string();
480         current_group = Some(new_gid);
481      } else if let Some(hunk_id) = bullet_content(trimmed_line) {
482         current_hunks.push(hunk_id.to_string());
483      }
484   }
485
486   // Save final group
487   if let Some(gid) = current_group.take() {
488      assignments.push(serde_json::json!({
489         "group_id": gid,
490         "hunk_ids": std::mem::take(&mut current_hunks)
491      }));
492   }
493
494   if assignments.is_empty() {
495      return Err(CommitGenError::Other(
496         "markdown compose binding: no assignments found (format: # group_id\\n- hunk_id)".to_string(),
497      ));
498   }
499
500   Ok(serde_json::json!({
501      "assignments": assignments
502   }))
503}
504
505/// Parse markdown map-phase batch observations.
506///
507/// Format: each file is a `## path` (or `# path`) header, followed by bullet or
508/// bare-line observations. Produces `{ "files": [{ "path", "observations" }] }`.
509/// Files with no observations are kept with an empty array. Lenient: strips
510/// fences, accepts varied bullet glyphs and bare-line observations.
511pub fn parse_batch_observations(text: &str) -> Result<serde_json::Value> {
512   let unfenced = strip_fences(text);
513
514   let mut files: Vec<serde_json::Value> = Vec::new();
515   let mut current_path: Option<String> = None;
516   let mut current_obs: Vec<String> = Vec::new();
517
518   for line in unfenced.lines() {
519      let t = line.trim();
520      if t.is_empty() {
521         continue;
522      }
523
524      if t.starts_with('#') {
525         // New file header — flush the previous one.
526         if let Some(path) = current_path.take() {
527            files.push(serde_json::json!({
528               "path": path,
529               "observations": std::mem::take(&mut current_obs),
530            }));
531         }
532         current_path = Some(t.trim_start_matches('#').trim().to_string());
533      } else if current_path.is_some() {
534         // Observation: bullet or bare line.
535         let obs = bullet_content(t).unwrap_or(t).trim();
536         if !obs.is_empty() {
537            current_obs.push(obs.to_string());
538         }
539      }
540   }
541
542   if let Some(path) = current_path.take() {
543      files.push(serde_json::json!({
544         "path": path,
545         "observations": current_obs,
546      }));
547   }
548
549   if files.is_empty() {
550      return Err(CommitGenError::Other(
551         "markdown observations: no file sections found (format: ## path\\n- observation)"
552            .to_string(),
553      ));
554   }
555
556   Ok(serde_json::json!({ "files": files }))
557}
558
/// Lowercase a commit type and collapse known aliases onto their canonical
/// name (e.g. `Feature` -> `feat`, `bugfix` -> `fix`).
///
/// Canonical names and unrecognized values are returned lowercased and
/// otherwise unchanged. The input is not trimmed.
fn normalize_commit_type(s: &str) -> String {
   let lowered = s.to_lowercase();
   // Only aliases need an arm: a canonical name already equals its own
   // lowercased form and falls through the last arm untouched.
   let canonical = match lowered.as_str() {
      "feature" => "feat",
      "bugfix" => "fix",
      "documentation" => "docs",
      "formatting" => "style",
      "refactoring" => "refactor",
      "performance" => "perf",
      "tests" => "test",
      "builder" => "build",
      "cicd" => "ci",
      "maintenance" => "chore",
      "reversion" => "revert",
      "dependencies" | "dependency" => "deps",
      "sec" => "security",
      "configuration" => "config",
      "ergonomics" => "ux",
      "version" => "release",
      "infrastructure" => "infra",
      "initialization" => "init",
      "merging" => "merge",
      "hacky" => "hack",
      "work-in-progress" => "wip",
      canonical_or_unknown => canonical_or_unknown,
   };
   canonical.to_string()
}
587
#[cfg(test)]
mod tests {
   use super::*;

   // ===== conventional analysis =====

   #[test]
   fn test_conventional_analysis() {
      let md = "# feat(api): add user authentication endpoint\n\n- Added POST \
                /auth/login endpoint\n- Implemented bcrypt password hashing\n\nFixes: #123";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "feat");
      assert_eq!(r["scope"], "api");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"][0], "#123");
   }

   #[test]
   fn test_analysis_lenient_variations() {
      // fenced, no `#`, bold heading, `*` bullets, Closes: footer
      let md = "```md\n**fix(core): corrected null deref**\n\n* fixed a crash\n* \
                guarded the pointer\n\nCloses: #7, #8\n```";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "fix");
      assert_eq!(r["scope"], "core");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"].as_array().unwrap().len(), 2);
   }

   #[test]
   fn test_analysis_no_scope_and_leading_blank_lines() {
      let md = "\n\n\n# chore: bumped version\n";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "chore");
      assert!(r["scope"].is_null());
   }

   #[test]
   fn test_heading_requires_known_type_not_json_key() {
      // A stray JSON/YAML `type:` key must NOT be misread as a heading.
      // (This used to yield {"type":"type"} which cached and then blew up.)
      let json_ish = "{\n  \"type\": \"refactor\",\n  \"summary\": \"did things\"\n}";
      assert!(parse_conventional_analysis(json_ish).is_err());
      // And `summary:`/`scope:` key lines are likewise not headings.
      assert!(parse_conventional_analysis("summary: did a thing\nscope: core").is_err());
   }

   #[test]
   fn test_fast_commit_details_are_plain_strings() {
      // FastCommitOutput.details is Vec<String>, so the fast parser must emit
      // string details (not {text} objects like the analysis parser).
      let md = "# refactor(web): derive provider order from options\n\n- Derived the \
                metadata dynamically.\n- Reprioritized the default sequence.";
      let r = parse_fast_commit(md).unwrap();
      assert_eq!(r["type"], "refactor");
      assert_eq!(r["scope"], "web");
      let details = r["details"].as_array().unwrap();
      assert_eq!(details.len(), 2);
      assert!(details[0].is_string(), "fast details must be strings");
      // It must deserialize into the real FastCommitOutput shape.
      #[derive(serde::Deserialize)]
      struct FastShape {
         #[serde(rename = "type")]
         _t:      String,
         details: Vec<String>,
      }
      let parsed: FastShape = serde_json::from_value(r).unwrap();
      assert_eq!(parsed.details.len(), 2);
   }

   // ===== summary: all the wrapping variations =====

   #[test]
   fn test_summary_variations() {
      let cases = [
         "<summary>Added JWT auth</summary>",
         "Added JWT auth",                      // bare
         "\"Added JWT auth\"",                  // quoted
         "<summary>\"Added JWT auth\"</title>", // quoted + mismatched close tag
         "```md\n<summary>\nAdded JWT auth\n</summary>\n```", // fenced + multiline
         "Title: Added JWT auth",               // labeled
         "# Added JWT auth",                    // heading marker
         "\n\n  Added JWT auth  \n\n",          // stray whitespace
      ];
      for c in cases {
         let r = parse_summary_output(c).unwrap();
         assert_eq!(r["summary"], "Added JWT auth", "input was: {c:?}");
      }
   }

   // ===== changelog: header + item variations =====

   #[test]
   fn test_changelog_hash_and_dash() {
      let md = "# Added\n- POST /auth/login endpoint\n\n# Fixed\n- Race condition";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 1);
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 1);
   }

   #[test]
   fn test_changelog_lenient_mixed() {
      // `##` and `#` and bare `Category:` headers; `-`, `*`, and bare items;
      // random blank lines.
      let md = "## Added\n- one\n* two\n\n\nFixed:\nthree\n- four\n\n# Security\n\n  five  ";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 2, "Added");
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 2, "Fixed (bare + dash)");
      assert_eq!(e["Security"].as_array().unwrap().len(), 1, "Security (bare item)");
   }

   #[test]
   fn test_changelog_bare_category_not_confused_with_item() {
      // "Added rate limiting" must be an ITEM, not a header.
      let md = "# Security\n- Added rate limiting on auth endpoints";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert!(e.contains_key("Security"));
      assert!(!e.contains_key("Added"));
      assert_eq!(e["Security"][0], "Added rate limiting on auth endpoints");
   }

   #[test]
   fn test_changelog_fenced() {
      let md = "```\n# Added\n- thing\n```";
      let r = parse_changelog_response(md).unwrap();
      assert_eq!(r["entries"]["Added"][0], "thing");
   }

   // ===== literal \n escapes =====

   #[test]
   fn test_literal_backslash_n_analysis() {
      // A model emitted the whole thing on one physical line with literal \n.
      let md = "# feat(api): add auth\\n\\n- did a thing\\n- did another\\n\\nFixes: #1";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "feat");
      assert_eq!(r["scope"], "api");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"][0], "#1");
   }

   #[test]
   fn test_literal_backslash_n_changelog() {
      let md = "# Added\\n- one\\n- two\\n# Fixed\\n- three";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 2);
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 1);
   }

   #[test]
   fn test_real_newlines_with_stray_backslash_preserved() {
      // Two real newlines vs. one literal `\n` (inside `C:\nope`): real
      // newlines dominate, so escape normalization must leave the text alone.
      // (With only one real newline the counts tie and normalization fires.)
      let md = "# docs: explain C:\\\\path usage\n- noted the path C:\\nope is literal\n- more";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "docs");
      let details = r["details"].as_array().unwrap();
      assert_eq!(details.len(), 2);
      // The detail keeps its literal backslash-n instead of being split on it.
      assert_eq!(details[0]["text"], "noted the path C:\\nope is literal");
   }

   // ===== compose =====

   #[test]
   fn test_compose_intent_fenced() {
      let md = "```\nG1 := feat(api): add endpoints\nG2 := test(api): add tests\n\nG2 \
                <- G1\n\nFiles:\n- G1: a.rs, b.rs\n* G2: c.test.ts\n```";
      let r = parse_compose_intent(md).unwrap();
      let g = r["groups"].as_array().unwrap();
      assert_eq!(g.len(), 2);
      assert_eq!(g[0]["file_ids"].as_array().unwrap().len(), 2);
      assert_eq!(g[1]["dependencies"][0], "G1");
      assert_eq!(g[1]["file_ids"][0], "c.test.ts"); // `*` bullet handled
   }

   #[test]
   fn test_compose_binding_lenient() {
      let md = "```\n## G1:\n- h1\n* h2\n# G2\n- h3\n```";
      let r = parse_compose_binding(md).unwrap();
      let a = r["assignments"].as_array().unwrap();
      assert_eq!(a.len(), 2);
      assert_eq!(a[0]["group_id"], "G1"); // trailing colon + `##` stripped
      assert_eq!(a[0]["hunk_ids"].as_array().unwrap().len(), 2);
   }

   // ===== map-phase batch observations =====

   #[test]
   fn test_batch_observations() {
      let md = "## src/config.rs\n- added TOML loading\n- changed timeout\n\n## \
                src/main.rs\n- wired CLI flag\n\n## src/empty.rs";
      let r = parse_batch_observations(md).unwrap();
      let files = r["files"].as_array().unwrap();
      assert_eq!(files.len(), 3);
      assert_eq!(files[0]["path"], "src/config.rs");
      assert_eq!(files[0]["observations"].as_array().unwrap().len(), 2);
      assert_eq!(files[1]["observations"].as_array().unwrap().len(), 1);
      assert_eq!(files[2]["observations"].as_array().unwrap().len(), 0); // header only
   }

   #[test]
   fn test_batch_observations_fenced_and_literal_newlines() {
      let md = "```\\n## a.rs\\n- did x\\n* did y\\n## b.rs\\n- did z\\n```";
      let r = parse_batch_observations(md).unwrap();
      let files = r["files"].as_array().unwrap();
      assert_eq!(files.len(), 2);
      assert_eq!(files[0]["path"], "a.rs");
      assert_eq!(files[0]["observations"].as_array().unwrap().len(), 2);
   }
}
llm_git/markdown_output.rs

llm_git/
markdown_output.rs