llm_git/
markdown_output.rs

1//! Markdown format parsers for structured LLM outputs
2//!
3//! Provides parsers for markdown-formatted responses as an alternative to JSON
4//! tool calls.
5
6use std::collections::HashMap;
7
8use crate::{
9   error::{CommitGenError, Result},
10   types::CommitType,
11};
12
13// ===== Leniency helpers =====
14// Models wrap the same content many ways: code fences, quotes, mismatched or
15// missing tags, bullet glyph variations. These helpers normalize all of that
16// before structured parsing so the parsers stay tolerant.
17
/// Convert literal escape sequences (`\n`, `\r`, `\t`) into real whitespace.
///
/// Some models emit a single physical line containing literal backslash-n
/// instead of real newlines. The rewrite only triggers when literal `\n`
/// sequences strictly outnumber the real newlines inside the trimmed text, so
/// ordinary multi-line output that merely mentions a backslash sequence (a
/// Windows path, an escape quoted in prose) is returned untouched. Leading
/// and trailing newlines are ignored for that comparison, so a one-line
/// response followed by a line break is still expanded.
///
/// When it triggers, literal `\r\n`, `\n` and `\r` each become one real
/// newline and literal `\t` becomes a real tab.
fn normalize_escaped_whitespace(text: &str) -> String {
   // Only newlines *between* content lines say anything about the layout.
   let real = text.trim().matches('\n').count();
   let literal = text.matches("\\n").count();
   // Covers `literal == 0` too. A tie is treated as "real newlines win":
   // rewriting then would split a legitimate line at the backslash.
   if literal <= real {
      return text.to_string();
   }
   text
      .replace("\\r\\n", "\n")
      .replace("\\n", "\n")
      .replace("\\r", "\n")
      .replace("\\t", "\t")
}
36
37/// Strip surrounding Markdown code fences (```lang ... ```), if present.
38/// Also normalizes literal `\n`/`\t` escapes first, so every parser that
39/// routes through here inherits both behaviors.
40fn strip_fences(text: &str) -> String {
41   let normalized = normalize_escaped_whitespace(text);
42   let t = normalized.trim();
43   // Whole-block fence: starts with ``` and ends with ```
44   if let Some(after_fence) = t.strip_prefix("```") {
45      // Drop the opening fence line (may carry a language tag like ```md).
46      let after_open = after_fence.split_once('\n').map_or("", |x| x.1);
47      let body = match after_open.rfind("```") {
48         Some(end) => &after_open[..end],
49         None => after_open,
50      };
51      return body.trim().to_string();
52   }
53   // No leading fence: just remove any stray ``` lines.
54   t.lines()
55      .filter(|l| l.trim_start().trim_end() != "```" && !l.trim_start().starts_with("```"))
56      .collect::<Vec<_>>()
57      .join("\n")
58      .trim()
59      .to_string()
60}
61
/// Trim `s` and, when it is enclosed in one matching pair of quotes, return
/// the trimmed text between them.
///
/// Recognized pairs: straight double, straight single, backtick, and the
/// smart double/single quote pairs. Anything else is returned trimmed but
/// otherwise unchanged. Only one layer of quotes is removed.
fn strip_wrapping_quotes(s: &str) -> String {
   const PAIRS: [(char, char); 5] =
      [('"', '"'), ('\'', '\''), ('`', '`'), ('“', '”'), ('‘', '’')];

   let trimmed = s.trim();
   // A lone quote character has no closing partner: after the prefix is
   // removed nothing is left for the suffix to match.
   let unquoted = PAIRS
      .iter()
      .find_map(|&(open, close)| trimmed.strip_prefix(open)?.strip_suffix(close));

   match unquoted {
      Some(inner) => inner.trim().to_string(),
      None => trimmed.to_string(),
   }
}
79
/// Drop a leading `Label:` prefix when the label is one of `title`,
/// `summary`, `description` or `result` (compared case-insensitively after
/// trimming). The remainder is returned trimmed; any other input comes back
/// unchanged.
fn strip_label_prefix(s: &str) -> String {
   const LABELS: [&str; 4] = ["title", "summary", "description", "result"];

   match s.split_once(':') {
      Some((label, rest)) if LABELS.contains(&label.trim().to_lowercase().as_str()) => {
         rest.trim().to_string()
      },
      _ => s.to_string(),
   }
}
90
/// Remove leading Markdown heading hashes and emphasis that wraps the whole
/// line.
///
/// The markers `**`, `*`, `__` and `_` are tried in that order; each one is
/// peeled off only when the line both starts and ends with it and something
/// would remain between the two markers. The result is trimmed.
fn strip_heading_markers(s: &str) -> String {
   let unhashed = s.trim().trim_start_matches('#').trim_start();

   let bare = ["**", "*", "__", "_"].iter().fold(unhashed, |line, &marker| {
      // Require content between the markers, so `**` alone is left as-is.
      if line.len() <= 2 * marker.len() {
         return line;
      }
      line
         .strip_prefix(marker)
         .and_then(|rest| rest.strip_suffix(marker))
         .map_or(line, str::trim)
   });

   bare.to_string()
}
104
/// Return the trimmed text of a bullet line, or `None` when `line` is not a
/// bullet.
///
/// A bullet is one of the glyphs `-`, `*`, `•`, `–`, `+` followed by a space,
/// optionally preceded by indentation.
fn bullet_content(line: &str) -> Option<&str> {
   const GLYPHS: [&str; 5] = ["- ", "* ", "• ", "– ", "+ "];

   let body = line.trim_start();
   GLYPHS
      .iter()
      .find_map(|&glyph| body.strip_prefix(glyph))
      .map(str::trim)
}
115
/// Extract the trimmed content between the first `<tag ...>` and the next
/// closing `</...>` of any name.
///
/// Tolerates a mismatched closing tag (e.g. `<summary>X</title>`) and a
/// missing close (the remainder of the text is taken). The tag name is
/// matched ASCII case-insensitively, both in `text` and in `tag`.
///
/// Returns `None` when the opening tag is absent or never closed with `>`.
fn extract_tag_lenient(text: &str, tag: &str) -> Option<String> {
   // ASCII-only lowercasing never changes byte lengths, so offsets found in
   // the lowered copy are valid char boundaries in `text`. Full Unicode
   // lowercasing can grow or shrink characters and would shift the offsets.
   let haystack = text.to_ascii_lowercase();
   let needle = format!("<{}", tag.to_ascii_lowercase());
   let open_pos = haystack.find(&needle)?;

   // Advance past the `>` that ends the opening tag (skips any attributes).
   let tag_len = text[open_pos..].find('>')? + 1;
   let body = &text[open_pos + tag_len..];

   // The next closing tag of ANY name ends the content.
   let end = body.find("</").unwrap_or(body.len());
   Some(body[..end].trim().to_string())
}
131
/// Raw pieces that `parse_analysis_parts` extracts from a
/// `# type(scope): summary` heading, its detail bullets and its issue footer.
/// Callers shape these into the JSON layout of their target struct.
struct AnalysisParts {
   /// Type token of the heading (the `feat` in `feat(api): ...`).
   commit_type: String,
   /// Text inside the heading's parentheses; `None` when absent or empty.
   scope:       Option<String>,
   /// Heading text after the first colon, trimmed.
   summary:     String,
   /// Non-empty bullet lines that follow the heading, in order.
   details:     Vec<String>,
   /// Comma-separated references from `Fixes:`/`Closes:`/`Resolves:` lines.
   issue_refs:  Vec<String>,
}
141
142fn parse_analysis_parts(text: &str) -> Result<AnalysisParts> {
143   let unfenced = strip_fences(text);
144   let lines: Vec<&str> = unfenced.lines().collect();
145
146   // Find the heading line: the first line that parses as `type(scope)?: summary`.
147   let mut heading_idx = None;
148   let mut parsed_heading = None;
149   for (i, line) in lines.iter().enumerate() {
150      let candidate = strip_heading_markers(line);
151      if let Some(h) = parse_heading(&candidate) {
152         heading_idx = Some(i);
153         parsed_heading = Some(h);
154         break;
155      }
156      // Only scan the first few lines for the heading.
157      if i >= 5 {
158         break;
159      }
160   }
161
162   let (commit_type, scope, summary) = parsed_heading.ok_or_else(|| {
163      CommitGenError::Other(
164         "markdown analysis: no `type(scope): summary` heading found".to_string(),
165      )
166   })?;
167   let start = heading_idx.unwrap_or(0) + 1;
168
169   let mut details = Vec::new();
170   let mut issue_refs = Vec::new();
171
172   for line in &lines[start..] {
173      let trimmed_line = line.trim();
174      let lower = trimmed_line.to_lowercase();
175
176      if let Some(detail) = bullet_content(trimmed_line) {
177         if !detail.is_empty() {
178            details.push(detail.to_string());
179         }
180      } else if let Some(rest) = lower
181         .strip_prefix("fixes:")
182         .or_else(|| lower.strip_prefix("closes:"))
183         .or_else(|| lower.strip_prefix("resolves:"))
184      {
185         // Use the original-case slice for the refs themselves.
186         let orig = &trimmed_line[trimmed_line.len() - rest.len()..];
187         for ref_str in orig.split(',') {
188            let r = ref_str.trim();
189            if !r.is_empty() {
190               issue_refs.push(r.to_string());
191            }
192         }
193      }
194   }
195
196   Ok(AnalysisParts { commit_type, scope, summary, details, issue_refs })
197}
198
199/// Parse markdown conventional analysis format (details as `{text}` objects,
200/// matching `ConventionalAnalysis`).
201///
202/// Lenient: tolerates code fences, headings with/without `#`, bold emphasis,
203/// the `type(scope): summary` line appearing on any of the first lines, bullet
204/// glyph variations, and `Fixes:`/`Closes:`/`Resolves:` footers.
205pub fn parse_conventional_analysis(text: &str) -> Result<serde_json::Value> {
206   let p = parse_analysis_parts(text)?;
207   let details: Vec<serde_json::Value> = p
208      .details
209      .into_iter()
210      .map(|t| serde_json::json!({ "text": t }))
211      .collect();
212   Ok(serde_json::json!({
213      "type": p.commit_type,
214      "scope": p.scope,
215      "summary": p.summary,
216      "details": details,
217      "issue_refs": p.issue_refs
218   }))
219}
220
221/// Parse markdown fast-commit format (details as plain strings, matching
222/// `FastCommitOutput`). Same heading/bullet grammar as the analysis parser.
223pub fn parse_fast_commit(text: &str) -> Result<serde_json::Value> {
224   let p = parse_analysis_parts(text)?;
225   Ok(serde_json::json!({
226      "type": p.commit_type,
227      "scope": p.scope,
228      "summary": p.summary,
229      "details": p.details
230   }))
231}
232
233/// Parse a `type(scope): summary` or `type: summary` heading line.
234/// Returns (type, optional scope, summary). None if it doesn't look like one.
235fn parse_heading(line: &str) -> Option<(String, Option<String>, String)> {
236   let colon = line.find(':')?;
237   let type_scope = line[..colon].trim();
238   let summary = line[colon + 1..].trim().to_string();
239   if type_scope.is_empty() || summary.is_empty() {
240      return None;
241   }
242
243   let (ty, scope) = if let Some(p_start) = type_scope.find('(') {
244      let p_end = type_scope.find(')')?;
245      if p_end < p_start {
246         return None;
247      }
248      let ty = type_scope[..p_start].trim().to_string();
249      let sc = type_scope[p_start + 1..p_end].trim();
250      (
251         ty,
252         if sc.is_empty() {
253            None
254         } else {
255            Some(sc.to_string())
256         },
257      )
258   } else {
259      (type_scope.to_string(), None)
260   };
261
262   // The type token must be a *valid conventional commit type*. This is what
263   // prevents stray `key: value` lines (e.g. `type: "refactor",` from a JSON
264   // blob, or `summary: ...`) from being misread as a heading — which would
265   // otherwise produce garbage like {"type":"type"} that caches and then fails
266   // downstream validation. Only a real type makes a real heading.
267   if CommitType::new(&ty).is_err() {
268      return None;
269   }
270   Some((ty, scope, summary))
271}
272
273/// Parse markdown summary format.
274///
275/// Lenient: accepts `<summary>X</summary>`, mismatched/missing close tags,
276/// bare text, quoted text, `Title:`-labeled text, and code fences. Collapses
277/// internal whitespace so multiline tag bodies become a single line.
278pub fn parse_summary_output(text: &str) -> Result<serde_json::Value> {
279   let unfenced = strip_fences(text);
280
281   // Prefer an explicit <summary> tag if present (tolerating bad/missing close).
282   let raw = extract_tag_lenient(&unfenced, "summary").unwrap_or_else(|| unfenced.clone());
283
284   // Normalize: drop heading markers, label prefixes, quotes; collapse whitespace.
285   let stripped = strip_heading_markers(&raw);
286   let stripped = strip_label_prefix(&stripped);
287   let stripped = strip_wrapping_quotes(&stripped);
288   let summary_text = stripped.split_whitespace().collect::<Vec<_>>().join(" ");
289
290   if summary_text.is_empty() {
291      return Err(CommitGenError::Other("markdown summary: empty summary text".to_string()));
292   }
293
294   Ok(serde_json::json!({ "summary": summary_text }))
295}
296
297/// Parse markdown changelog format.
298///
299/// Lenient: tolerates code fences, headers as `#`/`##`/`###` or bare
300/// `Category:` lines, and bullet glyph variations. Recognized categories are
301/// matched case-insensitively; unknown `#` headers are still accepted verbatim.
302pub fn parse_changelog_response(text: &str) -> Result<serde_json::Value> {
303   const KNOWN: [&str; 7] =
304      ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security", "Breaking"];
305
306   let unfenced = strip_fences(text);
307
308   let mut entries: HashMap<String, Vec<String>> = HashMap::new();
309   let mut current_category: Option<String> = None;
310
311   let canonical = |name: &str| -> Option<String> {
312      let n = name.trim().trim_end_matches(':').trim();
313      KNOWN
314         .iter()
315         .find(|k| k.eq_ignore_ascii_case(n))
316         .map(|k| (*k).to_string())
317   };
318
319   for line in unfenced.lines() {
320      let trimmed_line = line.trim();
321      if trimmed_line.is_empty() {
322         continue; // tolerate any number of blank/whitespace lines
323      }
324
325      // Header detection:
326      //  - `#`/`##`/`###` prefixed line (any text), or
327      //  - a bare line that *exactly* equals a known category (with optional trailing
328      //    `:`), e.g. `Added`, `Added:` — but NOT `Added rate limiting`.
329      let header = if trimmed_line.starts_with('#') {
330         let h = trimmed_line
331            .trim_start_matches('#')
332            .trim()
333            .trim_end_matches(':')
334            .trim();
335         Some(canonical(h).unwrap_or_else(|| h.to_string()))
336      } else {
337         canonical(trimmed_line)
338      };
339
340      if let Some(h) = header {
341         current_category = Some(h);
342         continue;
343      }
344
345      // Otherwise it's an entry. Accept bulleted (`-`, `*`, `•`, …) or bare lines.
346      let entry = bullet_content(trimmed_line).unwrap_or(trimmed_line).trim();
347      if let Some(cat) = &current_category
348         && !entry.is_empty()
349      {
350         entries
351            .entry(cat.clone())
352            .or_default()
353            .push(entry.to_string());
354      }
355   }
356
357   if entries.is_empty() {
358      return Err(CommitGenError::Other(
359         "markdown changelog: no entries found (format: ## Category\\n- entry)".to_string(),
360      ));
361   }
362
363   Ok(serde_json::json!({ "entries": entries }))
364}
365
366/// Parse markdown compose intent format.
367///
368/// Lenient: strips code fences before parsing the `G1 := type(scope):
369/// rationale`, `G2 <- G1`, and `Files:` sections; bullet glyphs in the files
370/// section vary.
371pub fn parse_compose_intent(text: &str) -> Result<serde_json::Value> {
372   let trimmed = strip_fences(text);
373
374   let mut groups = Vec::new();
375   let mut group_map: HashMap<String, usize> = HashMap::new();
376
377   // First pass: collect group definitions (G1 := type(scope): rationale)
378   for line in trimmed.lines() {
379      let trimmed_line = line.trim();
380      if let Some(assign_pos) = trimmed_line.find(":=") {
381         let gid = trimmed_line[..assign_pos].trim().to_string();
382         let rest = &trimmed_line[assign_pos + 2..].trim();
383
384         if let Some(colon_pos) = rest.find(':') {
385            let type_scope = &rest[..colon_pos].trim();
386            let rationale = rest[colon_pos + 1..].trim().to_string();
387
388            let (gtype, scope) = if let Some(paren_start) = type_scope.find('(') {
389               if let Some(paren_end) = type_scope.find(')') {
390                  let t = type_scope[..paren_start].trim();
391                  let s = type_scope[paren_start + 1..paren_end].trim();
392                  (t.to_string(), Some(s.to_string()))
393               } else {
394                  (type_scope.to_string(), None)
395               }
396            } else {
397               (type_scope.to_string(), None)
398            };
399
400            group_map.insert(gid.clone(), groups.len());
401
402            let group_obj = serde_json::json!({
403               "group_id": gid,
404               "type": normalize_commit_type(&gtype),
405               "scope": scope,
406               "rationale": rationale,
407               "file_ids": Vec::<String>::new(),
408               "dependencies": Vec::<String>::new()
409            });
410            groups.push(group_obj);
411         }
412      }
413   }
414
415   // Second pass: parse dependencies (G2 <- G1)
416   for line in trimmed.lines() {
417      let trimmed_line = line.trim();
418      if let Some(dep_pos) = trimmed_line.find("<-") {
419         let gid = trimmed_line[..dep_pos].trim().to_string();
420         let deps_str = trimmed_line[dep_pos + 2..].trim();
421
422         if let Some(idx) = group_map.get(&gid) {
423            let mut dependencies = Vec::new();
424            for dep_id in deps_str.split(',') {
425               let trimmed_dep = dep_id.trim();
426               if !trimmed_dep.is_empty() {
427                  dependencies.push(trimmed_dep.to_string());
428               }
429            }
430            if let Some(group_obj) = groups.get_mut(*idx) {
431               group_obj["dependencies"] = serde_json::Value::Array(
432                  dependencies
433                     .into_iter()
434                     .map(serde_json::Value::String)
435                     .collect(),
436               );
437            }
438         }
439      }
440   }
441
442   // Third pass: parse file assignments (- G1: file1, file2)
443   let mut in_files_section = false;
444   for line in trimmed.lines() {
445      let trimmed_line = line.trim();
446
447      if trimmed_line.to_lowercase().starts_with("files:") {
448         in_files_section = true;
449         continue;
450      }
451
452      if in_files_section
453         && let Some(bullet) = bullet_content(trimmed_line)
454         && let Some(colon_pos) = bullet.find(':')
455      {
456         let gid = bullet[..colon_pos].trim().to_string();
457         let files_str = bullet[colon_pos + 1..].trim();
458
459         if let Some(idx) = group_map.get(&gid)
460            && let Some(group_obj) = groups.get_mut(*idx)
461         {
462            group_obj["file_ids"] = serde_json::Value::Array(
463               files_str
464                  .split(',')
465                  .map(|f| serde_json::Value::String(f.trim().to_string()))
466                  .collect(),
467            );
468         }
469      }
470   }
471
472   if groups.is_empty() {
473      return Err(CommitGenError::Other(
474         "markdown compose intent: no groups found (format: G1 := type(scope): rationale)"
475            .to_string(),
476      ));
477   }
478
479   Ok(serde_json::json!({
480      "groups": groups
481   }))
482}
483
484/// Parse markdown compose binding format.
485///
486/// Lenient: strips code fences; group headers accept `#`/`##` (with or without
487/// trailing colon); hunk bullets accept varied glyphs.
488pub fn parse_compose_binding(text: &str) -> Result<serde_json::Value> {
489   let trimmed = strip_fences(text);
490
491   let mut assignments = Vec::new();
492   let mut current_group: Option<String> = None;
493   let mut current_hunks: Vec<String> = Vec::new();
494
495   for line in trimmed.lines() {
496      let trimmed_line = line.trim();
497
498      if trimmed_line.starts_with('#') {
499         // Save previous group if any
500         if let Some(gid) = current_group.take() {
501            assignments.push(serde_json::json!({
502               "group_id": gid,
503               "hunk_ids": std::mem::take(&mut current_hunks)
504            }));
505         }
506         // Start new group (strip hashes and any trailing colon)
507         let new_gid = trimmed_line
508            .trim_start_matches('#')
509            .trim()
510            .trim_end_matches(':')
511            .trim()
512            .to_string();
513         current_group = Some(new_gid);
514      } else if let Some(hunk_id) = bullet_content(trimmed_line) {
515         current_hunks.push(hunk_id.to_string());
516      }
517   }
518
519   // Save final group
520   if let Some(gid) = current_group.take() {
521      assignments.push(serde_json::json!({
522         "group_id": gid,
523         "hunk_ids": std::mem::take(&mut current_hunks)
524      }));
525   }
526
527   if assignments.is_empty() {
528      return Err(CommitGenError::Other(
529         "markdown compose binding: no assignments found (format: # group_id\\n- hunk_id)"
530            .to_string(),
531      ));
532   }
533
534   Ok(serde_json::json!({
535      "assignments": assignments
536   }))
537}
538
539/// Parse markdown map-phase batch observations.
540///
541/// Format: each file is a `## path` (or `# path`) header, followed by bullet or
542/// bare-line observations. Produces `{ "files": [{ "path", "observations" }]
543/// }`. Files with no observations are kept with an empty array. Lenient: strips
544/// fences, accepts varied bullet glyphs and bare-line observations.
545pub fn parse_batch_observations(text: &str) -> Result<serde_json::Value> {
546   let unfenced = strip_fences(text);
547
548   let mut files: Vec<serde_json::Value> = Vec::new();
549   let mut current_path: Option<String> = None;
550   let mut current_obs: Vec<String> = Vec::new();
551
552   for line in unfenced.lines() {
553      let t = line.trim();
554      if t.is_empty() {
555         continue;
556      }
557
558      if t.starts_with('#') {
559         // New file header — flush the previous one.
560         if let Some(path) = current_path.take() {
561            files.push(serde_json::json!({
562               "path": path,
563               "observations": std::mem::take(&mut current_obs),
564            }));
565         }
566         current_path = Some(t.trim_start_matches('#').trim().to_string());
567      } else if current_path.is_some() {
568         // Observation: bullet or bare line.
569         let obs = bullet_content(t).unwrap_or(t).trim();
570         if !obs.is_empty() {
571            current_obs.push(obs.to_string());
572         }
573      }
574   }
575
576   if let Some(path) = current_path.take() {
577      files.push(serde_json::json!({
578         "path": path,
579         "observations": current_obs,
580      }));
581   }
582
583   if files.is_empty() {
584      return Err(CommitGenError::Other(
585         "markdown observations: no file sections found (format: ## path\\n- observation)"
586            .to_string(),
587      ));
588   }
589
590   Ok(serde_json::json!({ "files": files }))
591}
592
/// Map a commit type or one of its known aliases onto the canonical short
/// form (`feature` -> `feat`, `bugfix` -> `fix`, ...).
///
/// Matching is done on the lowercased input. An unrecognized value is
/// returned lowercased but otherwise unchanged; the input is not trimmed.
fn normalize_commit_type(s: &str) -> String {
   // (canonical form, accepted aliases)
   const TABLE: [(&str, &[&str]); 22] = [
      ("feat", &["feature"]),
      ("fix", &["bugfix"]),
      ("docs", &["documentation"]),
      ("style", &["formatting"]),
      ("refactor", &["refactoring"]),
      ("perf", &["performance"]),
      ("test", &["tests"]),
      ("build", &["builder"]),
      ("ci", &["cicd"]),
      ("chore", &["maintenance"]),
      ("revert", &["reversion"]),
      ("deps", &["dependencies", "dependency"]),
      ("security", &["sec"]),
      ("config", &["configuration"]),
      ("ux", &["ergonomics"]),
      ("release", &["version"]),
      ("hotfix", &[]),
      ("infra", &["infrastructure"]),
      ("init", &["initialization"]),
      ("merge", &["merging"]),
      ("hack", &["hacky"]),
      ("wip", &["work-in-progress"]),
   ];

   let lowered = s.to_lowercase();
   let canonical = TABLE
      .iter()
      .find(|(name, aliases)| *name == lowered || aliases.iter().any(|alias| *alias == lowered))
      .map(|entry| entry.0);

   match canonical {
      Some(name) => name.to_string(),
      None => lowered,
   }
}
621
// Unit tests for the lenient markdown parsers. Each section feeds one parser
// the wrapping/formatting variations it is meant to tolerate.
#[cfg(test)]
mod tests {
   use super::*;

   // ===== conventional analysis =====

   #[test]
   fn test_conventional_analysis() {
      let md = "# feat(api): add user authentication endpoint\n\n- Added POST /auth/login \
                endpoint\n- Implemented bcrypt password hashing\n\nFixes: #123";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "feat");
      assert_eq!(r["scope"], "api");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"][0], "#123");
   }

   #[test]
   fn test_analysis_lenient_variations() {
      // fenced, no `#`, bold heading, `*` bullets, Closes: footer
      let md = "```md\n**fix(core): corrected null deref**\n\n* fixed a crash\n* guarded the \
                pointer\n\nCloses: #7, #8\n```";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "fix");
      assert_eq!(r["scope"], "core");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"].as_array().unwrap().len(), 2);
   }

   #[test]
   fn test_analysis_no_scope_and_leading_blank_lines() {
      let md = "\n\n\n# chore: bumped version\n";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "chore");
      assert!(r["scope"].is_null());
   }

   #[test]
   fn test_heading_requires_known_type_not_json_key() {
      // A stray JSON/YAML `type:` key must NOT be misread as a heading.
      // (This used to yield {"type":"type"} which cached and then blew up.)
      let json_ish = "{\n  \"type\": \"refactor\",\n  \"summary\": \"did things\"\n}";
      assert!(parse_conventional_analysis(json_ish).is_err());
      // And `summary:`/`scope:` key lines are likewise not headings.
      assert!(parse_conventional_analysis("summary: did a thing\nscope: core").is_err());
   }

   #[test]
   fn test_fast_commit_details_are_plain_strings() {
      // FastCommitOutput.details is Vec<String>, so the fast parser must emit
      // string details (not {text} objects like the analysis parser).
      let md = "# refactor(web): derive provider order from options\n\n- Derived the metadata \
                dynamically.\n- Reprioritized the default sequence.";
      let r = parse_fast_commit(md).unwrap();
      assert_eq!(r["type"], "refactor");
      assert_eq!(r["scope"], "web");
      let details = r["details"].as_array().unwrap();
      assert_eq!(details.len(), 2);
      assert!(details[0].is_string(), "fast details must be strings");
      // It must deserialize into a struct with the FastCommitOutput field
      // layout (`type` plus string details).
      #[derive(serde::Deserialize)]
      struct FastShape {
         #[serde(rename = "type")]
         _t:      String,
         details: Vec<String>,
      }
      let parsed: FastShape = serde_json::from_value(r).unwrap();
      assert_eq!(parsed.details.len(), 2);
   }

   // ===== summary: all the wrapping variations =====

   #[test]
   fn test_summary_variations() {
      // Every case must normalize to the same summary text.
      let cases = [
         "<summary>Added JWT auth</summary>",
         "Added JWT auth",                                    // bare
         "\"Added JWT auth\"",                                // quoted
         "<summary>\"Added JWT auth\"</title>",               // quoted + mismatched close tag
         "```md\n<summary>\nAdded JWT auth\n</summary>\n```", // fenced + multiline
         "Title: Added JWT auth",                             // labeled
         "# Added JWT auth",                                  // heading marker
         "\n\n  Added JWT auth  \n\n",                        // stray whitespace
      ];
      for c in cases {
         let r = parse_summary_output(c).unwrap();
         assert_eq!(r["summary"], "Added JWT auth", "input was: {c:?}");
      }
   }

   // ===== changelog: header + item variations =====

   #[test]
   fn test_changelog_hash_and_dash() {
      let md = "# Added\n- POST /auth/login endpoint\n\n# Fixed\n- Race condition";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 1);
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 1);
   }

   #[test]
   fn test_changelog_lenient_mixed() {
      // `##` and `#` and bare `Category:` headers; `-`, `*`, and bare items;
      // random blank lines.
      let md = "## Added\n- one\n* two\n\n\nFixed:\nthree\n- four\n\n# Security\n\n  five  ";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 2, "Added");
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 2, "Fixed (bare + dash)");
      assert_eq!(e["Security"].as_array().unwrap().len(), 1, "Security (bare item)");
   }

   #[test]
   fn test_changelog_bare_category_not_confused_with_item() {
      // "Added rate limiting" must be an ITEM, not a header.
      let md = "# Security\n- Added rate limiting on auth endpoints";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert!(e.contains_key("Security"));
      assert!(!e.contains_key("Added"));
      assert_eq!(e["Security"][0], "Added rate limiting on auth endpoints");
   }

   #[test]
   fn test_changelog_fenced() {
      let md = "```\n# Added\n- thing\n```";
      let r = parse_changelog_response(md).unwrap();
      assert_eq!(r["entries"]["Added"][0], "thing");
   }

   // ===== literal \n escapes =====

   #[test]
   fn test_literal_backslash_n_analysis() {
      // A model emitted the whole thing on one physical line with literal \n.
      let md = "# feat(api): add auth\\n\\n- did a thing\\n- did another\\n\\nFixes: #1";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "feat");
      assert_eq!(r["scope"], "api");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"][0], "#1");
   }

   #[test]
   fn test_literal_backslash_n_changelog() {
      let md = "# Added\\n- one\\n- two\\n# Fixed\\n- three";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 2);
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 1);
   }

   #[test]
   fn test_real_newlines_with_stray_backslash_preserved() {
      // The input has one real newline and one literal `\n` (inside the
      // second path), i.e. the two counts tie.
      let md = "# docs: explain C:\\\\path usage\n- noted the path C:\\nope is literal";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "docs");
      // Exactly one bullet detail is reported.
      // NOTE(review): only the number of details is asserted, not their text,
      // so this does not prove the path survived intact — consider asserting
      // the detail content as well.
      assert_eq!(r["details"].as_array().unwrap().len(), 1);
   }

   // ===== compose =====

   #[test]
   fn test_compose_intent_fenced() {
      let md = "```\nG1 := feat(api): add endpoints\nG2 := test(api): add tests\n\nG2 <- \
                G1\n\nFiles:\n- G1: a.rs, b.rs\n* G2: c.test.ts\n```";
      let r = parse_compose_intent(md).unwrap();
      let g = r["groups"].as_array().unwrap();
      assert_eq!(g.len(), 2);
      assert_eq!(g[0]["file_ids"].as_array().unwrap().len(), 2);
      assert_eq!(g[1]["dependencies"][0], "G1");
      assert_eq!(g[1]["file_ids"][0], "c.test.ts"); // `*` bullet handled
   }

   #[test]
   fn test_compose_binding_lenient() {
      let md = "```\n## G1:\n- h1\n* h2\n# G2\n- h3\n```";
      let r = parse_compose_binding(md).unwrap();
      let a = r["assignments"].as_array().unwrap();
      assert_eq!(a.len(), 2);
      assert_eq!(a[0]["group_id"], "G1"); // trailing colon + `##` stripped
      assert_eq!(a[0]["hunk_ids"].as_array().unwrap().len(), 2);
   }

   // ===== map-phase batch observations =====

   #[test]
   fn test_batch_observations() {
      let md = "## src/config.rs\n- added TOML loading\n- changed timeout\n\n## src/main.rs\n- \
                wired CLI flag\n\n## src/empty.rs";
      let r = parse_batch_observations(md).unwrap();
      let files = r["files"].as_array().unwrap();
      assert_eq!(files.len(), 3);
      assert_eq!(files[0]["path"], "src/config.rs");
      assert_eq!(files[0]["observations"].as_array().unwrap().len(), 2);
      assert_eq!(files[1]["observations"].as_array().unwrap().len(), 1);
      assert_eq!(files[2]["observations"].as_array().unwrap().len(), 0); // header only
   }

   #[test]
   fn test_batch_observations_fenced_and_literal_newlines() {
      // Whole response on one physical line: fence and newlines are literal.
      let md = "```\\n## a.rs\\n- did x\\n* did y\\n## b.rs\\n- did z\\n```";
      let r = parse_batch_observations(md).unwrap();
      let files = r["files"].as_array().unwrap();
      assert_eq!(files.len(), 2);
      assert_eq!(files[0]["path"], "a.rs");
      assert_eq!(files[0]["observations"].as_array().unwrap().len(), 2);
   }
}
llm_git/markdown_output.rs

llm_git/
markdown_output.rs