llm_git/
markdown_output.rs

1//! Markdown format parsers for structured LLM outputs
2//!
3//! Provides parsers for markdown-formatted responses as an alternative to JSON tool calls.
4
5use std::collections::HashMap;
6
7use crate::error::{CommitGenError, Result};
8
9// ===== Leniency helpers =====
10// Models wrap the same content many ways: code fences, quotes, mismatched or
11// missing tags, bullet glyph variations. These helpers normalize all of that
12// before structured parsing so the parsers stay tolerant.
13
/// Turn literal escape sequences (`\n`, `\r\n`, `\r`, `\t`) into the
/// whitespace they stand for.
///
/// Some models answer with one physical line whose "newlines" are the two
/// characters backslash + `n`. The rewrite only happens when the text contains
/// at least one literal `\n` and literal `\n` occurrences are at least as
/// numerous as real newline characters; otherwise the input is returned
/// unchanged so that an occasional backslash in ordinary multi-line text is
/// left alone.
fn normalize_escaped_whitespace(text: &str) -> String {
   // Order matters: `\r\n` must be consumed before its two halves.
   const ESCAPES: [(&str, &str); 4] =
      [("\\r\\n", "\n"), ("\\n", "\n"), ("\\r", "\n"), ("\\t", "\t")];

   let escaped_newlines = text.matches("\\n").count();
   let actual_newlines = text.matches('\n').count();
   let should_unescape = escaped_newlines > 0 && escaped_newlines >= actual_newlines;
   if !should_unescape {
      return text.to_owned();
   }

   ESCAPES
      .iter()
      .fold(text.to_owned(), |acc, &(from, to)| acc.replace(from, to))
}
31
32/// Strip surrounding Markdown code fences (```lang ... ```), if present.
33/// Also normalizes literal `\n`/`\t` escapes first, so every parser that
34/// routes through here inherits both behaviors.
35fn strip_fences(text: &str) -> String {
36   let normalized = normalize_escaped_whitespace(text);
37   let t = normalized.trim();
38   // Whole-block fence: starts with ``` and ends with ```
39   if let Some(after_fence) = t.strip_prefix("```") {
40      // Drop the opening fence line (may carry a language tag like ```md).
41      let after_open = after_fence.split_once('\n').map_or("", |x| x.1);
42      let body = match after_open.rfind("```") {
43         Some(end) => &after_open[..end],
44         None => after_open,
45      };
46      return body.trim().to_string();
47   }
48   // No leading fence: just remove any stray ``` lines.
49   t.lines()
50      .filter(|l| l.trim_start().trim_end() != "```" && !l.trim_start().starts_with("```"))
51      .collect::<Vec<_>>()
52      .join("\n")
53      .trim()
54      .to_string()
55}
56
/// Peel one layer of matching quotes off a string.
///
/// The input is trimmed first. If it then starts and ends with one of the
/// recognised pairs (straight double, straight single, backtick, curly double,
/// curly single) the text between them is returned, trimmed again. Anything
/// else — including a lone quote character or mismatched quotes — is returned
/// trimmed but otherwise untouched.
fn strip_wrapping_quotes(s: &str) -> String {
   const QUOTE_PAIRS: [(char, char); 5] =
      [('"', '"'), ('\'', '\''), ('`', '`'), ('“', '”'), ('‘', '’')];

   let trimmed = s.trim();
   QUOTE_PAIRS
      .iter()
      // Stripping the opener first guarantees opener and closer are distinct
      // characters of the input, i.e. the text has at least two characters.
      .find_map(|&(open, close)| trimmed.strip_prefix(open)?.strip_suffix(close))
      .map_or_else(|| trimmed.to_string(), |inner| inner.trim().to_string())
}
74
/// Drop a leading `Label:` when the label is one of `title`, `summary`,
/// `description` or `result` (compared case-insensitively, ignoring
/// surrounding whitespace).
///
/// Only the text before the *first* colon is considered. When it is a known
/// label the trimmed remainder is returned; otherwise the input comes back
/// exactly as given (not trimmed).
fn strip_label_prefix(s: &str) -> String {
   const LABELS: [&str; 4] = ["title", "summary", "description", "result"];

   let Some((label, value)) = s.split_once(':') else {
      return s.to_string();
   };
   let label = label.trim().to_lowercase();
   if LABELS.iter().any(|known| *known == label) {
      value.trim().to_string()
   } else {
      s.to_string()
   }
}
85
/// Remove Markdown heading hashes and whole-line emphasis from a line.
///
/// After trimming, any run of leading `#` (and the whitespace after it) is
/// removed. Then, for each of `**`, `*`, `__`, `_` in that order, a marker that
/// both opens and closes the remaining text — with something in between — is
/// peeled off and the interior trimmed. The markers are applied in sequence,
/// so `***x***` ends up as `x`.
fn strip_heading_markers(s: &str) -> String {
   let without_hashes = s.trim().trim_start_matches('#').trim_start();

   ["**", "*", "__", "_"]
      .iter()
      .fold(without_hashes, |current, &marker| {
         current
            .strip_prefix(marker)
            .and_then(|rest| rest.strip_suffix(marker))
            // An empty interior means the text was nothing but markers.
            .filter(|inner| !inner.is_empty())
            .map_or(current, str::trim)
      })
      .to_string()
}
99
/// Return the content of a bullet line, or `None` if the line is not a bullet.
///
/// A bullet is one of the glyphs `-`, `*`, `•`, `–`, `+` (after optional
/// leading whitespace) followed by at least one whitespace character. Any
/// whitespace counts as the separator — a space, a tab, a non-breaking space —
/// so `-\titem` is recognised just like `- item`. The returned slice is the
/// text after the glyph with surrounding whitespace trimmed.
///
/// A glyph that is immediately followed by something else (`-flag`,
/// `**bold**`) or by nothing at all is not treated as a bullet.
fn bullet_content(line: &str) -> Option<&str> {
   const GLYPHS: [char; 5] = ['-', '*', '•', '–', '+'];

   let mut chars = line.trim_start().chars();
   let glyph = chars.next()?;
   // What is left of the iterator is the text right after the glyph.
   let rest = chars.as_str();

   if GLYPHS.contains(&glyph) && rest.starts_with(char::is_whitespace) {
      Some(rest.trim())
   } else {
      None
   }
}
110
/// Extract the text between the first `<tag…>` and the next closing tag.
///
/// Leniency rules:
/// * the tag name is matched ASCII case-insensitively (both in `text` and in
///   the `tag` argument), and the opening tag may carry attributes;
/// * the content ends at the next `</`, whatever tag name follows it, so a
///   mismatched close such as `<summary>X</title>` still works;
/// * if there is no closing tag at all, the remainder of the text is used.
///
/// Returns `None` when no opening tag is found or the opening tag is never
/// terminated by `>`. The returned content is trimmed.
fn extract_tag_lenient(text: &str, tag: &str) -> Option<String> {
   // ASCII-only case folding never changes byte lengths, so an offset found in
   // `haystack` is valid in `text`. Full Unicode lowercasing does not have
   // that property and would yield wrong or non-boundary offsets whenever a
   // length-changing character precedes the tag.
   let haystack = text.to_ascii_lowercase();
   let needle = format!("<{}", tag.to_ascii_lowercase());
   let open_pos = haystack.find(&needle)?;

   // Skip the remainder of the opening tag, up to and including its `>`.
   let content_start = open_pos + text[open_pos..].find('>')? + 1;
   let rest = &text[content_start..];

   // Stop at the next closing tag of ANY name, or take everything.
   let end = rest.find("</").unwrap_or(rest.len());
   Some(rest[..end].trim().to_string())
}
126
127/// Parse markdown conventional analysis format.
128///
129/// Lenient: tolerates code fences, headings with/without `#`, bold emphasis,
130/// the `type(scope): summary` line appearing on any of the first lines, bullet
131/// glyph variations, and `Fixes:`/`Closes:`/`Resolves:` footers.
132pub fn parse_conventional_analysis(text: &str) -> Result<serde_json::Value> {
133   let unfenced = strip_fences(text);
134   let lines: Vec<&str> = unfenced.lines().collect();
135
136   // Find the heading line: the first line that parses as `type(scope)?: summary`.
137   let mut heading_idx = None;
138   let mut parsed_heading = None;
139   for (i, line) in lines.iter().enumerate() {
140      let candidate = strip_heading_markers(line);
141      if let Some(h) = parse_heading(&candidate) {
142         heading_idx = Some(i);
143         parsed_heading = Some(h);
144         break;
145      }
146      // Only scan the first few lines for the heading.
147      if i >= 5 {
148         break;
149      }
150   }
151
152   let (commit_type, scope, summary) = parsed_heading.ok_or_else(|| {
153      CommitGenError::Other(
154         "markdown analysis: no `type(scope): summary` heading found".to_string(),
155      )
156   })?;
157   let start = heading_idx.unwrap_or(0) + 1;
158
159   let mut details = Vec::new();
160   let mut issue_refs = Vec::new();
161
162   for line in &lines[start..] {
163      let trimmed_line = line.trim();
164      let lower = trimmed_line.to_lowercase();
165
166      if let Some(detail) = bullet_content(trimmed_line) {
167         if !detail.is_empty() {
168            details.push(serde_json::json!({ "text": detail }));
169         }
170      } else if let Some(rest) = lower
171         .strip_prefix("fixes:")
172         .or_else(|| lower.strip_prefix("closes:"))
173         .or_else(|| lower.strip_prefix("resolves:"))
174      {
175         // Use the original-case slice for the refs themselves.
176         let orig = &trimmed_line[trimmed_line.len() - rest.len()..];
177         for ref_str in orig.split(',') {
178            let r = ref_str.trim();
179            if !r.is_empty() {
180               issue_refs.push(r.to_string());
181            }
182         }
183      }
184   }
185
186   Ok(serde_json::json!({
187      "type": commit_type,
188      "scope": scope,
189      "summary": summary,
190      "details": details,
191      "issue_refs": issue_refs
192   }))
193}
194
/// Parse a conventional-commit style heading.
///
/// Accepted forms are `type: summary`, `type(scope): summary`, and the same
/// with the breaking-change marker `!` directly before the colon
/// (`type!: summary`, `type(scope)!: summary`). The marker itself is not
/// reported.
///
/// Returns `(type, scope, summary)`; `scope` is `None` when there are no
/// parentheses or they are empty. Returns `None` when the line does not look
/// like a heading: no colon, empty type or summary, an unclosed or reversed
/// parenthesis, or a type that is not a single run of ASCII letters.
fn parse_heading(line: &str) -> Option<(String, Option<String>, String)> {
   let (type_scope, summary) = line.split_once(':')?;
   let summary = summary.trim();

   // Drop one trailing `!` so that `feat!` is treated like `feat`; the scoped
   // form `feat(api)!` was already accepted because text after `)` is ignored.
   let type_scope = type_scope.trim();
   let type_scope = type_scope.strip_suffix('!').map_or(type_scope, str::trim_end);

   if type_scope.is_empty() || summary.is_empty() {
      return None;
   }

   let (ty, scope) = match type_scope.find('(') {
      Some(open) => {
         let close = type_scope.find(')')?;
         if close < open {
            return None;
         }
         let scope = type_scope[open + 1..close].trim();
         (type_scope[..open].trim(), (!scope.is_empty()).then(|| scope.to_string()))
      },
      None => (type_scope, None),
   };

   // Heuristic: a type token is a single alphabetic word. This is what keeps
   // ordinary sentences that happen to contain a colon from matching.
   if ty.is_empty() || !ty.chars().all(|c| c.is_ascii_alphabetic()) {
      return None;
   }
   Some((ty.to_string(), scope, summary.to_string()))
}
223
224/// Parse markdown summary format.
225///
226/// Lenient: accepts `<summary>X</summary>`, mismatched/missing close tags,
227/// bare text, quoted text, `Title:`-labeled text, and code fences. Collapses
228/// internal whitespace so multiline tag bodies become a single line.
229pub fn parse_summary_output(text: &str) -> Result<serde_json::Value> {
230   let unfenced = strip_fences(text);
231
232   // Prefer an explicit <summary> tag if present (tolerating bad/missing close).
233   let raw = extract_tag_lenient(&unfenced, "summary").unwrap_or_else(|| unfenced.clone());
234
235   // Normalize: drop heading markers, label prefixes, quotes; collapse whitespace.
236   let stripped = strip_heading_markers(&raw);
237   let stripped = strip_label_prefix(&stripped);
238   let stripped = strip_wrapping_quotes(&stripped);
239   let summary_text = stripped.split_whitespace().collect::<Vec<_>>().join(" ");
240
241   if summary_text.is_empty() {
242      return Err(CommitGenError::Other("markdown summary: empty summary text".to_string()));
243   }
244
245   Ok(serde_json::json!({ "summary": summary_text }))
246}
247
248/// Parse markdown changelog format.
249///
250/// Lenient: tolerates code fences, headers as `#`/`##`/`###` or bare
251/// `Category:` lines, and bullet glyph variations. Recognized categories are
252/// matched case-insensitively; unknown `#` headers are still accepted verbatim.
253pub fn parse_changelog_response(text: &str) -> Result<serde_json::Value> {
254   const KNOWN: [&str; 7] =
255      ["Added", "Changed", "Fixed", "Deprecated", "Removed", "Security", "Breaking"];
256
257   let unfenced = strip_fences(text);
258
259   let mut entries: HashMap<String, Vec<String>> = HashMap::new();
260   let mut current_category: Option<String> = None;
261
262   let canonical = |name: &str| -> Option<String> {
263      let n = name.trim().trim_end_matches(':').trim();
264      KNOWN
265         .iter()
266         .find(|k| k.eq_ignore_ascii_case(n))
267         .map(|k| (*k).to_string())
268   };
269
270   for line in unfenced.lines() {
271      let trimmed_line = line.trim();
272      if trimmed_line.is_empty() {
273         continue; // tolerate any number of blank/whitespace lines
274      }
275
276      // Header detection:
277      //  - `#`/`##`/`###` prefixed line (any text), or
278      //  - a bare line that *exactly* equals a known category (with optional
279      //    trailing `:`), e.g. `Added`, `Added:` — but NOT `Added rate limiting`.
280      let header = if trimmed_line.starts_with('#') {
281         let h = trimmed_line.trim_start_matches('#').trim().trim_end_matches(':').trim();
282         Some(canonical(h).unwrap_or_else(|| h.to_string()))
283      } else { canonical(trimmed_line) };
284
285      if let Some(h) = header {
286         current_category = Some(h);
287         continue;
288      }
289
290      // Otherwise it's an entry. Accept bulleted (`-`, `*`, `•`, …) or bare lines.
291      let entry = bullet_content(trimmed_line).unwrap_or(trimmed_line).trim();
292      if let Some(cat) = &current_category
293         && !entry.is_empty() {
294            entries.entry(cat.clone()).or_default().push(entry.to_string());
295         }
296   }
297
298   if entries.is_empty() {
299      return Err(CommitGenError::Other(
300         "markdown changelog: no entries found (format: ## Category\\n- entry)".to_string(),
301      ));
302   }
303
304   Ok(serde_json::json!({ "entries": entries }))
305}
306
307/// Parse markdown compose intent format.
308///
309/// Lenient: strips code fences before parsing the `G1 := type(scope): rationale`,
310/// `G2 <- G1`, and `Files:` sections; bullet glyphs in the files section vary.
311pub fn parse_compose_intent(text: &str) -> Result<serde_json::Value> {
312   let trimmed = strip_fences(text);
313
314   let mut groups = Vec::new();
315   let mut group_map: HashMap<String, usize> = HashMap::new();
316
317   // First pass: collect group definitions (G1 := type(scope): rationale)
318   for line in trimmed.lines() {
319      let trimmed_line = line.trim();
320      if let Some(assign_pos) = trimmed_line.find(":=") {
321         let gid = trimmed_line[..assign_pos].trim().to_string();
322         let rest = &trimmed_line[assign_pos + 2..].trim();
323
324         if let Some(colon_pos) = rest.find(':') {
325            let type_scope = &rest[..colon_pos].trim();
326            let rationale = rest[colon_pos + 1..].trim().to_string();
327
328            let (gtype, scope) = if let Some(paren_start) = type_scope.find('(') {
329               if let Some(paren_end) = type_scope.find(')') {
330                  let t = type_scope[..paren_start].trim();
331                  let s = type_scope[paren_start + 1..paren_end].trim();
332                  (t.to_string(), Some(s.to_string()))
333               } else {
334                  (type_scope.to_string(), None)
335               }
336            } else {
337               (type_scope.to_string(), None)
338            };
339
340            group_map.insert(gid.clone(), groups.len());
341
342            let group_obj = serde_json::json!({
343               "group_id": gid,
344               "type": normalize_commit_type(&gtype),
345               "scope": scope,
346               "rationale": rationale,
347               "file_ids": Vec::<String>::new(),
348               "dependencies": Vec::<String>::new()
349            });
350            groups.push(group_obj);
351         }
352      }
353   }
354
355   // Second pass: parse dependencies (G2 <- G1)
356   for line in trimmed.lines() {
357      let trimmed_line = line.trim();
358      if let Some(dep_pos) = trimmed_line.find("<-") {
359         let gid = trimmed_line[..dep_pos].trim().to_string();
360         let deps_str = trimmed_line[dep_pos + 2..].trim();
361
362         if let Some(idx) = group_map.get(&gid) {
363            let mut dependencies = Vec::new();
364            for dep_id in deps_str.split(',') {
365               let trimmed_dep = dep_id.trim();
366               if !trimmed_dep.is_empty() {
367                  dependencies.push(trimmed_dep.to_string());
368               }
369            }
370            if let Some(group_obj) = groups.get_mut(*idx) {
371               group_obj["dependencies"] = serde_json::Value::Array(
372                  dependencies.into_iter().map(serde_json::Value::String).collect(),
373               );
374            }
375         }
376      }
377   }
378
379   // Third pass: parse file assignments (- G1: file1, file2)
380   let mut in_files_section = false;
381   for line in trimmed.lines() {
382      let trimmed_line = line.trim();
383
384      if trimmed_line.to_lowercase().starts_with("files:") {
385         in_files_section = true;
386         continue;
387      }
388
389      if in_files_section && let Some(bullet) = bullet_content(trimmed_line)
390         && let Some(colon_pos) = bullet.find(':') {
391            let gid = bullet[..colon_pos].trim().to_string();
392            let files_str = bullet[colon_pos + 1..].trim();
393
394            if let Some(idx) = group_map.get(&gid)
395               && let Some(group_obj) = groups.get_mut(*idx) {
396               group_obj["file_ids"] = serde_json::Value::Array(
397                  files_str.split(',').map(|f| serde_json::Value::String(f.trim().to_string())).collect(),
398               );
399            }
400         }
401   }
402
403   if groups.is_empty() {
404      return Err(CommitGenError::Other(
405         "markdown compose intent: no groups found (format: G1 := type(scope): rationale)".to_string(),
406      ));
407   }
408
409   Ok(serde_json::json!({
410      "groups": groups
411   }))
412}
413
414/// Parse markdown compose binding format.
415///
416/// Lenient: strips code fences; group headers accept `#`/`##` (with or without
417/// trailing colon); hunk bullets accept varied glyphs.
418pub fn parse_compose_binding(text: &str) -> Result<serde_json::Value> {
419   let trimmed = strip_fences(text);
420
421   let mut assignments = Vec::new();
422   let mut current_group: Option<String> = None;
423   let mut current_hunks: Vec<String> = Vec::new();
424
425   for line in trimmed.lines() {
426      let trimmed_line = line.trim();
427
428      if trimmed_line.starts_with('#') {
429         // Save previous group if any
430         if let Some(gid) = current_group.take() {
431            assignments.push(serde_json::json!({
432               "group_id": gid,
433               "hunk_ids": std::mem::take(&mut current_hunks)
434            }));
435         }
436         // Start new group (strip hashes and any trailing colon)
437         let new_gid = trimmed_line
438            .trim_start_matches('#')
439            .trim()
440            .trim_end_matches(':')
441            .trim()
442            .to_string();
443         current_group = Some(new_gid);
444      } else if let Some(hunk_id) = bullet_content(trimmed_line) {
445         current_hunks.push(hunk_id.to_string());
446      }
447   }
448
449   // Save final group
450   if let Some(gid) = current_group.take() {
451      assignments.push(serde_json::json!({
452         "group_id": gid,
453         "hunk_ids": std::mem::take(&mut current_hunks)
454      }));
455   }
456
457   if assignments.is_empty() {
458      return Err(CommitGenError::Other(
459         "markdown compose binding: no assignments found (format: # group_id\\n- hunk_id)".to_string(),
460      ));
461   }
462
463   Ok(serde_json::json!({
464      "assignments": assignments
465   }))
466}
467
468/// Parse markdown map-phase batch observations.
469///
470/// Format: each file is a `## path` (or `# path`) header, followed by bullet or
471/// bare-line observations. Produces `{ "files": [{ "path", "observations" }] }`.
472/// Files with no observations are kept with an empty array. Lenient: strips
473/// fences, accepts varied bullet glyphs and bare-line observations.
474pub fn parse_batch_observations(text: &str) -> Result<serde_json::Value> {
475   let unfenced = strip_fences(text);
476
477   let mut files: Vec<serde_json::Value> = Vec::new();
478   let mut current_path: Option<String> = None;
479   let mut current_obs: Vec<String> = Vec::new();
480
481   for line in unfenced.lines() {
482      let t = line.trim();
483      if t.is_empty() {
484         continue;
485      }
486
487      if t.starts_with('#') {
488         // New file header — flush the previous one.
489         if let Some(path) = current_path.take() {
490            files.push(serde_json::json!({
491               "path": path,
492               "observations": std::mem::take(&mut current_obs),
493            }));
494         }
495         current_path = Some(t.trim_start_matches('#').trim().to_string());
496      } else if current_path.is_some() {
497         // Observation: bullet or bare line.
498         let obs = bullet_content(t).unwrap_or(t).trim();
499         if !obs.is_empty() {
500            current_obs.push(obs.to_string());
501         }
502      }
503   }
504
505   if let Some(path) = current_path.take() {
506      files.push(serde_json::json!({
507         "path": path,
508         "observations": current_obs,
509      }));
510   }
511
512   if files.is_empty() {
513      return Err(CommitGenError::Other(
514         "markdown observations: no file sections found (format: ## path\\n- observation)"
515            .to_string(),
516      ));
517   }
518
519   Ok(serde_json::json!({ "files": files }))
520}
521
/// Map a commit type to its canonical short form.
///
/// The input is lowercased and long-form aliases are folded into the short
/// name (`feature` → `feat`, `bugfix` → `fix`, …). Anything that is not a
/// known alias — including the canonical names themselves and unknown types —
/// is returned lowercased and otherwise unchanged. No trimming is performed.
fn normalize_commit_type(s: &str) -> String {
   let lowered = s.to_lowercase();

   // Only aliases need an entry: canonical names already equal their own
   // lowercased form and fall through to the default arm.
   let canonical = match lowered.as_str() {
      "feature" => "feat",
      "bugfix" => "fix",
      "documentation" => "docs",
      "formatting" => "style",
      "refactoring" => "refactor",
      "performance" => "perf",
      "tests" => "test",
      "builder" => "build",
      "cicd" => "ci",
      "maintenance" => "chore",
      "reversion" => "revert",
      "dependencies" | "dependency" => "deps",
      "sec" => "security",
      "configuration" => "config",
      "ergonomics" => "ux",
      "version" => "release",
      "infrastructure" => "infra",
      "initialization" => "init",
      "merging" => "merge",
      "hacky" => "hack",
      "work-in-progress" => "wip",
      _ => return lowered,
   };
   canonical.to_string()
}
550
/// Unit tests for the lenient markdown parsers. Each section feeds one parser
/// the wrapping variations models are known to produce and checks the
/// resulting JSON.
#[cfg(test)]
mod tests {
   use super::*;

   // ===== conventional analysis =====

   #[test]
   fn test_conventional_analysis() {
      let md = "# feat(api): add user authentication endpoint\n\n- Added POST \
                /auth/login endpoint\n- Implemented bcrypt password hashing\n\nFixes: #123";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "feat");
      assert_eq!(r["scope"], "api");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"][0], "#123");
   }

   #[test]
   fn test_analysis_lenient_variations() {
      // fenced, no `#`, bold heading, `*` bullets, Closes: footer
      let md = "```md\n**fix(core): corrected null deref**\n\n* fixed a crash\n* \
                guarded the pointer\n\nCloses: #7, #8\n```";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "fix");
      assert_eq!(r["scope"], "core");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"].as_array().unwrap().len(), 2);
   }

   #[test]
   fn test_analysis_no_scope_and_leading_blank_lines() {
      let md = "\n\n\n# chore: bumped version\n";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "chore");
      assert!(r["scope"].is_null());
   }

   // ===== summary: all the wrapping variations =====

   #[test]
   fn test_summary_variations() {
      let cases = [
         "<summary>Added JWT auth</summary>",
         "Added JWT auth",                      // bare
         "\"Added JWT auth\"",                  // quoted
         "<summary>\"Added JWT auth\"</title>", // quoted + mismatched close tag
         "```md\n<summary>\nAdded JWT auth\n</summary>\n```", // fenced + multiline
         "Title: Added JWT auth",               // labeled
         "# Added JWT auth",                    // heading marker
         "\n\n  Added JWT auth  \n\n",          // stray whitespace
      ];
      for c in cases {
         let r = parse_summary_output(c).unwrap();
         assert_eq!(r["summary"], "Added JWT auth", "input was: {c:?}");
      }
   }

   // ===== changelog: header + item variations =====

   #[test]
   fn test_changelog_hash_and_dash() {
      let md = "# Added\n- POST /auth/login endpoint\n\n# Fixed\n- Race condition";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 1);
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 1);
   }

   #[test]
   fn test_changelog_lenient_mixed() {
      // `##` and `#` and bare `Category:` headers; `-`, `*`, and bare items;
      // random blank lines.
      let md = "## Added\n- one\n* two\n\n\nFixed:\nthree\n- four\n\n# Security\n\n  five  ";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 2, "Added");
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 2, "Fixed (bare + dash)");
      assert_eq!(e["Security"].as_array().unwrap().len(), 1, "Security (bare item)");
   }

   #[test]
   fn test_changelog_bare_category_not_confused_with_item() {
      // "Added rate limiting" must be an ITEM, not a header.
      let md = "# Security\n- Added rate limiting on auth endpoints";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert!(e.contains_key("Security"));
      assert!(!e.contains_key("Added"));
      assert_eq!(e["Security"][0], "Added rate limiting on auth endpoints");
   }

   #[test]
   fn test_changelog_fenced() {
      let md = "```\n# Added\n- thing\n```";
      let r = parse_changelog_response(md).unwrap();
      assert_eq!(r["entries"]["Added"][0], "thing");
   }

   // ===== literal \n escapes =====

   #[test]
   fn test_literal_backslash_n_analysis() {
      // A model emitted the whole thing on one physical line with literal \n.
      let md = "# feat(api): add auth\\n\\n- did a thing\\n- did another\\n\\nFixes: #1";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "feat");
      assert_eq!(r["scope"], "api");
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["issue_refs"][0], "#1");
   }

   #[test]
   fn test_literal_backslash_n_changelog() {
      let md = "# Added\\n- one\\n- two\\n# Fixed\\n- three";
      let r = parse_changelog_response(md).unwrap();
      let e = r["entries"].as_object().unwrap();
      assert_eq!(e["Added"].as_array().unwrap().len(), 2);
      assert_eq!(e["Fixed"].as_array().unwrap().len(), 1);
   }

   #[test]
   fn test_real_newlines_with_stray_backslash_preserved() {
      // Three real newlines vs. one literal `\n`: real newlines dominate, so
      // the backslash in `C:\nope` must be left alone. (With a 1:1 ratio the
      // normalizer still fires, which would split the bullet and make a
      // count-only assertion pass by accident.)
      let md = "# docs: explain C:\\\\path usage\n\n- noted the path C:\\nope is literal\n- second";
      let r = parse_conventional_analysis(md).unwrap();
      assert_eq!(r["type"], "docs");
      // Both bullets survive, and the first one still contains the literal
      // backslash-n instead of having been cut in two.
      assert_eq!(r["details"].as_array().unwrap().len(), 2);
      assert_eq!(r["details"][0]["text"], "noted the path C:\\nope is literal");
   }

   // ===== compose =====

   #[test]
   fn test_compose_intent_fenced() {
      let md = "```\nG1 := feat(api): add endpoints\nG2 := test(api): add tests\n\nG2 \
                <- G1\n\nFiles:\n- G1: a.rs, b.rs\n* G2: c.test.ts\n```";
      let r = parse_compose_intent(md).unwrap();
      let g = r["groups"].as_array().unwrap();
      assert_eq!(g.len(), 2);
      assert_eq!(g[0]["file_ids"].as_array().unwrap().len(), 2);
      assert_eq!(g[1]["dependencies"][0], "G1");
      assert_eq!(g[1]["file_ids"][0], "c.test.ts"); // `*` bullet handled
   }

   #[test]
   fn test_compose_binding_lenient() {
      let md = "```\n## G1:\n- h1\n* h2\n# G2\n- h3\n```";
      let r = parse_compose_binding(md).unwrap();
      let a = r["assignments"].as_array().unwrap();
      assert_eq!(a.len(), 2);
      assert_eq!(a[0]["group_id"], "G1"); // trailing colon + `##` stripped
      assert_eq!(a[0]["hunk_ids"].as_array().unwrap().len(), 2);
   }

   // ===== map-phase batch observations =====

   #[test]
   fn test_batch_observations() {
      let md = "## src/config.rs\n- added TOML loading\n- changed timeout\n\n## \
                src/main.rs\n- wired CLI flag\n\n## src/empty.rs";
      let r = parse_batch_observations(md).unwrap();
      let files = r["files"].as_array().unwrap();
      assert_eq!(files.len(), 3);
      assert_eq!(files[0]["path"], "src/config.rs");
      assert_eq!(files[0]["observations"].as_array().unwrap().len(), 2);
      assert_eq!(files[1]["observations"].as_array().unwrap().len(), 1);
      assert_eq!(files[2]["observations"].as_array().unwrap().len(), 0); // header only
   }

   #[test]
   fn test_batch_observations_fenced_and_literal_newlines() {
      let md = "```\\n## a.rs\\n- did x\\n* did y\\n## b.rs\\n- did z\\n```";
      let r = parse_batch_observations(md).unwrap();
      let files = r["files"].as_array().unwrap();
      assert_eq!(files.len(), 2);
      assert_eq!(files[0]["path"], "a.rs");
      assert_eq!(files[0]["observations"].as_array().unwrap().len(), 2);
   }
}
llm_git/markdown_output.rs

llm_git/
markdown_output.rs